In [None]:
import requests
import datetime
import time
import csv

from dotenv import load_dotenv
import os

# Download hourly historical weather for LOCATION from the WeatherAPI history
# endpoint and write one CSV row per hour to 'nyc_weather_data.csv'.
#
# The API key is read from the WEATHER_API_KEY environment variable, which
# load_dotenv() populates from the '../.api_key' file.
load_dotenv('../.api_key')
WEATHER_API_KEY = os.getenv('WEATHER_API_KEY')
# HTTPS so the API key in the query string is not sent in clear text.
BASE_URL = 'https://api.weatherapi.com/v1/history.json'
LOCATION = 'New York'

start_date = datetime.date(2019, 1, 1)
end_date = datetime.date.today()

# Each request asks for a window of at most this many days (dt..end_dt,
# both inclusive).
MAX_DAYS_PER_REQUEST = 7
REQUEST_TIMEOUT_SECONDS = 30

delta = datetime.timedelta(days=1)  # one calendar day

current_date = start_date

# Fail before the output file is opened in 'w' mode, so a missing key does not
# truncate a CSV produced by an earlier run.
if not WEATHER_API_KEY:
    raise RuntimeError("WEATHER_API_KEY is not set; check '../.api_key'")

# Open a CSV file to write the data
with open('nyc_weather_data.csv', mode='w', newline='', encoding='utf-8') as csv_file:
    fieldnames = [
        'date', 'time', 'temp_c', 'temp_f', 'precip_mm', 'precip_in',
        'humidity', 'wind_kph', 'wind_mph', 'wind_dir', 'pressure_mb',
        'pressure_in', 'cloud', 'feelslike_c', 'feelslike_f', 'uv',
        'gust_kph', 'gust_mph', 'condition_text', 'condition_code'
    ]
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()

    # Columns copied verbatim from each hourly entry. 'date' comes from the
    # enclosing day and the two condition columns from the nested 'condition'
    # object, so they are filled in separately below.
    derived_columns = ('date', 'condition_text', 'condition_code')
    hourly_keys = [name for name in fieldnames if name not in derived_columns]

    while current_date <= end_date:
        dt = current_date.strftime('%Y-%m-%d')
        end_dt = min(current_date + (MAX_DAYS_PER_REQUEST - 1) * delta, end_date)
        end_dt_str = end_dt.strftime('%Y-%m-%d')

        params = {
            'key': WEATHER_API_KEY,
            'q': LOCATION,
            'dt': dt,
            'end_dt': end_dt_str
        }

        try:
            response = requests.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
            print("Response status code:", response.status_code)
            # Raises a ValueError subclass when the body is not valid JSON.
            data = response.json()
        except (requests.RequestException, ValueError) as exc:
            print(f"Exception occurred for {dt} to {end_dt_str}: {exc}")
            break

        # Check for errors
        if response.status_code != 200 or 'error' in data:
            print(f"Error fetching data for {dt} to {end_dt_str}: {data.get('error', {}).get('message', 'Unknown error')}")
            break

        # Process data, remembering the latest day the API actually returned.
        last_day = None
        for day in data['forecast']['forecastday']:
            day_date = datetime.datetime.strptime(day['date'], '%Y-%m-%d').date()
            if day_date < current_date:
                # Already covered by an earlier request; never write it twice.
                continue

            for hour_data in day['hour']:
                record = {key: hour_data[key] for key in hourly_keys}
                record['date'] = day['date']
                record['condition_text'] = hour_data['condition']['text']
                record['condition_code'] = hour_data['condition']['code']
                # Write to CSV
                writer.writerow(record)

            if last_day is None or day_date > last_day:
                last_day = day_date

        if last_day is None:
            # Nothing usable came back for this window: step one day forward
            # so the loop always makes progress.
            print(f"No data returned for {dt} to {end_dt_str}")
            current_date += delta
        else:
            print(f"Data fetched for {dt} to {last_day}")
            # Resume on the day after the last one received. The next request
            # then never overlaps this one, whether the API returned the whole
            # window or only its first day.
            current_date = last_day + delta

        # Pause between requests
        time.sleep(1)


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the hourly weather export; the 'time' and 'temp_f' columns are used below.
df = pd.read_csv('../assets/nyc_hourly_weather_2019_1_1.csv')

# Parse the timestamps, then store each row's hour of day in a new 'hour' column.
df['time'] = pd.to_datetime(df['time'])
df['hour'] = df['time'].dt.hour

# Mean temperature (°F) per hour of day, computed over every row in the file.
hourly_avg_temp = df['temp_f'].groupby(df['hour']).mean()

# Line chart of the averages with one marker per hour.
# NOTE(review): the title says 2019 but no year filter is applied here;
# confirm the input file only contains 2019 data.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(hourly_avg_temp.index, hourly_avg_temp.values, marker='o')
ax.set_title('Average Temperature by Hour of Day (2019)')
ax.set_xlabel('Hour of Day')
ax.set_ylabel('Average Temperature (°F)')
ax.set_xticks(range(24))
ax.grid(True, linestyle='--', alpha=0.7)

# Render the figure
fig.tight_layout()
plt.show()

# Also print the averages as a table
print(hourly_avg_temp)
