# Open-Meteo Weather Scraper - Jan to June 2015

In [5]:
import requests
import pandas as pd
from datetime import datetime, timedelta

In [None]:
# Download window, inclusive on both ends: 1 January 2015 through 30 June 2015.
start_date, end_date = datetime(2015, 1, 1), datetime(2015, 6, 30)

# Point sent to the API as the latitude/longitude query parameters
# (presumably New York City, matching the America/New_York timezone below).
latitude, longitude = 40.7128, -74.0060

# Archive endpoint with {lat}, {lon}, {start} and {end} placeholders that are
# filled in per request via str.format(). The query string is assembled from
# its individual parameters so the list of hourly variables is easy to read.
url_template = "&".join([
    "https://archive-api.open-meteo.com/v1/archive?latitude={lat}",
    "longitude={lon}",
    "start_date={start}",
    "end_date={end}",
    "hourly=" + ",".join([
        "temperature_2m",
        "relative_humidity_2m",
        "windspeed_10m",
        "precipitation",
        "surface_pressure",
        "weathercode",
    ]),
    "timezone=America%2FNew_York",
])

In [None]:
# Download the hourly data one calendar month per request and collect each
# month's table in data_frames. A month that cannot be fetched is reported and
# skipped (best effort); the skipped months are listed in failed_months.

# Seconds to wait for the server. Without a timeout requests.get() can block
# indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30

current_start = start_date
data_frames = []
failed_months = []

while current_start <= end_date:
    # Last day of current_start's month: day 28 plus 4 days always lands in
    # the following month, so snap to that month's 1st and step back one day.
    current_end = (current_start.replace(day=28) + timedelta(days=4)).replace(day=1) - timedelta(days=1)
    # Never request past the overall end of the window.
    current_end = min(current_end, end_date)

    month_label = current_start.strftime('%Y-%m')
    url = url_template.format(
        lat=latitude,
        lon=longitude,
        start=current_start.strftime('%Y-%m-%d'),
        end=current_end.strftime('%Y-%m-%d')
    )

    hourly = None
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        if response.status_code == 200:
            # .get() so that a payload without an 'hourly' section is reported
            # as a failed month instead of raising KeyError.
            hourly = response.json().get('hourly')
            failure_reason = "response has no 'hourly' data"
        else:
            failure_reason = f"HTTP {response.status_code}"
    except (requests.RequestException, ValueError) as exc:
        # RequestException: connection errors and timeouts.
        # ValueError: the response body was not valid JSON.
        failure_reason = str(exc)

    if hourly:
        data_frames.append(pd.DataFrame(hourly))
    else:
        print(f"Failed to fetch data for {month_label} ({failure_reason})")
        failed_months.append(month_label)

    current_start = current_end + timedelta(days=1)

if failed_months:
    # Make an incomplete download obvious before anything is saved from it.
    print(f"Warning: {len(failed_months)} month(s) missing from the download: {', '.join(failed_months)}")

In [None]:
import os
import matplotlib.pyplot as plt

# Combine the downloaded monthly tables, save them as one CSV file and write
# a 2x2 overview figure (temperature, humidity, wind speed, precipitation).

# Create necessary directories
os.makedirs('../database', exist_ok=True)
os.makedirs('../visualizations', exist_ok=True)

# pd.concat() on an empty list fails with an unhelpful "No objects to
# concatenate"; report the actual problem instead.
if not data_frames:
    raise ValueError("No weather data was downloaded; nothing to save or plot")

# Combine all data
final_df = pd.concat(data_frames, ignore_index=True)

# Save to database folder
final_df.to_csv("../database/weather_data_jan_june_2015_complete.csv", index=False)
print(f"✅ Weather data saved to database/ ({len(final_df)} rows)")

# The 'time' column comes out of the JSON as text. Plotting text puts every
# row on a categorical axis (one tick per hourly sample), which is unreadable
# and very slow. Parse it for plotting only so the saved CSV is unaffected.
plot_times = pd.to_datetime(final_df['time'])

# One entry per panel: (column, title, y-axis label, extra line style).
# The first panel passes no color and so keeps matplotlib's default.
panels = [
    ('temperature_2m', 'Temperature Over Time (Jan-June 2015)', 'Temperature (°C)', {}),
    ('relative_humidity_2m', 'Humidity Over Time', 'Relative Humidity (%)', {'color': 'orange'}),
    ('windspeed_10m', 'Wind Speed Over Time', 'Wind Speed (km/h)', {'color': 'green'}),
    ('precipitation', 'Precipitation Over Time', 'Precipitation (mm)', {'color': 'blue'}),
]

# Create visualization
fig, axes = plt.subplots(2, 2, figsize=(15, 8))
for ax, (column, title, ylabel, line_style) in zip(axes.flat, panels):
    ax.plot(plot_times, final_df[column], linewidth=0.5, **line_style)
    ax.set_title(title)
    ax.set_xlabel('Date')
    ax.set_ylabel(ylabel)
    ax.tick_params(axis='x', rotation=45)

fig.tight_layout()
fig.savefig('../visualizations/weather_data_overview.png', dpi=300, bbox_inches='tight')
# Close the figure so it is not also rendered inline and its memory is freed.
plt.close(fig)

print("✅ Weather visualization saved to visualizations/")