In [1]:
import requests
import json
import pandas as pd
from dotenv import load_dotenv
import os
from datetime import datetime

In [None]:
# Load the World Weather Online (WWO) API key from the local .env.weather file.
load_dotenv(".env.weather")
API_KEY = os.getenv("WWO_API_KEY")

# Fail fast: os.getenv returns None when the variable is missing, and the
# fetch below would then send every request without a key.
# (Never print the key itself -- notebook outputs get saved and shared.)
if not API_KEY:
    raise RuntimeError(
        "WWO_API_KEY is not set; define it in .env.weather or the environment."
    )

In [None]:
def fetch_weather_data(
    start_date, end_date, location="Adelaide,Australia", api_key=None, tp=3
):
    """Download past weather from World Weather Online into a DataFrame.

    The inclusive period ``start_date``..``end_date`` is split into
    calendar-month windows and one request is issued per window. The first
    window starts exactly on ``start_date`` (even when that is not the 1st of
    a month) and the last window ends exactly on ``end_date``, so nothing
    outside the requested period is fetched and no partial month is dropped.

    Parameters
    ----------
    start_date, end_date : str or datetime-like
        Inclusive bounds of the period; anything ``pd.Timestamp`` accepts.
        Any time-of-day component is discarded.
    location : str
        Sent as the API's ``q`` parameter.
    api_key : str or None
        Sent as the API's ``key`` parameter.
    tp : int
        Sent unchanged as the API's ``tp`` parameter.
        NOTE(review): this appears to be the interval in hours between
        entries (the notebook passes 1 for hourly) -- confirm in the API docs.

    Returns
    -------
    pandas.DataFrame
        One row per entry of each returned day's ``"hourly"`` list, with the
        columns ``date``, ``time``, ``tempC``, ``FeelsLikeC``, ``humidity``,
        ``windspeedKmph``, ``WindGustKmph`` and ``cloudcover``. Values are kept
        exactly as they appear in the JSON response (no type conversion).
        The columns are present even when no rows were fetched.

    Notes
    -----
    A window whose response status is not 200 is reported with ``print`` and
    skipped, so the result can contain gaps. Network errors raised by
    ``requests`` (including timeouts) propagate to the caller.
    """
    base_url = "https://api.worldweatheronline.com/premium/v1/past-weather.ashx"
    hourly_fields = (
        "time",
        "tempC",
        "FeelsLikeC",
        "humidity",
        "windspeedKmph",
        "WindGustKmph",
        "cloudcover",
    )
    # Without a timeout a stalled connection would block the cell forever.
    timeout_seconds = 60

    range_start = pd.Timestamp(start_date).normalize()
    range_end = pd.Timestamp(end_date).normalize()
    all_data = []

    start = range_start
    while start <= range_end:
        print(start)
        # MonthEnd(0) rolls forward to the last day of the current month (and
        # leaves a month-end date untouched); clamp it to the requested end.
        window_end = min(start + pd.offsets.MonthEnd(0), range_end)
        start_str = start.strftime("%Y-%m-%d")
        end = window_end.strftime("%Y-%m-%d")
        params = {
            "key": api_key,
            "q": location,
            "format": "json",
            "date": start_str,
            "enddate": end,
            "tp": tp,
        }

        response = requests.get(base_url, params=params, timeout=timeout_seconds)

        if response.status_code == 200:
            weather_data = response.json().get("data", {}).get("weather", [])

            for day in weather_data:
                date = day["date"]
                for entry in day.get("hourly", []):
                    record = {"date": date}
                    record.update(
                        (field, entry.get(field)) for field in hourly_fields
                    )
                    all_data.append(record)
        else:
            print(
                f"Error fetching data for {start_str} to {end}: {response.status_code}"
            )

        # The next window begins the day after this one ended.
        start = window_end + pd.Timedelta(days=1)

    # Explicit columns keep the schema stable when nothing was fetched.
    return pd.DataFrame(all_data, columns=["date", *hourly_fields])

In [17]:
# Fetch Melbourne weather for the requested range 2021-10-01 .. 2025-04-23.
# tp=1 asks the API for hourly resolution.
df_weather = fetch_weather_data(
    "2021-10-01",
    "2025-04-23",
    api_key=API_KEY,
    location="Melbourne,Australia",
    tp=1,
)

2021-10-01 00:00:00
2021-11-01 00:00:00
2021-12-01 00:00:00
2022-01-01 00:00:00
2022-02-01 00:00:00
2022-03-01 00:00:00
2022-04-01 00:00:00
2022-05-01 00:00:00
2022-06-01 00:00:00
2022-07-01 00:00:00
2022-08-01 00:00:00
2022-09-01 00:00:00
2022-10-01 00:00:00
2022-11-01 00:00:00
2022-12-01 00:00:00
2023-01-01 00:00:00
2023-02-01 00:00:00
2023-03-01 00:00:00
2023-04-01 00:00:00
2023-05-01 00:00:00
2023-06-01 00:00:00
2023-07-01 00:00:00
2023-08-01 00:00:00
2023-09-01 00:00:00
2023-10-01 00:00:00
2023-11-01 00:00:00
2023-12-01 00:00:00
2024-01-01 00:00:00
2024-02-01 00:00:00
2024-03-01 00:00:00
2024-04-01 00:00:00
2024-05-01 00:00:00
2024-06-01 00:00:00
2024-07-01 00:00:00
2024-08-01 00:00:00
2024-09-01 00:00:00
2024-10-01 00:00:00
2024-11-01 00:00:00
2024-12-01 00:00:00
2025-01-01 00:00:00
2025-02-01 00:00:00
2025-03-01 00:00:00
2025-04-01 00:00:00


In [18]:
# Preview the first five fetched rows.
df_weather.head(n=5)

Unnamed: 0,date,time,tempC,FeelsLikeC,humidity,windspeedKmph,WindGustKmph,cloudcover
0,2021-10-01,0,12,11,97,9,18,86
1,2021-10-01,100,12,11,97,10,19,82
2,2021-10-01,200,12,11,97,11,25,77
3,2021-10-01,300,12,10,97,11,26,75
4,2021-10-01,400,12,11,97,11,26,76


In [19]:
# Combine the separate date and time columns into a single datetime column.
# The time values are zero-padded to four characters (e.g. 100 -> "0100") so
# they can be parsed as HHMM, then the two source columns are dropped.
df_weather = df_weather.assign(
    datetime=lambda frame: pd.to_datetime(
        frame["date"] + frame["time"].astype(str).str.zfill(4),
        format="%Y-%m-%d%H%M",
    )
).drop(columns=["date", "time"])
df_weather.head()

Unnamed: 0,tempC,FeelsLikeC,humidity,windspeedKmph,WindGustKmph,cloudcover,datetime
0,12,11,97,9,18,86,2021-10-01 00:00:00
1,12,11,97,10,19,82,2021-10-01 01:00:00
2,12,11,97,11,25,77,2021-10-01 02:00:00
3,12,10,97,11,26,75,2021-10-01 03:00:00
4,12,11,97,11,26,76,2021-10-01 04:00:00


In [20]:
# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing (no-op when it is already there).
os.makedirs("data", exist_ok=True)
df_weather.to_csv("data/mel_weather_data.csv", index=False)