In [8]:
import openmeteo_requests
import pandas as pd
import requests_cache
from retry_requests import retry
import requests
import os
from datetime import datetime
from dotenv import load_dotenv

## Fetch Weather Data from Openmeteo

In [10]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
	"latitude": 18.5196,
	"longitude": 73.8554,
	"start_date": "2015-01-11",
	"end_date": "2024-12-31",
	"daily": ["temperature_2m_max", "temperature_2m_min", "precipitation_sum", "weather_code", "sunshine_duration", "cloud_cover_mean", "wind_speed_10m_mean", "winddirection_10m_dominant"],
}
responses = openmeteo.weather_api(url, params=params)



In [11]:
response = responses[0]

print(f"Coordinates: {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation: {response.Elevation()} m asl")
print(f"Timezone difference to GMT+0: {response.UtcOffsetSeconds()}s")

Coordinates: 18.523725509643555°N 73.86875915527344°E
Elevation: 555.0 m asl
Timezone difference to GMT+0: 0s


In [12]:
# Process daily data. The order of variables needs to be the same as requested.
daily = response.Daily()
daily_temperature_2m_max = daily.Variables(0).ValuesAsNumpy()
daily_temperature_2m_min = daily.Variables(1).ValuesAsNumpy()
daily_precipitation_sum = daily.Variables(2).ValuesAsNumpy()
daily_weather_code = daily.Variables(3).ValuesAsNumpy()
daily_sunshine_duration = daily.Variables(4).ValuesAsNumpy()
daily_cloud_cover_mean = daily.Variables(5).ValuesAsNumpy()
daily_wind_speed_10m_mean = daily.Variables(6).ValuesAsNumpy()
daily_winddirection_10m_dominant = daily.Variables(7).ValuesAsNumpy()

daily_data = {"date": pd.date_range(
	start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
	end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = daily.Interval()),
	inclusive = "left"
)}

daily_data["temperature_2m_max"] = daily_temperature_2m_max
daily_data["temperature_2m_min"] = daily_temperature_2m_min
daily_data["precipitation_sum"] = daily_precipitation_sum
daily_data["weather_code"] = daily_weather_code
daily_data["sunshine_duration"] = daily_sunshine_duration
daily_data["cloud_cover_mean"] = daily_cloud_cover_mean
daily_data["wind_speed_10m_mean"] = daily_wind_speed_10m_mean
daily_data["winddirection_10m_dominant"] = daily_winddirection_10m_dominant

daily_dataframe = pd.DataFrame(data = daily_data)

In [13]:
print("\nDaily data\n", daily_dataframe)


Daily data
                           date  temperature_2m_max  temperature_2m_min  \
0    2015-01-11 00:00:00+00:00           28.209000           13.259000   
1    2015-01-12 00:00:00+00:00           27.709000           12.509000   
2    2015-01-13 00:00:00+00:00           28.159000           14.859000   
3    2015-01-14 00:00:00+00:00           28.959000           15.459000   
4    2015-01-15 00:00:00+00:00           28.209000           14.009000   
...                        ...                 ...                 ...   
3638 2024-12-27 00:00:00+00:00           30.253000           20.052999   
3639 2024-12-28 00:00:00+00:00           30.103001           19.503000   
3640 2024-12-29 00:00:00+00:00           31.003000           19.603001   
3641 2024-12-30 00:00:00+00:00           31.052999           19.802999   
3642 2024-12-31 00:00:00+00:00           30.653000           18.903000   

      precipitation_sum  weather_code  sunshine_duration  cloud_cover_mean  \
0                   

In [14]:
# Save the dataframe to csv

daily_dataframe.to_csv("../data/raw/weather-data.csv")

## Merging All Yearly AQI Data
Since the data is in .xlsx format we must first format it, we're looking to have 2 columns only [date, AQI]

In [24]:
df = pd.read_excel(r"../data/raw/AQI-2020.xlsx", index_col=0)

In [25]:
df

Unnamed: 0_level_0,January,February,March,April,May,June,July,August,September,October,November,December
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,130,153.0,86,49.0,52,61.0,78,51.0,62.0,,82.0,121.0
2,116,165.0,100,49.0,50,62.0,69,48.0,63.0,,119.0,136.0
3,108,104.0,133,53.0,50,59.0,66,46.0,60.0,,98.0,136.0
4,149,95.0,91,50.0,48,61.0,63,,59.0,,79.0,148.0
5,198,128.0,103,48.0,49,59.0,61,46.0,56.0,,60.0,137.0
6,170,109.0,93,50.0,49,58.0,59,46.0,57.0,,65.0,149.0
7,230,88.0,91,52.0,101,58.0,60,47.0,57.0,,75.0,145.0
8,105,100.0,93,71.0,385,61.0,63,47.0,52.0,,77.0,112.0
9,115,91.0,82,61.0,49,49.0,67,44.0,52.0,,62.0,121.0
10,172,76.0,82,50.0,51,46.0,67,43.0,53.0,,68.0,92.0


In [55]:
# Initialise a list to store AQI in [date, AQI] format
all_aqi = []
months = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December"
]

In [62]:
for date, month_aqi in df.iterrows():
    print(date)
    for month in months:
        print(month, month_aqi[month])
        aqi = month_aqi[month]
        x = datetime.datetime(2020,month,date)
        print(aqi, x)


1
January 130.0


AttributeError: type object 'datetime.datetime' has no attribute 'datetime'