In [None]:
# Import libraries
import pandas as pd
import os
import requests
import time
from datetime import datetime
from datetime import timezone
from meteostat import Hourly

# To be able to pull the API key from the .env
from dotenv import load_dotenv
load_dotenv()

# Folders for loading and saving data
load_path = "divvy/"
save_path = "data/"
# makedirs(..., exist_ok=True) creates a folder only when it is missing, so no
# separate "does it exist?" check is needed before creating it.
for folder in (load_path, save_path):
    os.makedirs(folder, exist_ok=True)

### 1. Weather dataset from Meteostat 'Chicago Midway airport', ID 72534 (only proof of concept) ###

Data structure:
https://dev.meteostat.net/python/hourly.html#example

|Column|Description|Type|
|---|---|---|
|time|The datetime of the observation|Datetime64|
|temp|The air temperature in °C|Float64|
|dwpt|The dew point in °C|Float64|
|rhum|The relative humidity in percent (%)|Float64|
|prcp|The one hour precipitation total in mm|Float64|
|snow|The snow depth in mm|Float64|
|wdir|The average wind direction in degrees (°)|Float64|
|wspd|The average wind speed in km/h|Float64|
|wpgt|The peak wind gust in km/h|Float64|
|pres|The average sea-level air pressure in hPa|Float64|
|tsun|The one hour sunshine total in minutes (m)|Float64|
|coco|The weather condition code|Float64|

In [None]:
# Get hourly weather data from Chicago Midway airport (ID 72534) from Meteostat-API

# Set time period
start = datetime(2023, 9, 1, 0, 0)
end = datetime(2023, 9, 30, 23, 59)

# Get hourly data for weather station 'Chicago Midway airport' (ID 72534)
data = Hourly('72534', start, end)
df_meteostat = data.fetch()

# Drop unused columns and turn the 'time' index into a regular column
df_meteostat = (
    df_meteostat
    .drop(columns=['dwpt', 'rhum', 'wpgt', 'pres', 'tsun', 'coco'])
    .reset_index(drop=False)
)

# Cleaning
# NOTE(review): the int cast raises if 'wdir' contains missing values - confirm
# the station always reports a wind direction, or switch to a nullable dtype.
df_meteostat = df_meteostat.astype({'wdir': int})
# Assign the filled column back instead of calling fillna(..., inplace=True) on
# the selected column; that chained in-place form does not reliably update the
# parent DataFrame.
df_meteostat['prcp'] = df_meteostat['prcp'].fillna(0)
df_meteostat['snow'] = df_meteostat['snow'].fillna(0)

# Renaming
df_meteostat = df_meteostat.rename(columns={
    'time': 'datetime',
    'prcp': 'rain_1h',
    'snow': 'snow_1h',
    'wdir': 'wind_deg',
    'wspd': 'wind_speed_km_h',
})

# Keep only one row per timestamp
df_meteostat = df_meteostat.drop_duplicates(subset=['datetime'])

# Check if received Datapoints match expected number of Datapoints (days * 24)
expected_datapoints = ((end - start).days + 1) * 24
received_datapoints = len(df_meteostat)
if expected_datapoints == received_datapoints:
    print(f'Received Datapoints from Meteostat ({received_datapoints}) match expected Datapoints ({expected_datapoints}).')
else:
    print(f'Received Datapoints from Meteostat ({received_datapoints}) DO NOT match expected Datapoints ({expected_datapoints})!')

# Raw estimation: when in DataFrame precipitation and temperatures < 0.0 °C -> snow instead of rain
# (vectorised: move the precipitation of all matching rows at once)
is_snowfall = (df_meteostat['temp'] < 0.0) & (df_meteostat['rain_1h'] > 0.0)
df_meteostat.loc[is_snowfall, 'snow_1h'] = df_meteostat.loc[is_snowfall, 'rain_1h']
df_meteostat.loc[is_snowfall, 'rain_1h'] = 0

# Print DataFrame
df_meteostat.info(show_counts=True)

### 2. Get weather data for a specific month via the OpenWeather One Call API 3.0 ###

In [None]:
# Get longitude and latitude from city name (optional)
# https://api.openweathermap.org/geo/1.0/direct?q={city_name},{state_code},{country_code}&limit={limit}&appid={APIkey}
# HTTPS is used so that the API key is not transmitted in clear text.
url = 'https://api.openweathermap.org/geo/1.0/direct'
parameters = {
    'q': 'Chicago, US',
    'limit': '1',
    'appid': os.getenv('openweather_api_key'),  # extract value for openweather api key from private .env file
    }

# Send API request; fail early on HTTP errors (e.g. missing or invalid API key)
# instead of running into a KeyError further down.
r = requests.get(url, params=parameters, timeout=30)
r.raise_for_status()
city_location = r.json()
if not city_location:
    raise ValueError(f"OpenWeather geocoding returned no match for {parameters['q']!r}")
df_city_location = pd.json_normalize(city_location)

# Output location of city
print(f"The city of {df_city_location.loc[0, 'name']} has a latitude of {df_city_location.loc[0, 'lat']} and a longitude of {df_city_location.loc[0, 'lon']}.")

In [None]:
# With Openweather API 3.0 you have 1000 calls per day for free. If your API call exceeds 1000 calls the code won't be executed.
# Chicago: Lat: 41.878114, Lon: -87.629798 (taken from bought Openweather csv-file)
# General API 3.0 call: https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={time}&units=metric&appid={APIkey}

# Set time period (one month). The naive datetimes are interpreted as UTC, so
# the resulting Unix timestamps do not depend on the timezone of the machine
# running the notebook (the former mktime() + 3600 correction only produced
# UTC on a host running at UTC+1).
start = datetime(2023, 11, 2, 0, 0)
start_unix = int(start.replace(tzinfo=timezone.utc).timestamp())
end = datetime(2023, 12, 1, 23, 0)
end_unix = int(end.replace(tzinfo=timezone.utc).timestamp())

# One request per hour, both interval ends included
n_api_calls = (end_unix - start_unix) // 3600 + 1

# Check amount of API calls and proceed only if it stays within the free limit
if n_api_calls <= 1000:

    # Prepare API-call with parameters
    url = 'https://api.openweathermap.org/data/3.0/onecall/timemachine'
    api_key = os.getenv('openweather_api_key')
    if not api_key:
        raise RuntimeError("Environment variable 'openweather_api_key' is not set (check the .env file).")

    # Collect one DataFrame per request and concatenate once after the loop
    # (repeated concat inside the loop copies all previous rows every time).
    hourly_frames = []

    # Request the weather data hour by hour
    for i in range(start_unix, end_unix + 1, 3600):

        query = {
            'lat': '41.878114',
            'lon': '-87.629798',
            'dt': i,
            'units': 'metric',
            'appid': api_key,
        }
        # send API request; abort on HTTP errors so that an error payload is
        # not mistaken for weather data
        r = requests.get(url, params=query, timeout=30)
        r.raise_for_status()
        weather_temp = r.json()

        # Extract data from API request and normalize data
        df_data_temp = pd.json_normalize(weather_temp,
                                        record_path="data",
                                        meta=["lat", "lon", "timezone", "timezone_offset"],
                                        errors='ignore')
        df_data_temp = df_data_temp.rename(columns={'rain.1h': 'rain_1h', 'snow.1h': 'snow_1h'})
        # Extract data from API request and normalize weather information
        df_weather_temp = pd.json_normalize(weather_temp,
                                        record_prefix="weather_",
                                        record_path=["data", "weather"],
                                        errors='ignore')
        # Concat separate DataFrames into one (sometimes there is more than one weather information -> ffill())
        df_temp = pd.concat([df_data_temp, df_weather_temp], axis=1).ffill().drop(columns=['weather'])
        # Add columns with ISO time and city name
        df_temp['dt_iso'] = datetime.fromtimestamp(df_temp['dt'].iloc[0], tz=timezone.utc)
        df_temp['city_name'] = 'Chicago'

        hourly_frames.append(df_temp)

    # Build the monthly DataFrame (empty if the time period contains no hour)
    if hourly_frames:
        df_weather_monthly = pd.concat(hourly_frames, ignore_index=True)
    else:
        df_weather_monthly = pd.DataFrame([])

else:
    print('No API calls executed, because it would exceed the free limit of 1000 calls per day')

In [None]:
# Clean DataFrame and save it as csv-file
save_name = 'Chicago_weather_2023_11.csv'
# Add rain_1h, snow_1h and wind_gust columns if they don't exist
for optional_column in ('rain_1h', 'snow_1h', 'wind_gust'):
    if optional_column not in df_weather_monthly.columns:
        df_weather_monthly[optional_column] = 0.0
# Copy necessary columns to new DataFrame for saving later
df_save_weather = df_weather_monthly[['dt', 'dt_iso', 'timezone_offset', 'city_name', 'lat', 'lon', 'temp', 'visibility', 'dew_point', 'feels_like', 'pressure', 'humidity', 'wind_speed', 'wind_deg', 'wind_gust', 'rain_1h', 'snow_1h', 'clouds', 'weather_id', 'weather_main', 'weather_description', 'weather_icon']].copy()
# Rename columns
df_save_weather = df_save_weather.rename(columns={'timezone_offset': 'timezone', 'clouds': 'clouds_all'})
# Set new datatype to columns.
# astype() returns a frame that really carries the nullable integer dtype;
# assigning through df.loc[:, col] = ... writes into the existing column and
# keeps its previous dtype in pandas >= 2.0, which silently drops the cast.
integer_columns = ['dt', 'timezone', 'visibility', 'pressure', 'wind_deg', 'clouds_all']
df_save_weather = df_save_weather.astype({column: 'Int64' for column in integer_columns})
# Save DataFrame to csv-file (written to load_path, the folder the weather
# csv-files are read from)
df_save_weather.to_csv(load_path + save_name, index=False)
# Show summary and the first rows of the DataFrame
df_save_weather.info()
df_save_weather.head()

### 3. Create combined weather dataset from the purchased OpenWeather 'History Bulk' CSV file (up to and including August 2023) and API 3.0 calls (from September 2023 onwards) ###

In [None]:
# Create list of csv-files with weather data. The list is sorted because
# os.listdir() returns names in arbitrary order; sorting makes the processing
# order reproducible and puts 'Chicago_weather.csv' first.
weather_data_list = sorted(
    filename for filename in os.listdir(load_path)
    if ("Chicago_weather" in filename) and filename.endswith('.csv')
)
if not weather_data_list:
    raise FileNotFoundError(f"No 'Chicago_weather*.csv' file found in '{load_path}'")

# Columns that end up in the combined dataset
relevant_columns = ['datetime', 'temp', 'rain_1h', 'snow_1h', 'wind_speed_km_h', 'wind_deg']

# One cleaned DataFrame per csv-file; all of them are concatenated afterwards,
# so the result does not depend on which file is read first.
weather_frames = []

for file in weather_data_list:
    df_weather_temp = (
        # Open the csv-file in a DataFrame
        pd.read_csv(load_path + file, low_memory=False)
        # Calculate local time (UTC timestamp plus timezone offset, both in seconds)
        .assign(datetime=lambda df: pd.to_datetime(df['dt'] + df['timezone'], unit='s'))
        # Drop duplicates
        .drop_duplicates(subset=['datetime'])
        .assign(
            # Cleaning: missing precipitation means no precipitation
            rain_1h=lambda df: df['rain_1h'].fillna(0),
            snow_1h=lambda df: df['snow_1h'].fillna(0),
            # Change wind_speed to unit km/h and round value to 1 decimal
            wind_speed_km_h=lambda df: (df['wind_speed'] * 3.6).round(1),
        )
        # Using weather data from '2013-06-27 00:00', only relevant columns
        .loc[lambda df: df['datetime'] >= '2013-06-27 00:00', relevant_columns]
    )
    weather_frames.append(df_weather_temp)

df_openweather = (
    pd.concat(weather_frames, ignore_index=True)
    # Stable sort keeps the file order for equal timestamps, so on overlapping
    # periods the row of the first file in weather_data_list is kept.
    .sort_values('datetime', kind='stable')
    .drop_duplicates(subset=['datetime'])
    .astype({'wind_deg': int})
    .reset_index(drop=True)
)
df_openweather.info(show_counts=True)

# Save DataFrame as csv-file
df_openweather.to_csv(save_path + 'chicago_weather.csv', index=False)