In [1]:
from enum import Enum
from tqdm import tqdm
import pandas as pd
import requests
import json


# An Enum to stay consistent.
class City(Enum):
    SYR = 0
    EWR = 1
    IAD = 2
    DEN = 3
    ORD = 4

In [2]:
# Build the coordinates for the respective cities.

coordinates = {
    City.SYR: {
        'latitude': 43.05,
        'longitude': -76.15
    },
    City.EWR: {
        'latitude': 40.70,
        'longitude': -74.20
    },
    City.IAD: {
        'latitude': 39.00,
        'longitude': -77.40
    },
    City.DEN: {
        'latitude': 39.70,
        'longitude': -105.00
    },
    City.ORD: {
        'latitude': 42.00,
        'longitude': -87.90
    }
}

In [3]:
# Build start and end date dictionary for the cities.

## All flight data was captured from 01/01/2015 to now.

dates = {
    City.DEN: {
        'start': pd.to_datetime("2022-02-11"),
        'end': pd.to_datetime("2023-01-31")
    },
    City.EWR: {
        'start': pd.to_datetime("2019-11-30"),
        'end': pd.to_datetime("2023-01-31")
    },
    City.IAD: {
        'start': pd.to_datetime("2018-07-02"),
        'end': pd.to_datetime("2023-01-02")
    },
    City.ORD: {
        'start': pd.to_datetime("2017-06-08"),
        'end': pd.to_datetime("2023-01-31")
    },
    City.SYR: {
        'start': pd.to_datetime('2017-06-08'),
        'end': pd.to_datetime('2023-01-31')
    }
}

In [4]:
str(dates[City.ORD]['start'].date())

'2017-06-08'

In [5]:
# Required weather parameters.

parameters = [
    "temperature_2m",
    "relativehumidity_2m",
    "dewpoint_2m",
    "apparent_temperature",
    "pressure_msl",
    "surface_pressure",
    "precipitation",
    "rain",
    "snowfall",
    "cloudcover",
    "cloudcover_low",
    "cloudcover_mid",
    "cloudcover_high",
    "shortwave_radiation",
    "direct_radiation",
    "diffuse_radiation",
    "direct_normal_irradiance",
    "windspeed_10m",
    "windspeed_100m",
    "winddirection_10m",
    "winddirection_100m",
    "windgusts_10m",
    "et0_fao_evapotranspiration",
    "vapor_pressure_deficit"
]

In [6]:
# A method that aggregates all the info above and generates the API URL.

def build_url(city: City):
    URL = "https://archive-api.open-meteo.com/v1/archive?latitude={latitude}&longitude={longitude}&start_date={start}&end_date={end}&hourly={params}&timezone=America%2FNew_York"
    return URL.format(
        latitude=coordinates[city]['latitude'],
        longitude=coordinates[city]['longitude'],
        start=str(dates[city]['start'].date()),
        end=str(dates[city]['end'].date()),
        params=",".join(parameters)
    )

In [7]:
# A method to parse the incoming API JSON response into a dataframe. 

def parse_response(response):

    if response.status_code != 200:
        raise Exception(f"Status code {response.status_code} received!")

    response_json = json.loads(response.content.decode('utf-8'))
    
    data = response_json['hourly']
    
    headers = list(data.keys())
    response_df = pd.DataFrame(columns=headers)
    
    for row_ix in tqdm(range(len(data['time']))):
        row_dict = {
            p: data[p][row_ix] for p in headers
        }
        
        response_df = response_df.append(
            row_dict, ignore_index=True
        )
    
    return response_df

In [8]:
# Loop over the required cities, create their respective DFs, and save them.

def save_historical_data():

    for city in [City.DEN, City.EWR, City.IAD, City.ORD, City.SYR]:

        print("Fetching weather details for ", city.name, "...")

        url = build_url(city)
        city_response = requests.get(url)

        city_df = parse_response(city_response)

        city_df.to_csv(f'../data/{city.name}_weather.csv', index=None)
        print(f"Weather info for {city.name} saved!")

Fetching weather details for  DEN ...


100%|██████████| 8520/8520 [01:50<00:00, 77.18it/s] 


Weather info for DEN saved!
Fetching weather details for  EWR ...


100%|██████████| 27816/27816 [08:33<00:00, 54.17it/s]


Weather info for EWR saved!
Fetching weather details for  IAD ...


100%|██████████| 39504/39504 [13:42<00:00, 48.05it/s]


Weather info for IAD saved!
Fetching weather details for  ORD ...


100%|██████████| 49536/49536 [15:01:59<00:00,  1.09s/it]       


Weather info for ORD saved!
Fetching weather details for  SYR ...


100%|██████████| 49536/49536 [20:26<00:00, 40.38it/s]  


Weather info for SYR saved!


In [None]:
# Uncomment this to parse the data and save the CSVs.

# save_historical_data()