In [1]:
from enum import Enum
from tqdm import tqdm
import pandas as pd
import requests
import json


# An Enum to stay consistent.
class City(Enum):
    SYR = 0
    EWR = 1
    IAD = 2
    DEN = 3
    ORD = 4

In [2]:
# Build the coordinates for the respective cities.

coordinates = {
    City.SYR: {
        'latitude': 43.05,
        'longitude': -76.15
    },
    City.EWR: {
        'latitude': 40.70,
        'longitude': -74.20
    },
    City.IAD: {
        'latitude': 39.00,
        'longitude': -77.40
    },
    City.DEN: {
        'latitude': 39.70,
        'longitude': -105.00
    },
    City.ORD: {
        'latitude': 42.00,
        'longitude': -87.90
    }
}

In [3]:
# Build start and end date dictionary for the cities.

## All flight data was captured from 01/01/2015 to now.

dates = {
    City.DEN: {
        'start': pd.to_datetime("2022-02-11"),
        'end': pd.to_datetime("2023-01-31")
    },
    City.EWR: {
        'start': pd.to_datetime("2019-11-30"),
        'end': pd.to_datetime("2023-01-31")
    },
    City.IAD: {
        'start': pd.to_datetime("2018-07-02"),
        'end': pd.to_datetime("2023-01-02")
    },
    City.ORD: {
        'start': pd.to_datetime("2017-06-08"),
        'end': pd.to_datetime("2023-01-31")
    },
    City.SYR: {
        'start': pd.to_datetime('2017-06-08'),
        'end': pd.to_datetime('2023-01-31')
    }
}

In [4]:
str(dates[City.ORD]['start'].date())

'2017-06-08'

In [5]:
# Required weather parameters.

parameters = [
    'temperature_2m',
    'relativehumidity_2m',
    'dewpoint_2m',
    'apparent_temperature',
    'pressure_msl',
    'surface_pressure',
    'precipitation',
    'rain',
    'snowfall',
    'cloudcover',
    'cloudcover_low',
    'cloudcover_mid',
    'cloudcover_high',
    'windspeed_10m',
    'winddirection_10m',
    'windgusts_10m',
    'et0_fao_evapotranspiration',
    'vapor_pressure_deficit'
]

In [6]:
# A method that aggregates all the info above and generates the API URL.

def build_url(city: City):
    URL = "https://archive-api.open-meteo.com/v1/archive?latitude={latitude}&longitude={longitude}&start_date={start}&end_date={end}&hourly={params}&timezone=America%2FNew_York"
    return URL.format(
        latitude=coordinates[city]['latitude'],
        longitude=coordinates[city]['longitude'],
        start=str(dates[city]['start'].date()),
        end=str(dates[city]['end'].date()),
        params=",".join(parameters)
    )

In [6]:
# A method to parse the incoming API JSON response into a dataframe. 

def parse_response(response):

    if response.status_code != 200:
        raise Exception(f"Status code {response.status_code} received!")

    response_json = json.loads(response.content.decode('utf-8'))
    
    data = response_json['hourly']
    
    headers = list(data.keys())
    response_df = pd.DataFrame(columns=headers)
    
    for row_ix in tqdm(range(len(data['time']))):
        row_dict = {
            p: data[p][row_ix] for p in headers
        }
        
        response_df = response_df.append(
            row_dict, ignore_index=True
        )
    
    return response_df

In [7]:
# Loop over the required cities, create their respective DFs, and save them.

def save_historical_data():

    for city in [City.DEN, City.EWR, City.IAD, City.ORD, City.SYR]:

        print("Fetching weather details for ", city.name, "...")

        url = build_url(city)
        city_response = requests.get(url)

        city_df = parse_response(city_response)

        city_df.to_csv(f'../data/{city.name}_weather.csv', index=None)
        print(f"Weather info for {city.name} saved!")

In [8]:
# Uncomment this to parse the data and save the CSVs.

# save_historical_data()

 ## Predictive data API


In [9]:
def build_forecast_url(city: City, prediction_window_days=3):
    if prediction_window_days not in [1, 3, 7, 14, 16]:
        raise NotImplementedError(f"{prediction_window_days} days forecast is not supported by Open-Meteo.")
    
    url_template = "https://api.open-meteo.com/v1/forecast?latitude={lat}&longitude={lng}&hourly={params}&forecast_days={window}"
    return url_template.format(
        lat=coordinates[city]['latitude'],
        lng=coordinates[city]['longitude'],
        window=prediction_window_days,
        params=",".join(parameters)
    )

In [15]:
def save_future_data():

    for city in [City.DEN, City.EWR, City.IAD, City.ORD, City.SYR]:

        print("Fetching weather details for ", city.name, "...")

        url = build_forecast_url(city, 7)
        city_response = requests.get(url)

        city_df = parse_response(city_response)
        city_df.to_csv(f"../data/{city.name}_weather_forecast.csv")
        
        print("Saved forecast for ", city.name, " to disk!")

In [16]:
save_future_data()

Fetching weather details for  DEN ...


100%|██████████| 168/168 [00:01<00:00, 127.23it/s]


Saved forecast for  DEN  to disk!
Fetching weather details for  EWR ...


100%|██████████| 168/168 [00:01<00:00, 131.11it/s]


Saved forecast for  EWR  to disk!
Fetching weather details for  IAD ...


100%|██████████| 168/168 [00:01<00:00, 128.88it/s]


Saved forecast for  IAD  to disk!
Fetching weather details for  ORD ...


100%|██████████| 168/168 [00:01<00:00, 133.58it/s]


Saved forecast for  ORD  to disk!
Fetching weather details for  SYR ...


100%|██████████| 168/168 [00:01<00:00, 123.59it/s]

Saved forecast for  SYR  to disk!



