In [6]:
import requests
import pandas as pd
import seaborn as sns
from datetime import datetime
from io import StringIO
import ast

In [7]:
# Load the postal-code lookup table (province, region, zipcode, latitude, longitude)
# straight from the raw GitHub URL; each row is later used as one weather-API query location.
file_path = 'https://raw.githubusercontent.com/selvinfurtado01/schulich_data_science/main/postal_code_15.csv' 
postal_df = pd.read_csv(file_path)

In [8]:
# Preview the postal-code table to confirm the columns the API loop relies on are present
postal_df

Unnamed: 0,province,region,zipcode,latitude,longitude
0,Ontario,Ottawa,K2G2G3,45.355427,-75.738247
1,British Columbia,Capital,V8Y1A2,48.499499,-123.381893
2,Quebec,Capitale-Nationale,G1C4P7,46.857543,-71.221873
3,Ontario,Toronto,M9B1J5,43.634651,-79.55558
4,British Columbia,Central Kootenay,V1L1C3,49.484687,-117.296608
5,Quebec,Bas-Saint-Laurent,G5H2G8,48.593961,-68.194106
6,Quebec,Montréal,H8Z3K6,45.4998,-73.847716
7,Ontario,Toronto,M9P1H7,43.684491,-79.534482
8,Quebec,Capitale-Nationale,G1K4L3,46.81207,-71.242008
9,Manitoba,Winnipeg,R3T3K8,49.786447,-97.152627


In [9]:
# Base URL of the Open-Meteo archive API (ERA5 endpoint) that every historical-weather request is sent to
api_url = 'https://archive-api.open-meteo.com/v1/era5'

In [11]:
# Fetch hourly historical weather for every postal code.
#
# One record (dict) per location is accumulated in `historical_data`; the hourly
# series (time, temperature, humidity, wind speed) are kept as lists inside the
# record. The list is turned into `historical_df` once the loop has finished.
historical_data = []

# (zipcode, reason) for every location that could not be fetched, so that gaps
# in the result are reported instead of being dropped silently.
failed_requests = []

# Seconds to wait for the API before giving up on one location. Without a
# timeout, requests.get can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Build the query for each location and collect the API response
for index, row in postal_df.iterrows():
    query_parameters = {
        'latitude': row['latitude'],
        'longitude': row['longitude'],
        'start_date': '2021-01-01',
        'end_date': '2021-01-10',
        'hourly': 'temperature_2m,relative_humidity_2m,wind_speed_10m'
    }

    try:
        response = requests.get(url=api_url, params=query_parameters, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as error:
        # Network problem or timeout: record it and move on to the next location
        failed_requests.append((row['zipcode'], str(error)))
        continue

    if response.status_code != 200:
        # Keep the best-effort behaviour (skip this location) but remember why
        failed_requests.append((row['zipcode'], f'HTTP {response.status_code}'))
        continue

    data_hist = response.json()
    hourly = data_hist['hourly']
    historical_data.append({
        'latitude': row['latitude'],
        'longitude': row['longitude'],
        'province': row['province'],
        'zipcode': row['zipcode'],
        'timezone': data_hist['timezone'],
        'elevation': data_hist['elevation'],
        'time': hourly['time'],
        'temperature_2m': hourly['temperature_2m'],
        'relative_humidity_2m': hourly['relative_humidity_2m'],
        'wind_speed_10m': hourly['wind_speed_10m']
    })

# Surface any missing locations so downstream statistics are not silently biased
if failed_requests:
    print(f'{len(failed_requests)} of {len(postal_df)} locations could not be fetched:')
    for zipcode, reason in failed_requests:
        print(f'  {zipcode}: {reason}')

# Create DataFrame from historical_data (one row per location, hourly values still nested as lists)
historical_df = pd.DataFrame(historical_data)

In [12]:
# Flatten the per-location records into one row per (location, hour).
# Columns that hold a single value per location are repeated on every hourly row;
# columns that hold a list are indexed position by position.
location_columns = ['latitude', 'longitude', 'province', 'zipcode', 'timezone', 'elevation']
hourly_columns = ['time', 'temperature_2m', 'relative_humidity_2m', 'wind_speed_10m']

updated_rows = [
    {
        **{column: location[column] for column in location_columns},
        **{column: location[column][hour_idx] for column in hourly_columns},
    }
    for _, location in historical_df.iterrows()
    for hour_idx in range(len(location['time']))
]

# Build the long-format historical weather DataFrame and display it
weather_hist_df = pd.DataFrame(updated_rows)
weather_hist_df

Unnamed: 0,latitude,longitude,province,zipcode,timezone,elevation,time,temperature_2m,relative_humidity_2m,wind_speed_10m
0,45.355427,-75.738247,Ontario,K2G2G3,GMT,98.0,2021-01-01T00:00,-2.6,72,9.9
1,45.355427,-75.738247,Ontario,K2G2G3,GMT,98.0,2021-01-01T01:00,-3.0,71,10.0
2,45.355427,-75.738247,Ontario,K2G2G3,GMT,98.0,2021-01-01T02:00,-3.8,75,8.2
3,45.355427,-75.738247,Ontario,K2G2G3,GMT,98.0,2021-01-01T03:00,-3.8,78,7.2
4,45.355427,-75.738247,Ontario,K2G2G3,GMT,98.0,2021-01-01T04:00,-3.8,78,5.4
...,...,...,...,...,...,...,...,...,...,...
3595,49.245355,-124.803392,British Columbia,V9Y4M3,GMT,24.0,2021-01-10T19:00,6.7,94,1.5
3596,49.245355,-124.803392,British Columbia,V9Y4M3,GMT,24.0,2021-01-10T20:00,7.8,93,1.5
3597,49.245355,-124.803392,British Columbia,V9Y4M3,GMT,24.0,2021-01-10T21:00,7.7,100,4.8
3598,49.245355,-124.803392,British Columbia,V9Y4M3,GMT,24.0,2021-01-10T22:00,7.9,99,6.9


In [13]:
# Inspect column dtypes and non-null counts of the flattened hourly table
weather_hist_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3600 entries, 0 to 3599
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   latitude              3600 non-null   float64
 1   longitude             3600 non-null   float64
 2   province              3600 non-null   object 
 3   zipcode               3600 non-null   object 
 4   timezone              3600 non-null   object 
 5   elevation             3600 non-null   float64
 6   time                  3600 non-null   object 
 7   temperature_2m        3600 non-null   float64
 8   relative_humidity_2m  3600 non-null   int64  
 9   wind_speed_10m        3600 non-null   float64
dtypes: float64(5), int64(1), object(4)
memory usage: 281.4+ KB


In [14]:
# Summary statistics of the numeric columns; the min/max values here inform the bin edges chosen later
weather_hist_df.describe()

Unnamed: 0,latitude,longitude,elevation,temperature_2m,relative_humidity_2m,wind_speed_10m
count,3600.0,3600.0,3600.0,3600.0,3600.0,3600.0
mean,46.830629,-88.760812,126.266667,-1.934444,82.668889,10.451333
std,2.144802,21.179028,148.669438,5.192898,11.447566,5.84115
min,43.634651,-124.803392,16.0,-17.1,33.0,0.0
25%,45.355427,-117.296608,31.0,-5.7,76.0,6.0
50%,46.81207,-79.534482,66.0,-2.7,85.0,10.0
75%,49.245355,-72.730673,144.0,1.8,91.0,13.8
max,49.786447,-68.194106,624.0,9.5,100.0,41.8


In [15]:
# Parse the 'time' strings (e.g. '2021-01-01T00:00') into datetime64 values
# so the .dt accessor can be used on the column afterwards.
weather_hist_df['time'] = pd.to_datetime(weather_hist_df['time'])

In [16]:
# Derive calendar components (hour, day, month, year) from the parsed timestamp.
# Each component becomes a column named after the .dt attribute it is read from.
timestamp_parts = weather_hist_df['time'].dt
for component in ('hour', 'day', 'month', 'year'):
    weather_hist_df[component] = getattr(timestamp_parts, component)

# Show the timestamp next to the columns derived from it
weather_hist_df[['time', 'hour', 'day', 'month', 'year']]

Unnamed: 0,time,hour,day,month,year
0,2021-01-01 00:00:00,0,1,1,2021
1,2021-01-01 01:00:00,1,1,1,2021
2,2021-01-01 02:00:00,2,1,1,2021
3,2021-01-01 03:00:00,3,1,1,2021
4,2021-01-01 04:00:00,4,1,1,2021
...,...,...,...,...,...
3595,2021-01-10 19:00:00,19,10,1,2021
3596,2021-01-10 20:00:00,20,10,1,2021
3597,2021-01-10 21:00:00,21,10,1,2021
3598,2021-01-10 22:00:00,22,10,1,2021


In [17]:
# Bucket the continuous weather readings into labelled categories.
#
# The bin edges below were picked from the summary statistics of this sample.
# pd.cut uses right-closed intervals by default, i.e. (lower, upper], and maps
# every value outside the outermost edges to NaN. To avoid silently losing
# readings when the data covers a different period or location (for example a
# humidity of 30 or less, or a temperature above 30), the first and last bins
# are treated as open-ended when cutting. The *_bins lists themselves keep
# their original values, and readings inside the original range get exactly
# the same category as before.
temperature_bins = [-20.0, -5.0, 0, 5, 30.0]
temperature_labels = ['Very Cold', 'Cold', 'Mild', 'Warm']

humidity_bins = [30, 65, 80, 90, 110]
humidity_labels = ['Very Low', 'Low', 'Moderate', 'High']

wind_speed_bins = [-5, 5, 10, 15, 50]
wind_speed_labels = ['Calm', 'Breezy', 'Windy', 'Very Windy']


def _open_ended(edges):
    """Return a copy of `edges` whose first and last entries are replaced by -inf and +inf."""
    return [float('-inf'), *edges[1:-1], float('inf')]


weather_hist_df['temperature_category'] = pd.cut(
    weather_hist_df['temperature_2m'],
    bins=_open_ended(temperature_bins),
    labels=temperature_labels,
)
weather_hist_df['humidity_category'] = pd.cut(
    weather_hist_df['relative_humidity_2m'],
    bins=_open_ended(humidity_bins),
    labels=humidity_labels,
)
weather_hist_df['wind_speed_category'] = pd.cut(
    weather_hist_df['wind_speed_10m'],
    bins=_open_ended(wind_speed_bins),
    labels=wind_speed_labels,
)

# Displaying new features extracted from above columns
weather_hist_df[['temperature_category', 'humidity_category', 'wind_speed_category']]

Unnamed: 0,temperature_category,humidity_category,wind_speed_category
0,Cold,Low,Breezy
1,Cold,Low,Breezy
2,Cold,Low,Breezy
3,Cold,Low,Breezy
4,Cold,Low,Breezy
...,...,...,...
3595,Warm,High,Calm
3596,Warm,High,Calm
3597,Warm,High,Calm
3598,Warm,High,Breezy
