## The purpose of this notebook is to combine the daily weather reports from http://www.climate.weather.gc.ca for the west, north-east, north, south and central Toronto weather stations.

The readings from these weather stations are averaged later in this notebook to obtain daily weather and climate values for Toronto that are representative of the whole region.

## TOR_BUTTON_COMBINED - North East near Markham

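This dataset was assembled from 3 different station records for the same site (same name and coordinates, different Climate Identifiers), each covering its own date range: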

Range:  01-01-2019 - 06-30-2019

"Station Name","TORONTO BUTTONVILLE A"
"Province","ONTARIO"
"Latitude","43.86"
"Longitude","-79.37"
"Elevation","198.10"
"Climate Identifier","6158409"
"WMO Identifier",""
"TC Identifier","YKZ"

Range:  05-21-2015 - 12-31-2018

"Station Name","TORONTO BUTTONVILLE A"
"Province","ONTARIO"
"Latitude","43.86"
"Longitude","-79.37"
"Elevation","198.10"
"Climate Identifier","6158410"
"WMO Identifier","71639"
"TC Identifier","YKZ"

Range:  01-01-2014 - 05-20-2015

"Station Name","TORONTO BUTTONVILLE A"
"Province","ONTARIO"
"Latitude","43.86"
"Longitude","-79.37"
"Elevation","198.10"
"Climate Identifier","615HMAK"
"WMO Identifier","71639"
"TC Identifier","YKZ"

## TORONTO_CITY - Center near St. George Campus

"Station Name","TORONTO CITY"
"Province","ONTARIO"
"Latitude","43.67"
"Longitude","-79.40"
"Elevation","112.50"
"Climate Identifier","6158355"
"WMO Identifier","71508"
"TC Identifier","XTO"

## TORONTO_CITY_CENTRE - South near Toronto Islands

"Station Name","TORONTO CITY CENTRE"
"Province","ONTARIO"
"Latitude","43.63"
"Longitude","-79.40"
"Elevation","76.80"
"Climate Identifier","6158359"
"WMO Identifier","71265"
"TC Identifier","YTZ"

## TORONTO_INTL_A - West near Pearson Airport

"Station Name","TORONTO INTL A"
"Province","ONTARIO"
"Latitude","43.68"
"Longitude","-79.63"
"Elevation","173.40"
"Climate Identifier","6158731"
"WMO Identifier","71624"
"TC Identifier","YYZ"

## TORONTO_NORTH_YORK - North near North York

"Station Name","TORONTO NORTH YORK"
"Province","ONTARIO"
"Latitude","43.78"
"Longitude","-79.47"
"Elevation","187.00"
"Climate Identifier","615S001"
"WMO Identifier",""
"TC Identifier",""

In [1]:
# import dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import os

# folder structure: one sub-folder per weather station
folders = ["TOR_BUTTON_COMBINED", "TORONTO_CITY", "TORONTO_CITY_CENTRE",
           "TORONTO_INTL_A", "TORONTO_NORTH_YORK"]

# years covered by the analysis, 2014 through 2019 inclusive; both file
# lists below are derived from this single range so they cannot drift apart
years = range(2014, 2020)

# raw daily weather station readouts, one file per year (Jan 1 till Dec 31)
daily_csv_list = ["eng-daily-0101{0}-1231{0}.csv".format(year) for year in years]

# output names for the cleaned daily weather station readouts,
# in the same year order as daily_csv_list
year_list = ["daily_{0}.csv".format(year) for year in years]

# Note TOR_BUTTON_COMBINED was made manually from TOR_BUTTON_1-3
# combined at the date ranges listed in the markdown text above

In [2]:

# Clean every station's yearly files, write the cleaned/combined copies to
# disk, and finally place the five stations side by side in one table.

# columns removed from every raw file: not useful or contain null values
columns_to_drop = ["Data Quality", "Max Temp Flag", "Min Temp Flag",
                   "Mean Temp Flag", "Heat Deg Days (°C)", "Heat Deg Days Flag",
                   "Cool Deg Days (°C)", "Cool Deg Days Flag", "Total Rain Flag",
                   "Total Snow Flag", "Total Precip Flag", "Snow on Grnd Flag",
                   "Dir of Max Gust (10s deg)", "Dir of Max Gust Flag",
                   "Spd of Max Gust Flag"]

# when these columns detected 0 the source used a null value instead of 0;
# they are filled with 0 as stated in the documentation
zero_fill_columns = ["Total Rain (mm)", "Total Snow (cm)",
                     "Total Precip (mm)", "Snow on Grnd (cm)",
                     "Spd of Max Gust (km/h)"]

# each raw file must have exactly one output name
assert len(daily_csv_list) == len(year_list), \
    "daily_csv_list and year_list must have the same length"

# base directory of the project; abspath() resolves the notebook name against
# the current working directory. Loop-invariant, so it is computed once.
pathway1 = os.path.dirname(os.path.abspath('Clean_Merge_Data.ipynb'))

combined_toronto = []

# for each station folder
for folder in folders:

    # input folder with the raw daily .csv files and the two output folders
    csv_to_load = os.path.join(pathway1, folder, "Daily")
    output1 = os.path.join(pathway1, folder, "Daily_Clean")
    output2 = os.path.join(pathway1, folder, "Daily_Combine")

    # to_csv does not create missing directories, so make sure they exist
    os.makedirs(output1, exist_ok=True)
    os.makedirs(output2, exist_ok=True)

    combined = []

    # for each yearly file, paired with the name of its cleaned output
    for file, clean_name in zip(daily_csv_list, year_list):

        # the first 24 rows are not part of the data, 25th row is header
        df_year = pd.read_csv(os.path.join(csv_to_load, file), skiprows=24)

        df_drop_year = df_year.drop(columns_to_drop, axis=1)
        df_drop_year[zero_fill_columns] = \
            df_drop_year[zero_fill_columns].fillna(value=0)

        # make .csv of the cleaned daily weather reports
        df_drop_year.to_csv(os.path.join(output1, clean_name),
                            index=False, header=True)

        # collect the 2014-2019 cleaned frames for this station
        combined.append(df_drop_year)

    # stack the yearly frames on top of each other (axis=0) and renumber rows
    combined = pd.concat(combined, axis=0).reset_index(drop=True)

    # write the station's combined dataframe to .csv
    combined.to_csv(os.path.join(output2, "Daily_2014_2019.csv"),
                    index=False, header=True)

    # collect the 2014-2019 combined frame of every station
    combined_toronto.append(combined)

# place the stations next to each other (axis=1); this makes further
# operations easier, e.g. getting the mean of the 5 combined .csvs
# NOTE(review): rows are matched by position, which assumes every station
# covers exactly the same dates in the same order - confirm
combined_toronto = pd.concat(combined_toronto, axis=1).reset_index(drop=True)

# write the side-by-side dataframe to .csv in the base directory
output3 = os.path.join(pathway1)
pathway4 = os.path.join(output3, "5_Daily_2014_2019.csv")
combined_toronto.to_csv(pathway4, index=False, header=True)

## The daily weather reports of each weather station have been combined for the years 2014-2019
Now it is time to compute average values across the 5 weather stations.

In [3]:
# Reload the side-by-side table of the 5 stations (2014-2019 daily reports).
# Reading it back from disk gives the repeated column names the ".1" ... ".4"
# suffixes that the following cells use to tell the stations apart.
summed_csv = os.path.join(pathway1, "5_Daily_2014_2019.csv")
df_list = pd.read_csv(filepath_or_buffer=summed_csv)
df_list

Unnamed: 0,Date/Time,Year,Month,Day,Max Temp (°C),Min Temp (°C),Mean Temp (°C),Total Rain (mm),Total Snow (cm),Total Precip (mm),...,Month.4,Day.4,Max Temp (°C).4,Min Temp (°C).4,Mean Temp (°C).4,Total Rain (mm).4,Total Snow (cm).4,Total Precip (mm).4,Snow on Grnd (cm).4,Spd of Max Gust (km/h).4
0,2014-01-01,2014,1,1,-10.3,-16.9,-13.6,0.0,0.0,0.0,...,1,1,-10.0,-14.0,-12.0,0.0,0.0,0.0,15.0,0.0
1,2014-01-02,2014,1,2,-16.7,-21.2,-19.0,0.0,0.6,0.6,...,1,2,-16.5,-19.5,-18.0,0.0,0.6,0.4,14.0,0.0
2,2014-01-03,2014,1,3,-9.2,-25.3,-17.3,0.0,0.0,0.0,...,1,3,-6.0,-25.0,-15.5,0.0,0.0,0.0,14.0,0.0
3,2014-01-04,2014,1,4,-0.9,-9.4,-5.2,0.0,1.4,1.4,...,1,4,0.0,-15.5,-7.8,0.0,2.6,3.0,14.0,0.0
4,2014-01-05,2014,1,5,0.2,-3.2,-1.5,0.0,13.2,11.6,...,1,5,1.0,-3.5,-1.3,5.0,9.6,16.2,16.0,0.0
5,2014-01-06,2014,1,6,0.6,-16.4,-7.9,1.0,3.8,4.8,...,1,6,-5.0,-9.5,-7.3,0.0,0.0,0.0,22.0,0.0
6,2014-01-07,2014,1,7,-16.3,-23.5,-19.9,0.0,0.0,0.0,...,1,7,-14.0,-24.5,-19.3,0.0,0.0,0.0,22.0,0.0
7,2014-01-08,2014,1,8,-8.8,-17.7,-13.3,0.0,0.2,0.2,...,1,8,-8.0,-18.5,-13.3,0.0,0.0,0.0,22.0,0.0
8,2014-01-09,2014,1,9,-6.5,-15.4,-11.0,0.0,0.0,0.0,...,1,9,-2.0,-16.5,-9.3,0.0,3.0,2.4,21.0,0.0
9,2014-01-10,2014,1,10,1.9,-9.0,-3.6,1.0,2.8,3.4,...,1,10,2.0,-12.0,-5.0,7.0,0.0,7.0,23.0,0.0


In [4]:
# first we need to modify the "Spd of Max Gust (km/h)" columns, as their
# type is object (values such as "<31" are text) and .mean() cannot be
# taken until that is fixed
gust_columns = ["Spd of Max Gust (km/h)", "Spd of Max Gust (km/h).1",
                "Spd of Max Gust (km/h).2", "Spd of Max Gust (km/h).3",
                "Spd of Max Gust (km/h).4"]

# .copy() makes wind_speed an independent DataFrame, so assigning to its
# columns afterwards does not raise SettingWithCopyWarning
wind_speed = df_list[gust_columns].copy()
wind_speed.head()

Unnamed: 0,Spd of Max Gust (km/h),Spd of Max Gust (km/h).1,Spd of Max Gust (km/h).2,Spd of Max Gust (km/h).3,Spd of Max Gust (km/h).4
0,<31,0.0,<31,35,0.0
1,35,0.0,<31,48,0.0
2,<31,0.0,<31,41,0.0
3,41,0.0,<31,57,0.0
4,<31,0.0,<31,<31,0.0


In [5]:
# The gust columns hold text such as "<31" next to plain numbers.  Strip the
# ">" and "<" markers and change the dtype from object to float so the
# columns can be averaged.
# Columns .1 and .4 are abandoned as they contain no data.
gust_text_columns = ["Spd of Max Gust (km/h)", "Spd of Max Gust (km/h).2",
                     "Spd of Max Gust (km/h).3"]

# Work on an explicit copy: assigning into a frame that was selected from
# df_list is what produced the flood of SettingWithCopyWarning messages, so
# no global warnings.filterwarnings('ignore') is needed any more.
wind_speed = wind_speed.copy()

for gust_column in gust_text_columns:
    # astype(str) keeps the cell re-runnable: on a second run the column is
    # already float and has no .str accessor
    stripped = (wind_speed[gust_column]
                .astype(str)
                .str.replace(">", "", regex=False)
                .str.replace("<", "", regex=False))

    # errors="coerce" turns anything that is still not a number into NaN
    wind_speed[gust_column] = pd.to_numeric(stripped, errors="coerce").round(0)

wind_speed.head()

Unnamed: 0,Spd of Max Gust (km/h),Spd of Max Gust (km/h).1,Spd of Max Gust (km/h).2,Spd of Max Gust (km/h).3,Spd of Max Gust (km/h).4
0,31.0,0.0,31.0,35.0,0.0
1,35.0,0.0,31.0,48.0,0.0
2,31.0,0.0,31.0,41.0,0.0
3,41.0,0.0,31.0,57.0,0.0
4,31.0,0.0,31.0,31.0,0.0


In [6]:
def station_mean(frame, column, suffixes, decimals=1):
    """Average one measurement across its per-station columns, row by row.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table holding one copy of ``column`` per station, each named
        ``column + suffix``.
    column : str
        Base column name, e.g. ``"Max Temp (°C)"``.
    suffixes : iterable of str
        Suffixes of the station copies to include (``""`` is the first station).
    decimals : int, default 1
        Number of decimals the mean is rounded to.

    Returns
    -------
    list of float
        One rounded mean per row of ``frame``.
    """
    station_columns = [column + suffix for suffix in suffixes]
    return frame.loc[:, station_columns].mean(axis=1).round(decimals).tolist()


# column-name suffixes of the 5 stations in df_list
all_stations = ["", ".1", ".2", ".3", ".4"]

# only these stations are used for wind: columns .1 and .4 contain no data
gust_stations = ["", ".2", ".3"]

# measurements averaged over all 5 stations, in output column order
averaged_measurements = ["Max Temp (°C)", "Min Temp (°C)", "Mean Temp (°C)",
                         "Total Rain (mm)", "Total Snow (cm)",
                         "Total Precip (mm)", "Snow on Grnd (cm)"]

# dictionary for the final dataframe; date/time, year, month and day are
# taken from the first station's (unsuffixed) columns
summed_dict = {"Date/Time": df_list.loc[:, "Date/Time"].tolist(),
               "Year": df_list.loc[:, "Year"].tolist(),
               "Month": df_list.loc[:, "Month"].tolist(),
               "Day": df_list.loc[:, "Day"].tolist()}

for measurement in averaged_measurements:
    summed_dict[measurement] = station_mean(df_list, measurement, all_stations)

# wind max mean comes from the cleaned wind_speed frame, rounded to whole km/h
summed_dict["Spd of Max Gust (km/h)"] = station_mean(
    wind_speed, "Spd of Max Gust (km/h)", gust_stations, decimals=0)

# final dataframe
finished_df = pd.DataFrame.from_dict(summed_dict)
finished_df

Unnamed: 0,Date/Time,Year,Month,Day,Max Temp (°C),Min Temp (°C),Mean Temp (°C),Total Rain (mm),Total Snow (cm),Total Precip (mm),Snow on Grnd (cm),Spd of Max Gust (km/h)
0,2014-01-01,2014,1,1,-9.4,-14.9,-12.2,0.0,0.0,0.0,7.4,32.0
1,2014-01-02,2014,1,2,-15.3,-19.7,-17.5,0.0,0.4,0.8,7.2,38.0
2,2014-01-03,2014,1,3,-7.6,-23.6,-15.6,0.0,0.0,0.0,7.6,34.0
3,2014-01-04,2014,1,4,-0.1,-9.6,-4.9,0.0,0.9,1.0,7.6,43.0
4,2014-01-05,2014,1,5,0.8,-2.6,-1.0,1.4,6.9,12.9,8.8,31.0
5,2014-01-06,2014,1,6,0.2,-15.0,-7.4,1.0,1.0,4.7,14.0,57.0
6,2014-01-07,2014,1,7,-15.6,-23.2,-19.5,0.0,0.0,0.1,13.6,54.0
7,2014-01-08,2014,1,8,-8.3,-17.1,-12.7,0.0,0.0,0.0,13.8,33.0
8,2014-01-09,2014,1,9,-4.3,-14.2,-9.3,0.0,0.6,0.5,12.6,31.0
9,2014-01-10,2014,1,10,2.7,-7.6,-2.5,1.8,0.9,3.6,13.8,31.0


In [7]:
# write the station-averaged dataframe to .csv in the output3 directory,
# without the row index and with the header row
pathway5 = os.path.join(output3, "Completely_Averaged.csv")
finished_df.to_csv(path_or_buf=pathway5, header=True, index=False)