# Load and Export Sheringham Climate data
This historical data can be found at https://climate.weather.gc.ca/climate_data/hourly_data_e.html?hlyRange=1994-02-01%7C2021-01-18&dlyRange=1992-05-01%7C2021-01-18&mlyRange=1992-01-01%7C2007-02-01&StationID=6811&Prov=BC&urlExtension=_e.html&searchType=stnProx&optLimit=yearRange&StartYear=2010&EndYear=2021&selRowPerPage=25&Line=0&txtRadius=50&optProxType=decimal&selCity=&selPark=&txtCentralLatDeg=&txtCentralLatMin=0&txtCentralLatSec=0&txtCentralLongDeg=&txtCentralLongMin=0&txtCentralLongSec=0&txtLatDecDeg=48.3305&txtLongDecDeg=-124.1081&timeframe=1&time=LST&time=LST&Year=2007&Month=1&Day=20#

In [1]:
from pathlib import Path
import time
#import datetime as dt
import pandas as pd
import numpy as np
import glob, os   

In [3]:
# Column names from the raw hourly CSV files that are excluded when the
# files are loaded (every header name not listed here is kept).
drop_vars = (
    "Station Name",
    "Climate ID",
    "Year",
    "Month",
    "Day",
    "Time (LST)",
    "Temp Flag",
    "Dew Point Temp (°C)",
    "Dew Point Temp Flag",
    "Rel Hum (%)",
    "Rel Hum Flag",
    "Precip. Amount (mm)",
    "Precip. Amount Flag",
    "Wind Dir Flag",
    "Wind Spd Flag",
    "Visibility (km)",
    "Visibility Flag",
    "Stn Press Flag",
    "Hmdx",
    "Hmdx Flag",
    "Wind Chill",
    "Wind Chill Flag",
    "Weather",
)

# Read a single data row from one monthly file purely to obtain the full list
# of header names; the values themselves are not used.
header_sample = pd.read_csv("en_climate_hourly_BC_1017254_10-2017_P1H.csv", nrows=1)
cols = header_sample.columns.tolist()

In [4]:
# Collect every monthly CSV in the current working directory that matches the
# filename pattern.
all_files = sorted(glob.glob("en_climate_hourly_BC_1017254_*.csv"))
if not all_files:
    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate" error.
    raise FileNotFoundError(
        "No files matching 'en_climate_hourly_BC_1017254_*.csv' found in " + os.getcwd()
    )

# The column selection is identical for every file, so build it once rather
# than once per file.
keep_cols = [name for name in cols if name not in drop_vars]

# Load only the retained columns from each monthly file.
monthly_frames = [pd.read_csv(csv_path, usecols=keep_cols) for csv_path in all_files]

# The filenames appear to embed the period as MM-YYYY (e.g. "10-2017"), so the
# name-sorted file order is month-major (all Januaries, then all Februaries, ...)
# rather than chronological. The timestamps are "YYYY-MM-DD HH:MM" strings, for
# which a plain string sort is chronological, so order the combined rows by that
# column (stable sort keeps tied rows in file order) and renumber the index.
Sheringham = (
    pd.concat(monthly_frames, ignore_index=True)
    .sort_values("Date/Time (LST)", kind="stable")
    .reset_index(drop=True)
)

In [5]:
# Display the combined frame to check the retained columns and the row range.
Sheringham

Unnamed: 0,Longitude (x),Latitude (y),Date/Time (LST),Temp (°C),Wind Dir (10s deg),Wind Spd (km/h),Stn Press (kPa)
0,-123.92,48.38,2007-01-01 00:00,6.3,9.0,6.0,102.25
1,-123.92,48.38,2007-01-01 01:00,6.5,31.0,6.0,102.24
2,-123.92,48.38,2007-01-01 02:00,7.2,5.0,7.0,102.13
3,-123.92,48.38,2007-01-01 03:00,7.0,29.0,11.0,102.16
4,-123.92,48.38,2007-01-01 04:00,6.7,7.0,6.0,102.10
...,...,...,...,...,...,...,...
122731,-123.92,48.38,2020-12-31 19:00,7.6,5.0,6.0,101.52
122732,-123.92,48.38,2020-12-31 20:00,7.4,8.0,7.0,101.53
122733,-123.92,48.38,2020-12-31 21:00,7.6,4.0,11.0,101.39
122734,-123.92,48.38,2020-12-31 22:00,8.0,3.0,11.0,101.36


In [6]:
# Out of curiosity, count the number of NaN values in each column.
Sheringham.isna().sum()

Longitude (x)             0
Latitude (y)              0
Date/Time (LST)           0
Temp (°C)             38166
Wind Dir (10s deg)    36686
Wind Spd (km/h)       33185
Stn Press (kPa)       32794
dtype: int64

In [8]:
# Replace NaN in each measurement column with a fixed placeholder value.
# NOTE(review): the NaN counts shown earlier in this notebook are in the tens
# of thousands per column, so after this step a large share of rows carry
# placeholders (0 for temperature / wind, 101 for pressure) that cannot be
# told apart from genuine readings -- confirm this is what downstream users expect.
nan_fill_values = {
    'Temp (°C)': 0,
    'Wind Dir (10s deg)': 0,
    'Wind Spd (km/h)': 0,
    'Stn Press (kPa)': 101,
}
for column_name, fill_value in nan_fill_values.items():
    Sheringham[column_name] = Sheringham[column_name].fillna(fill_value)

In [9]:
# Display the frame again after the NaN replacement.
Sheringham

Unnamed: 0,Longitude (x),Latitude (y),Date/Time (LST),Temp (°C),Wind Dir (10s deg),Wind Spd (km/h),Stn Press (kPa)
0,-123.92,48.38,2007-01-01 00:00,6.3,9.0,6.0,102.25
1,-123.92,48.38,2007-01-01 01:00,6.5,31.0,6.0,102.24
2,-123.92,48.38,2007-01-01 02:00,7.2,5.0,7.0,102.13
3,-123.92,48.38,2007-01-01 03:00,7.0,29.0,11.0,102.16
4,-123.92,48.38,2007-01-01 04:00,6.7,7.0,6.0,102.10
...,...,...,...,...,...,...,...
122731,-123.92,48.38,2020-12-31 19:00,7.6,5.0,6.0,101.52
122732,-123.92,48.38,2020-12-31 20:00,7.4,8.0,7.0,101.53
122733,-123.92,48.38,2020-12-31 21:00,7.6,4.0,11.0,101.39
122734,-123.92,48.38,2020-12-31 22:00,8.0,3.0,11.0,101.36


In [10]:
# Write the frame to a CSV file in the working directory, omitting the
# integer row index.
output_csv = 'Sheringham.csv'
Sheringham.to_csv(output_csv, index=False)