First, read in the historical measurement data and reshape it.

In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
# Collect the measurement CSVs. os.listdir returns entries in arbitrary order,
# so sort the paths to make the row order of `df` reproducible.
measure_files = sorted("data/measurements/" + x for x in os.listdir("data/measurements"))

# Read each file once and concatenate in a single call. DataFrame.append was
# removed in pandas 2.0, and appending inside the loop re-copied the
# accumulated frame on every iteration.
frames = []
for file in measure_files:
    print(file)
    frames.append(pd.read_csv(file))

# pd.concat raises on an empty list; fall back to an empty frame, which is
# what the append-based loop produced when the directory had no files.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

data/measurements/time_series_data_2016.csv
data/measurements/time_series_data_2017.csv
data/measurements/time_series_data_2018.csv


In [3]:
# Parse the timestamp strings into datetimes, then work on a copy.
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S')
df2 = df.copy()

# Hourly timestamps spanning the observed date range. This is identical for
# every station, so compute it once instead of once per loop iteration.
hourly_dates = pd.date_range(df2.date.min(), df2.date.max(), freq='h')

# Build the full (date, station) grid: one row per hour for every station in
# the data plus an extra station id 0.
# NOTE(review): station 0 is presumably a placeholder/aggregate station used
# downstream -- confirm.
# The per-station frames are collected in a list and concatenated once;
# DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
station_grids = []
for stn in df2.station.unique().tolist() + [0]:
    temp = pd.DataFrame({
        'date': hourly_dates,
        'station': stn
    })
    station_grids.append(temp)
mg_tb = pd.concat(station_grids, ignore_index=True)

# Right merge keeps every grid row; (date, station) pairs with no measurement
# get NaN in all measurement columns.
df2 = df2.merge(mg_tb, on=['date', 'station'], how='right')

Next, encode the time of day (sine and cosine of seconds after midnight) and the month of the year as cyclical features.

In [4]:
df3 = df2.copy()

# Seconds elapsed since midnight for each timestamp.
df3['seconds'] = df3['date'].dt.hour * 3600 + df3['date'].dt.minute * 60 + df3['date'].dt.second

# Project the time of day onto the unit circle so that 23:59:59 sits next to
# 00:00:00.
seconds_in_day = 24*60*60
df3['sin_time'] = np.sin(2*np.pi*df3.seconds/seconds_in_day)
df3['cos_time'] = np.cos(2*np.pi*df3.seconds/seconds_in_day)

# Zero-based month index: January -> 0, ..., December -> 11.
df3['months'] = df3['date'].dt.month - 1

# Project the month onto the unit circle. The period is the number of months
# (12), not the largest index (11): dividing by 11 put December at angle 2*pi,
# giving it exactly the same encoding as January.
max_month = 11
months_in_year = max_month + 1
df3['sin_month'] = np.sin(2*np.pi*df3.months/months_in_year)
df3['cos_month'] = np.cos(2*np.pi*df3.months/months_in_year)

# Drop the helper columns; only the sin/cos features are kept.
df3 = df3.drop(columns=['seconds', 'months'])

Save the result to an intermediate file for exploration and modelling.

In [5]:
# Write the feature table to disk without the index column, then preview the
# first rows as the cell output.
intermediate_path = "data/intermediate/td_data.csv"
df3.to_csv(intermediate_path, index=False)
df3.head()

Unnamed: 0,date,BEN,CO,EBE,MXY,NMHC,NO_2,NOx,OXY,O_3,...,PM25,PXY,SO_2,TCH,TOL,station,sin_time,cos_time,sin_month,cos_month
0,2016-01-01 01:00:00,,1.85,,,,221.300003,873.400024,,4.24,...,69.010002,,54.490002,,,28079001.0,0.258819,0.965926,0.0,1.0
1,2016-01-01 02:00:00,,2.39,,,,239.899994,847.099976,,4.22,...,109.099998,,60.18,,,28079001.0,0.5,0.866025,0.0,1.0
2,2016-01-01 03:00:00,,1.3,,,,139.800003,390.899994,,3.87,...,70.860001,,42.419998,,,28079001.0,0.707107,0.707107,0.0,1.0
3,2016-01-01 04:00:00,,1.0,,,,125.0,379.799988,,4.01,...,36.709999,,35.349998,,,28079001.0,0.866025,0.5,0.0,1.0
4,2016-01-01 05:00:00,,1.11,,,,152.0,537.700012,,4.07,...,39.939999,,39.470001,,,28079001.0,0.965926,0.258819,0.0,1.0
