First, read in and shape the historical measurement data.

In [1]:
import os
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point

In [2]:
# Collect the yearly measurement CSVs. os.listdir returns entries in arbitrary
# order, so sort them to make the row order of `df` reproducible, and skip
# anything that is not a CSV (e.g. editor or OS artefacts).
measure_dir = "data/measurements"
measure_files = sorted(
    f"{measure_dir}/{name}"
    for name in os.listdir(measure_dir)
    if name.lower().endswith(".csv")
)

for file in measure_files:
    print(file)

# Read every file once and concatenate in a single step. DataFrame.append was
# removed in pandas 2.0, and growing a frame inside a loop copies it each time.
# pd.concat raises ValueError if no CSV files were found.
df = pd.concat(
    [pd.read_csv(file) for file in measure_files],
    ignore_index=True,
)

data/measurements/time_series_data_2016.csv
data/measurements/time_series_data_2017.csv
data/measurements/time_series_data_2018.csv


In [3]:
# Parse the timestamp strings into datetimes (kept on `df` itself, as before).
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S')

# Stations to build the hourly grid for: every station present in the
# measurements plus id 0, which is the id given to ADL HQ further down.
hq_station_id = 0
station_ids = df['station'].unique().tolist()
if hq_station_id not in station_ids:
    # Guard against duplicating the grid if id 0 is ever already present.
    station_ids.append(hq_station_id)

# One timestamp per hour across the observed period; computed once rather than
# once per station.
hourly_range = pd.date_range(df['date'].min(), df['date'].max(), freq='h')

# Full (date, station) grid. Built with a single concat because
# DataFrame.append was removed in pandas 2.0; this also keeps the key columns
# at their natural dtypes instead of starting from an empty object-dtype frame.
mg_tb = pd.concat(
    [pd.DataFrame({'date': hourly_range, 'station': stn}) for stn in station_ids],
    ignore_index=True,
)

# Right merge keeps every grid row, so hours/stations without a measurement
# (including all rows for station 0) appear with missing values.
df2 = df.merge(mg_tb, on=['date', 'station'], how='right')

Next, create cyclical encodings of the time of day (sine and cosine of seconds after midnight) and of the month of the year.

In [4]:
timestamps = df2['date']

# Seconds elapsed since midnight for each timestamp.
seconds = (
    timestamps.dt.hour * 3600
    + timestamps.dt.minute * 60
    + timestamps.dt.second
)

# Map time of day onto the unit circle so 23:59 sits next to 00:00.
seconds_in_day = 24*60*60
time_angle = 2 * np.pi * seconds / seconds_in_day

# Zero-based month index: January = 0 ... December = 11.
months = timestamps.dt.month - 1

# The cycle has max_month + 1 = 12 steps. Dividing by 11 (as before) puts
# December at an angle of 2*pi, i.e. exactly on top of January, making the two
# months indistinguishable in the encoding.
max_month = 11
months_in_year = max_month + 1
month_angle = 2 * np.pi * months / months_in_year

# assign returns a new frame, so df2 is left untouched and no temporary
# 'seconds' / 'months' columns need to be dropped afterwards.
df3 = df2.assign(
    sin_time=np.sin(time_angle),
    cos_time=np.cos(time_angle),
    sin_month=np.sin(month_angle),
    cos_month=np.cos(month_angle),
)

Read in the station data and add an entry for ADL HQ.

In [5]:
# Details for ADL HQ, which is added as an extra "station" with id 0.
hq_id = 0
hq_lon = -3.688163914
hq_lat = 40.439664908

# Reading in station attribute file and adding the ADL HQ details.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
attr = pd.read_csv("data/station/attributes.csv")
hq_attr = pd.DataFrame([{
    'name': 'ADL HQ',
    'id': hq_id,
    'address': 'Paseo de la Castellana, 13, 28046 Madrid, Spain',
    'elevation': 691,
}])
attr = pd.concat([attr, hq_attr], ignore_index=True)

# Reading in geojson file and adding the ADL HQ location. The HQ row is given
# the same CRS as the station layer so the two can be concatenated cleanly.
stations = gpd.read_file("data/station/locations.geojson")
hq_location = gpd.GeoDataFrame(
    {'id': [hq_id], 'lon': [hq_lon], 'lat': [hq_lat]},
    geometry=[Point(hq_lon, hq_lat)],
    crs=stations.crs,
)
stations = pd.concat([stations, hq_location], ignore_index=True)

# Merging the two together.
stations = stations.merge(attr, on='id', how='left')

# Creating an indicator for the legend.
stations['HQ'] = np.where(stations['name'] == 'ADL HQ', 'ADL Headquarters', 'Station')

# Renaming id to station so it matches the key used in the measurement data.
stations = stations.rename(columns={'id': 'station'})

# Inner merge keeps only measurement rows whose station has a location entry.
df4 = df3.merge(stations[['station']], on='station', how='inner')

Save the results to intermediate files for exploration and modelling.

In [7]:
# Make sure the output directory exists so a fresh checkout can run end to end;
# both writers fail if it is missing.
os.makedirs("data/intermediate", exist_ok=True)

# Tabular time-series data (row index omitted from the CSV).
df4.to_csv("data/intermediate/td_data.csv", index=False)

# Station locations and attributes as a shapefile.
stations.to_file("data/intermediate/stations.shp", index=False)