In [10]:
import os

import numpy as np
import pandas as pd
from sklearn.preprocessing import minmax_scale

# Normalize weather dataset

In [11]:
def normalize(data):
    """Rescale ``data`` with scikit-learn's ``minmax_scale`` and return the result.

    ``minmax_scale`` is called with its default settings (scikit-learn
    documents the default ``feature_range`` as (0, 1)).
    """
    return minmax_scale(data)

In [20]:
# Load the interpolated weather CSV, then split the "Date" column off into
# date_dataset so weather_dataset keeps only the remaining columns.
weather_dataset = pd.read_csv(
    'interpolated_weather_with_precip_2015_3.csv',
    skipinitialspace=True,
    encoding='cp1252',
)
# pop() removes the column from the frame and returns it as a Series.
date_dataset = weather_dataset.pop("Date")

In [21]:
# Rescale every weather column independently with normalize(), writing the
# result back into weather_dataset, then display the frame.
columns = weather_dataset.columns
for column in columns:
    scaled_column = normalize(weather_dataset[column])
    weather_dataset[column] = scaled_column
weather_dataset

Unnamed: 0,temperatureC,dewpointC,windspeedKMH,pressureMB,pressureTendencyMB,cloudCoverOKTA,lowCloudCoverOKTA,HKm,visibilityKM,precipMM
0,0.310811,0.321739,0.230769,0.743590,0.837838,1.0,0.75,1.0,0.777778,0.000000
1,0.319257,0.322464,0.237179,0.745370,0.806306,1.0,0.75,1.0,0.759259,0.000000
2,0.327703,0.323188,0.243590,0.747151,0.774775,1.0,0.75,1.0,0.740741,0.000000
3,0.336149,0.323913,0.250000,0.748932,0.743243,1.0,0.75,1.0,0.722222,0.000000
4,0.344595,0.324638,0.256410,0.750712,0.711712,1.0,0.75,1.0,0.703704,0.000000
5,0.353041,0.325362,0.262821,0.752493,0.680180,1.0,0.75,1.0,0.685185,0.000000
6,0.361486,0.326087,0.269231,0.754274,0.648649,1.0,0.75,1.0,0.666667,0.000000
7,0.369932,0.326812,0.275641,0.756054,0.617117,1.0,0.75,1.0,0.648148,0.000000
8,0.378378,0.327536,0.282051,0.757835,0.585586,1.0,0.75,1.0,0.629630,0.000000
9,0.386824,0.328261,0.288462,0.759615,0.554054,1.0,0.75,1.0,0.611111,0.000000


# Correlate traffic and weather datasets

In [12]:
# Road names; each one is spliced into the per-road CSV file names, so the
# spelling must match the files on disk exactly.
roads = [
    "A. Maceda",
    "Anda Circle",
    "Antipolo",
    "Bluementritt",
    "Buendia",
    "Edsa Extension",
    "Finance Road",
    "Gov. Forbes - Lacson",
    "Lerma",
    "Magsaysay Ave",
    "P.Noval",
    "Pablo Ocampo",
    "Pedro Gil",
    "Quezon Ave.",
    "Quirino",
    "Rajah Sulayman",
    "Taft Ave.",
    "U.N. Avenue",
    "Vicente Cruz",
]

In [13]:
# For each road, compute the Spearman rank correlation between the road's
# statusN/statusS traffic columns and the weather columns, and save the
# resulting correlation matrix to one CSV per road.
corr_output_dir = 'corr_mmda_ogimet_2015'
# to_csv does not create missing directories, so make sure the target exists
# before writing (a fresh checkout may not have it).
os.makedirs(corr_output_dir, exist_ok=True)

for road in roads:
    traffic_dataset = pd.read_csv('mmda_2015_transformed/mmda_' + road + '_2015_transformed.csv', skipinitialspace=True, encoding='cp1252')
    traffic_dataset = traffic_dataset[['statusN', 'statusS']]

    # merge traffic and weather dataset: axis=1 puts the columns side by side,
    # join='inner' keeps only the index labels present in both frames
    dataset = pd.concat([traffic_dataset, weather_dataset], axis=1, join='inner')

    # correlate
    corr = dataset.corr(method='spearman')
    corr.to_csv(corr_output_dir + '/corr_results_mmda_ogimet_' + road + '_2015.csv')

# Correlate traffic and weather datasets with lags

In [14]:
# Road names for the lagged analysis (same entries as the list assigned
# earlier in the notebook); each one is spliced into the per-road CSV file
# names, so the spelling must match the files on disk exactly.
roads = [
    "A. Maceda",
    "Anda Circle",
    "Antipolo",
    "Bluementritt",
    "Buendia",
    "Edsa Extension",
    "Finance Road",
    "Gov. Forbes - Lacson",
    "Lerma",
    "Magsaysay Ave",
    "P.Noval",
    "Pablo Ocampo",
    "Pedro Gil",
    "Quezon Ave.",
    "Quirino",
    "Rajah Sulayman",
    "Taft Ave.",
    "U.N. Avenue",
    "Vicente Cruz",
]

In [7]:
# Number of lag steps to evaluate (lags 1..total_lags).
total_lags = 8

# For each road and each lag i, correlate the weather at row t with the
# traffic status i rows later, and save one Spearman correlation matrix per
# (road, lag) pair.
lag_output_dir = 'corr_mmda_ogimet_2015_lags'
# to_csv does not create missing directories, so make sure the target exists
# before writing (a fresh checkout may not have it).
os.makedirs(lag_output_dir, exist_ok=True)

for road in roads:
    traffic_dataset = pd.read_csv('mmda_2015_transformed/mmda_' + road + '_2015_transformed.csv', skipinitialspace=True, encoding='cp1252')
    traffic_dataset = traffic_dataset[['statusN', 'statusS']]

    # merge traffic and weather dataset: axis=1 puts the columns side by side,
    # join='inner' keeps only the index labels present in both frames
    dataset = pd.concat([traffic_dataset, weather_dataset], axis=1, join='inner')

    for i in range(1, (total_lags+1)):
        # Build each lag from the untouched merged frame. Assigning columns on
        # a frame obtained by slicing (shift by one, slice, repeat) is the
        # pattern that raises pandas' SettingWithCopyWarning; shifting a fresh
        # copy by i in one step yields the same rows without it.
        new_dataset = dataset.copy()
        new_dataset['statusN'] = dataset['statusN'].shift(-i)
        new_dataset['statusS'] = dataset['statusS'].shift(-i)
        # The last i rows have no traffic value i rows ahead; drop them.
        new_dataset = new_dataset.iloc[:-i]

        corr = new_dataset.corr(method='spearman')
        corr.to_csv(lag_output_dir + '/corr_mmda_ogimet_' + road + '_2015_lag_' + str(i) + '.csv')

    # progress marker: one line per finished road
    print(road)

A. Maceda
Anda Circle
Antipolo
Bluementritt
Buendia
Edsa Extension
Finance Road
Gov. Forbes - Lacson
Lerma
Magsaysay Ave
P.Noval
Pablo Ocampo
Pedro Gil
Quezon Ave.
Quirino
Rajah Sulayman
Taft Ave.
U.N. Avenue
Vicente Cruz


In [16]:

# For every lag, collect the leading rows of each road's lagged correlation
# CSV and stack them into a single per-lag summary CSV.
for i in range(1, (total_lags+1)):

    road_frames = []

    for road in roads:
        source_path = 'corr_mmda_ogimet_2015_lags/corr_mmda_ogimet_' + road + '_2015_lag_' + str(i) + '.csv'
        dataset = pd.read_csv(source_path, skipinitialspace=True, encoding='cp1252')

        # Columns whose name starts with "Unnamed" are discarded (presumably
        # the unlabeled index column written by to_csv, i.e. the row labels).
        keep_columns = ~dataset.columns.str.contains('^Unnamed')
        # .loc slicing is label-based and inclusive: with read_csv's default
        # integer index this keeps rows 0 and 1 (presumably the statusN and
        # statusS rows of the correlation matrix - TODO confirm).
        dataset = dataset.loc[:1, keep_columns]

        road_frames.append(dataset)

    # NOTE(review): the stacked rows carry no road or status label; they are
    # only identifiable by their order, which follows `roads`.
    lag_dataset = pd.concat(road_frames)
    summary_path = 'corr_mmda_ogimet_2015_lags/corr_mmda_ogimet_2015_lag_' + str(i) + '.csv'
    lag_dataset.to_csv(summary_path)

In [22]:
# Display the "Date" column that was split off from the weather data.
date_dataset

0           1/1/2015 0:00
1           1/1/2015 0:15
2           1/1/2015 0:30
3           1/1/2015 0:45
4           1/1/2015 1:00
5           1/1/2015 1:15
6           1/1/2015 1:30
7           1/1/2015 1:45
8           1/1/2015 2:00
9           1/1/2015 2:15
10          1/1/2015 2:30
11          1/1/2015 2:45
12          1/1/2015 3:00
13          1/1/2015 3:15
14          1/1/2015 3:30
15          1/1/2015 3:45
16          1/1/2015 4:00
17          1/1/2015 4:15
18          1/1/2015 4:30
19          1/1/2015 4:45
20          1/1/2015 5:00
21          1/1/2015 5:15
22          1/1/2015 5:30
23          1/1/2015 5:45
24          1/1/2015 6:00
25          1/1/2015 6:15
26          1/1/2015 6:30
27          1/1/2015 6:45
28          1/1/2015 7:00
29          1/1/2015 7:15
               ...       
35010    12/31/2015 16:30
35011    12/31/2015 16:45
35012    12/31/2015 17:00
35013    12/31/2015 17:15
35014    12/31/2015 17:30
35015    12/31/2015 17:45
35016    12/31/2015 18:00
35017    12/