In [11]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import minmax_scale

# Get weather dataset

In [12]:
weather_dataset = pd.read_csv('weather_wwo_manila_2015.csv', skipinitialspace=True, encoding='cp1252')
cols = ["tempC", "windspeedKmph", "cond", "precipMM", "humidity", "visibility", "pressure", "cloudcover", "heatIndexC", "dewPointC", "windChillC", "windGustKmph", "feelsLikeC"]
date_dataset = weather_dataset["dt"]
weather_dataset = weather_dataset[cols]

# Correlate traffic and weather dataset

In [3]:
roads = ["A. Maceda", "Anda Circle", "Antipolo", "Bluementritt", "Buendia", "Edsa Extension", "Finance Road", "Gov. Forbes - Lacson", "Lerma", "Magsaysay Ave", "P.Noval", "Pablo Ocampo", "Pedro Gil", "Quezon Ave.", "Quirino", "Rajah Sulayman", "Taft Ave.", "U.N. Avenue", "Vicente Cruz"]

In [None]:
for road in roads:
    traffic_dataset = pd.read_csv('mmda_2015_transformed/mmda_' + road + '_2015_transformed.csv', skipinitialspace=True, encoding='cp1252')
    traffic_dataset = traffic_dataset[['statusN', 'statusS']]
    
    # merge traffic and weather dataset
    dataset = pd.concat([traffic_dataset, weather_dataset], axis=1, join='inner')
    
    # correlate
    corr = dataset.corr(method='spearman')
    corr.to_csv('corr_mmda_wwo_2015/corr_results_mmda_wwo_' + road + '_2015.csv')

# Correlate traffic and weather dataset wth lags

In [None]:
roads = ["A. Maceda", "Anda Circle", "Antipolo", "Bluementritt", "Buendia", "Edsa Extension", "Finance Road", "Gov. Forbes - Lacson", "Lerma", "Magsaysay Ave", "P.Noval", "Pablo Ocampo", "Pedro Gil", "Quezon Ave.", "Quirino", "Rajah Sulayman", "Taft Ave.", "U.N. Avenue", "Vicente Cruz"]

In [None]:
total_lags = 8

for road in roads:
    traffic_dataset = pd.read_csv('mmda_2015_transformed/mmda_' + road + '_2015_transformed.csv', skipinitialspace=True, encoding='cp1252')
    traffic_dataset = traffic_dataset[['statusN', 'statusS']]
    
    # merge traffic and weather dataset
    dataset = pd.concat([traffic_dataset, weather_dataset], axis=1, join='inner')
    
    # make a copy
    new_dataset = dataset.copy()
    
    for i in range(1, (total_lags+1)):
        new_dataset.statusN = new_dataset.statusN.shift(-1)
        new_dataset.statusS = new_dataset.statusS.shift(-1)
        new_dataset = new_dataset[:(len(new_dataset)-1)]

        corr = new_dataset.corr(method='spearman')
        corr.to_csv('corr_mmda_wwo_2015_lags/corr_mmda_wwo_' + road + '_2015_lag_' + str(i) + '.csv')

In [4]:

for i in range(1, (total_lags+1)):
    
    lag_dataset = []
    
    for road in roads:
        dataset = pd.read_csv('corr_mmda_wwo_2015_lags/corr_mmda_wwo_' + road + '_2015_lag_' + str(i) + '.csv', skipinitialspace=True, encoding='cp1252')
        dataset = dataset.loc[:, ~dataset.columns.str.contains('^Unnamed')]
        dataset = dataset.loc[:1]
        
        lag_dataset.append(dataset)
    
    lag_dataset = pd.concat(lag_dataset)
    lag_dataset.to_csv('corr_mmda_wwo_2015_lags/corr_mmda_wwo_2015_lag_' + str(i) + '.csv')

NameError: name 'total_lags' is not defined

# Correlate traffic and weather with wet dry seasons

In [6]:
def getWetDrySeasons(my_dataset):
    # choose wet (jun to oct) season
    start_date = '2015-06-01 00:00:00'
    end_date = '2015-10-31 23:45:00'
    wet_mask = (my_dataset['dt'] >= start_date) & (my_dataset['dt'] <= end_date)
    wet_dataset = my_dataset.loc[wet_mask]
    
    # choose dry (nov to dec, jan to may)
    first_start_date = '2015-01-01 00:00:00'
    first_end_date = '2015-05-31 23:45:00'
    second_start_date = '2015-11-1 00:00:00'
    second_end_date = '2015-12-31 23:45:00'
    first_dry_mask = (my_dataset['dt'] >= first_start_date) & (my_dataset['dt'] <= first_end_date)
    second_dry_mask = (my_dataset['dt'] >= second_start_date) & (my_dataset['dt'] <= second_end_date)
    dry_dataset = my_dataset.loc[first_dry_mask]
    dry_dataset = pd.concat([dry_dataset, my_dataset.loc[second_dry_mask]])
    
    return wet_dataset, dry_dataset

In [7]:
for road in roads:
    traffic_dataset = pd.read_csv('mmda_2015_transformed/mmda_' + road + '_2015_transformed.csv', skipinitialspace=True, encoding='cp1252')
    traffic_dataset = traffic_dataset[['dt', 'statusN', 'statusS']]
    
    # merge traffic and weather dataset
    dataset = pd.concat([traffic_dataset, weather_dataset], axis=1, join='inner')
    
    # ensure dt is datetime
    dataset['dt'] = pd.to_datetime(dataset['dt']) 
    
    # get wet and dry season dataset
    wet_dataset, dry_dataset = getWetDrySeasons(dataset)
    
    # correlate
    corr = wet_dataset.corr(method='spearman')
    corr.to_csv('corr_mmda_wwo_2015_seasons_wetdry/wet_corr_mmda_wwo_' + road + '_2015.csv')
    corr = dry_dataset.corr(method='spearman')
    corr.to_csv('corr_mmda_wwo_2015_seasons_wetdry/dry_corr_mmda_wwo_' + road + '_2015.csv')

In [8]:
wet_df = []
dry_df = []

for road in roads:
    wet_dataset = pd.read_csv('corr_mmda_wwo_2015_seasons_wetdry/wet_corr_mmda_wwo_' + road + '_2015.csv', skipinitialspace=True, encoding='cp1252')
    wet_dataset = wet_dataset.loc[:, ~wet_dataset.columns.str.contains('^Unnamed')]
    wet_dataset = wet_dataset.loc[:1]
    wet_df.append(wet_dataset)
    
    dry_dataset = pd.read_csv('corr_mmda_wwo_2015_seasons_wetdry/dry_corr_mmda_wwo_' + road + '_2015.csv', skipinitialspace=True, encoding='cp1252')
    dry_dataset = dry_dataset.loc[:, ~dry_dataset.columns.str.contains('^Unnamed')]
    dry_dataset = dry_dataset.loc[:1]
    dry_df.append(dry_dataset)
    
wet_df = pd.concat(wet_df)
wet_df.to_csv('corr_mmda_wwo_2015_seasons_wetdry/wet_corr_mmda_wwo_2015.csv')
dry_df = pd.concat(dry_df)
dry_df.to_csv('corr_mmda_wwo_2015_seasons_wetdry/dry_corr_mmda_wwo_2015.csv')

# Correlate traffic and weather dataset with wet dry seasons and lags

In [9]:
total_lags = 8

for road in roads:
    traffic_dataset = pd.read_csv('mmda_2015_transformed/mmda_' + road + '_2015_transformed.csv', skipinitialspace=True, encoding='cp1252')
    traffic_dataset = traffic_dataset[['dt', 'statusN', 'statusS']]
    
    # merge traffic and weather dataset
    dataset = pd.concat([traffic_dataset, weather_dataset], axis=1, join='inner')
    
    # get wet and dry season dataset
    wet_dataset, dry_dataset = getWetDrySeasons(dataset)
    
    # make a copy
    new_wet_dataset = wet_dataset.copy()
    new_dry_dataset = dry_dataset.copy()
    
    for i in range(1, (total_lags+1)):
        new_wet_dataset.statusN = new_wet_dataset.statusN.shift(-1)
        new_wet_dataset.statusS = new_wet_dataset.statusS.shift(-1)
        new_wet_dataset = new_wet_dataset[:(len(new_wet_dataset)-1)]

        corr = new_wet_dataset.corr(method='spearman')
        corr.to_csv('corr_mmda_wwo_2015_seasons_wetdry_lags/wet_corr_mmda_wwo_' + road + '_2015_lag_' + str(i) + '.csv')
        
        new_dry_dataset.statusN = new_dry_dataset.statusN.shift(-1)
        new_dry_dataset.statusS = new_dry_dataset.statusS.shift(-1)
        new_dry_dataset = new_dry_dataset[:(len(new_dry_dataset)-1)]

        corr = new_dry_dataset.corr(method='spearman')
        corr.to_csv('corr_mmda_wwo_2015_seasons_wetdry_lags/dry_corr_mmda_wwo_' + road + '_2015_lag_' + str(i) + '.csv')

In [10]:
for i in range(1, (total_lags+1)):
    
    wet_lag_dataset = []
    dry_lag_dataset = []
    
    for road in roads:
        wet_dataset = pd.read_csv('corr_mmda_wwo_2015_seasons_wetdry_lags/wet_corr_mmda_wwo_' + road + '_2015_lag_' + str(i) + '.csv', skipinitialspace=True, encoding='cp1252')
        wet_dataset = wet_dataset.loc[:, ~wet_dataset.columns.str.contains('^Unnamed')]
        wet_dataset = wet_dataset.loc[:1]
        wet_lag_dataset.append(wet_dataset)
        
        dry_dataset = pd.read_csv('corr_mmda_wwo_2015_seasons_wetdry_lags/dry_corr_mmda_wwo_' + road + '_2015_lag_' + str(i) + '.csv', skipinitialspace=True, encoding='cp1252')
        dry_dataset = dry_dataset.loc[:, ~dry_dataset.columns.str.contains('^Unnamed')]
        dry_dataset = dry_dataset.loc[:1]
        dry_lag_dataset.append(dry_dataset)
    
    wet_lag_dataset = pd.concat(wet_lag_dataset)
    wet_lag_dataset.to_csv('corr_mmda_wwo_2015_seasons_wetdry_lags/wet_corr_mmda_wwo_2015_lag_' + str(i) + '.csv')
    dry_lag_dataset = pd.concat(dry_lag_dataset)
    dry_lag_dataset.to_csv('corr_mmda_wwo_2015_seasons_wetdry_lags/dry_corr_mmda_wwo_2015_lag_' + str(i) + '.csv')

# Correlate traffic and weather dataset with cool hot seasons

In [13]:
roads = ["A. Maceda", "Anda Circle", "Antipolo", "Bluementritt", "Buendia", "Edsa Extension", "Finance Road", "Gov. Forbes - Lacson", "Lerma", "Magsaysay Ave", "P.Noval", "Pablo Ocampo", "Pedro Gil", "Quezon Ave.", "Quirino", "Rajah Sulayman", "Taft Ave.", "U.N. Avenue", "Vicente Cruz"]

In [14]:
def getCoolHotSeasons(my_dataset):
    # choose hot (march to oct) season
    start_date = '2015-03-01 00:00:00'
    end_date = '2015-10-31 23:45:00'
    hot_mask = (my_dataset['dt'] >= start_date) & (my_dataset['dt'] <= end_date)
    hot_dataset = my_dataset.loc[hot_mask]
    
    # choose cool (nov to dec, jan to feb)
    first_start_date = '2015-01-01 00:00:00'
    first_end_date = '2015-02-28 23:45:00'
    second_start_date = '2015-11-1 00:00:00'
    second_end_date = '2015-12-31 23:45:00'
    first_cool_mask = (my_dataset['dt'] >= first_start_date) & (my_dataset['dt'] <= first_end_date)
    second_cool_mask = (my_dataset['dt'] >= second_start_date) & (my_dataset['dt'] <= second_end_date)
    cool_dataset = my_dataset.loc[first_cool_mask]
    cool_dataset = pd.concat([cool_dataset, my_dataset.loc[second_cool_mask]])
    
    return hot_dataset, cool_dataset

In [15]:
for road in roads:
    traffic_dataset = pd.read_csv('mmda_2015_transformed/mmda_' + road + '_2015_transformed.csv', skipinitialspace=True, encoding='cp1252')
    traffic_dataset = traffic_dataset[['dt', 'statusN', 'statusS']]
    
    # merge traffic and weather dataset
    dataset = pd.concat([traffic_dataset, weather_dataset], axis=1, join='inner')
    
    # ensure dt is datetime
    dataset['dt'] = pd.to_datetime(dataset['dt']) 
    
    # get hot and cool season dataset
    hot_dataset, cool_dataset = getCoolHotSeasons(dataset)
    
    # correlate
    corr = hot_dataset.corr(method='spearman')
    corr.to_csv('corr_mmda_wwo_2015_seasons_hotcool/hot_corr_mmda_wwo_' + road + '_2015.csv')
    corr = cool_dataset.corr(method='spearman')
    corr.to_csv('corr_mmda_wwo_2015_seasons_hotcool/cool_corr_mmda_wwo_' + road + '_2015.csv')

In [16]:
hot_df = []
cool_df = []

for road in roads:
    hot_dataset = pd.read_csv('corr_mmda_wwo_2015_seasons_hotcool/hot_corr_mmda_wwo_' + road + '_2015.csv', skipinitialspace=True, encoding='cp1252')
    hot_dataset = hot_dataset.loc[:, ~hot_dataset.columns.str.contains('^Unnamed')]
    hot_dataset = hot_dataset.loc[:1]
    hot_df.append(hot_dataset)
    
    cool_dataset = pd.read_csv('corr_mmda_wwo_2015_seasons_hotcool/cool_corr_mmda_wwo_' + road + '_2015.csv', skipinitialspace=True, encoding='cp1252')
    cool_dataset = cool_dataset.loc[:, ~cool_dataset.columns.str.contains('^Unnamed')]
    cool_dataset = cool_dataset.loc[:1]
    cool_df.append(cool_dataset)
    
hot_df = pd.concat(hot_df)
hot_df.to_csv('corr_mmda_wwo_2015_seasons_hotcool/hot_corr_mmda_wwo_2015.csv')
cool_df = pd.concat(cool_df)
cool_df.to_csv('corr_mmda_wwo_2015_seasons_hotcool/cool_corr_mmda_wwo_2015.csv')

In [17]:
total_lags = 8

for road in roads:
    traffic_dataset = pd.read_csv('mmda_2015_transformed/mmda_' + road + '_2015_transformed.csv', skipinitialspace=True, encoding='cp1252')
    traffic_dataset = traffic_dataset[['dt', 'statusN', 'statusS']]
    
    # merge traffic and weather dataset
    dataset = pd.concat([traffic_dataset, weather_dataset], axis=1, join='inner')
    
    # get hot and cool season dataset
    hot_dataset, cool_dataset = getCoolHotSeasons(dataset)
    
    # make a copy
    new_hot_dataset = hot_dataset.copy()
    new_cool_dataset = cool_dataset.copy()
    
    for i in range(1, (total_lags+1)):
        new_hot_dataset.statusN = new_hot_dataset.statusN.shift(-1)
        new_hot_dataset.statusS = new_hot_dataset.statusS.shift(-1)
        new_hot_dataset = new_hot_dataset[:(len(new_hot_dataset)-1)]

        corr = new_hot_dataset.corr(method='spearman')
        corr.to_csv('corr_mmda_wwo_2015_seasons_hotcool_lags/hot_corr_mmda_wwo_' + road + '_2015_lag_' + str(i) + '.csv')
        
        new_cool_dataset.statusN = new_cool_dataset.statusN.shift(-1)
        new_cool_dataset.statusS = new_cool_dataset.statusS.shift(-1)
        new_cool_dataset = new_cool_dataset[:(len(new_cool_dataset)-1)]

        corr = new_cool_dataset.corr(method='spearman')
        corr.to_csv('corr_mmda_wwo_2015_seasons_hotcool_lags/cool_corr_mmda_wwo_' + road + '_2015_lag_' + str(i) + '.csv')

In [18]:
for i in range(1, (total_lags+1)):
    
    hot_lag_dataset = []
    cool_lag_dataset = []
    
    for road in roads:
        hot_dataset = pd.read_csv('corr_mmda_wwo_2015_seasons_hotcool_lags/hot_corr_mmda_wwo_' + road + '_2015_lag_' + str(i) + '.csv', skipinitialspace=True, encoding='cp1252')
        hot_dataset = hot_dataset.loc[:, ~hot_dataset.columns.str.contains('^Unnamed')]
        hot_dataset = hot_dataset.loc[:1]
        hot_lag_dataset.append(hot_dataset)
        
        cool_dataset = pd.read_csv('corr_mmda_wwo_2015_seasons_hotcool_lags/cool_corr_mmda_wwo_' + road + '_2015_lag_' + str(i) + '.csv', skipinitialspace=True, encoding='cp1252')
        cool_dataset = cool_dataset.loc[:, ~cool_dataset.columns.str.contains('^Unnamed')]
        cool_dataset = cool_dataset.loc[:1]
        cool_lag_dataset.append(cool_dataset)
    
    hot_lag_dataset = pd.concat(hot_lag_dataset)
    hot_lag_dataset.to_csv('corr_mmda_wwo_2015_seasons_hotcool_lags/hot_corr_mmda_wwo_2015_lag_' + str(i) + '.csv')
    cool_lag_dataset = pd.concat(cool_lag_dataset)
    cool_lag_dataset.to_csv('corr_mmda_wwo_2015_seasons_hotcool_lags/cool_corr_mmda_wwo_2015_lag_' + str(i) + '.csv')