In [1]:
import datetime
import os

import numpy as np
import pandas as pd
from sklearn.preprocessing import minmax_scale

# Get Weather Dataset

In [2]:
def normalize(data):
    """Return ``data`` rescaled by scikit-learn's ``minmax_scale``.

    ``minmax_scale`` is called with its default arguments, so the result is
    whatever that function returns for ``data`` (min-max scaling onto its
    default feature range).
    """
    return minmax_scale(data)

# Load the weather readings and discard the 'Date' column so that every
# remaining column can be rescaled below.
weather_dataset = pd.read_csv(
    'interpolated_weather_with_precip_2015_3.csv',
    skipinitialspace=True,
    encoding='cp1252',
).drop(columns=['Date'])

# Rescale each remaining column independently with normalize().
columns = weather_dataset.columns
for column in columns:
    scaled_values = normalize(weather_dataset[column])
    weather_dataset[column] = scaled_values

# Get wet and dry season

In [3]:
# Road names used to build the per-road input and output CSV file names.
# The spellings are part of those file names, so they must stay exactly as is.
roads = [
    "A. Maceda",
    "Anda Circle",
    "Antipolo",
    "Bluementritt",
    "Buendia",
    "Edsa Extension",
    "Finance Road",
    "Gov. Forbes - Lacson",
    "Lerma",
    "Magsaysay Ave",
    "P.Noval",
    "Pablo Ocampo",
    "Pedro Gil",
    "Quezon Ave.",
    "Quirino",
    "Rajah Sulayman",
    "Taft Ave.",
    "U.N. Avenue",
    "Vicente Cruz",
]

In [4]:
def getWetDrySeasons(my_dataset, year=2015):
    """Split a dataset into wet-season and dry-season rows for one year.

    The wet season is June through October; the dry season is January
    through May plus November and December of the same calendar year.

    Parameters
    ----------
    my_dataset : pandas.DataFrame
        Must contain a 'dt' column holding datetimes, or strings that
        ``pd.to_datetime`` can parse. The frame itself is not modified.
    year : int, optional
        Calendar year to select. Defaults to 2015.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(wet_dataset, dry_dataset)``, both row subsets of ``my_dataset``.
        The dry rows are ordered January-May first, then November-December.
    """
    # Parse once and select by calendar month. This avoids two pitfalls of
    # comparing against hand-written boundary strings: a boundary that is not
    # zero-padded mis-compares when 'dt' holds strings, and a closed
    # "23:45:00" upper bound drops any row stamped later on the last day.
    timestamps = pd.to_datetime(my_dataset['dt'])
    month = timestamps.dt.month
    in_year = timestamps.dt.year == year

    # wet season: June to October
    wet_mask = in_year & month.between(6, 10)

    # dry season: January to May, and November to December
    first_dry_mask = in_year & (month <= 5)
    second_dry_mask = in_year & (month >= 11)

    wet_dataset = my_dataset.loc[wet_mask]
    dry_dataset = pd.concat([
        my_dataset.loc[first_dry_mask],
        my_dataset.loc[second_dry_mask],
    ])

    return wet_dataset, dry_dataset

In [6]:
# Folder that receives the per-road seasonal CSVs. It is created up front
# because DataFrame.to_csv does not create missing parent directories.
output_dir = 'merged_mmda_ogimet_2015_seasonDryWet'
os.makedirs(output_dir, exist_ok=True)

for road in roads:
    # load this road's traffic records, keeping the timestamp and both statuses
    traffic_dataset = pd.read_csv(
        'mmda_2015_transformed/mmda_' + road + '_2015_transformed.csv',
        skipinitialspace=True,
        encoding='cp1252',
    )
    traffic_dataset = traffic_dataset[['dt', 'statusN', 'statusS']]

    # merge traffic and weather dataset
    # NOTE(review): this pairs rows by index label (row position after
    # read_csv), not by timestamp; join='inner' keeps only labels present in
    # both frames. It presumably relies on both files listing the same
    # timestamps in the same order -- confirm.
    dataset = pd.concat([traffic_dataset, weather_dataset], axis=1, join='inner')

    # ensure dt is datetime
    dataset['dt'] = pd.to_datetime(dataset['dt'])

    # get wet and dry season dataset
    wet_dataset, dry_dataset = getWetDrySeasons(dataset)

    # make 'dt' the index
    wet_dataset = wet_dataset.set_index(['dt'])
    dry_dataset = dry_dataset.set_index(['dt'])

    # save to csv
    wet_dataset.to_csv(os.path.join(output_dir, 'merged_mmda_ogimet_' + road + '_2015_seasonWet.csv'))
    dry_dataset.to_csv(os.path.join(output_dir, 'merged_mmda_ogimet_' + road + '_2015_seasonDry.csv'))