In [1]:
import numpy as np
import pandas as pd
import datetime
from sklearn.preprocessing import minmax_scale

In [2]:
def normalize(data):
    """Min-max scale ``data`` and return the scaled values.

    Thin wrapper around scikit-learn's ``minmax_scale`` called with its
    default arguments (the scikit-learn documentation gives the default
    target range as [0, 1]).

    Parameters
    ----------
    data : array-like
        Values to rescale; passed to ``minmax_scale`` unchanged.

    Returns
    -------
    Whatever ``minmax_scale`` returns for ``data``.
    """
    return minmax_scale(data)

In [4]:
def getWetDrySeasons(my_dataset, year=2015):
    """Split a datetime-indexed dataset into wet-season and dry-season rows.

    The wet season is June through October of ``year``. The dry season is
    January through May followed by November through December of the same
    year. Rows whose index falls outside ``year`` appear in neither result.

    Each season is selected with a half-open interval
    (``season start <= t < start of the following month``), so every
    timestamp on the last day of a season is kept, not only those up to
    23:45:00.

    Parameters
    ----------
    my_dataset : pandas.DataFrame or pandas.Series
        Data whose index can be compared against date strings (the caller in
        this notebook converts the index with ``pd.to_datetime`` first).
    year : int, optional
        Calendar year to split. Defaults to 2015.

    Returns
    -------
    tuple
        ``(wet_dataset, dry_dataset)``. Both are new objects, so assigning
        columns on them does not touch ``my_dataset``. Dry-season rows keep
        the order "January-May rows, then November-December rows".
    """
    timestamps = my_dataset.index

    def _in_range(start, stop):
        # Half-open [start, stop): no row between 23:45 and midnight is lost.
        return (timestamps >= start) & (timestamps < stop)

    year_start = '%04d-01-01 00:00:00' % year
    wet_start = '%04d-06-01 00:00:00' % year
    wet_stop = '%04d-11-01 00:00:00' % year
    next_year_start = '%04d-01-01 00:00:00' % (year + 1)

    # choose wet (jun to oct) season; copy so callers can modify it freely
    wet_dataset = my_dataset.loc[_in_range(wet_start, wet_stop)].copy()

    # choose dry (jan to may, then nov to dec); concat already builds a new object
    dry_dataset = pd.concat([
        my_dataset.loc[_in_range(year_start, wet_start)],
        my_dataset.loc[_in_range(wet_stop, next_year_start)],
    ])

    return wet_dataset, dry_dataset

In [3]:
# Road names processed by this notebook. Each name is spliced verbatim into
# the input/output CSV file names, so the spelling here must stay exactly as is.
# NOTE(review): "Bluementritt" looks like a misspelling of "Blumentritt", but it
# is used to build file names - confirm against the files on disk before changing.
roads = [
    "A. Maceda",
    "Anda Circle",
    "Antipolo",
    "Bluementritt",
    "Buendia",
    "Edsa Extension",
    "Finance Road",
    "Gov. Forbes - Lacson",
    "Lerma",
    "Magsaysay Ave",
    "P.Noval",
    "Pablo Ocampo",
    "Pedro Gil",
    "Quezon Ave.",
    "Quirino",
    "Rajah Sulayman",
    "Taft Ave.",
    "U.N. Avenue",
    "Vicente Cruz",
]

In [6]:
# For every road: load its raw 2015 CSV, split it into wet/dry seasons, save the
# raw split, then min-max normalize each column per season and save that too.
raw_in_prefix = 'raw/raw_mmda_wwo_2015/raw_mmda_wwo_'
raw_out_prefix = 'raw/raw_mmda_wwo_2015_seasonDryWet/raw_mmda_wwo_'
normalized_out_prefix = 'normalized/normalized_mmda_wwo_2015_seasonDryWet/normalized_mmda_wwo_'

for road in roads:
    # obtain raw data (the 'dt' column becomes the index)
    dataset = pd.read_csv(raw_in_prefix + road + '_2015.csv',
                          index_col=['dt'],
                          skipinitialspace=True,
                          encoding='cp1252')

    # ensure is datetime
    dataset.index = pd.to_datetime(dataset.index)

    # get wet and dry season dataset
    wet_dataset, dry_dataset = getWetDrySeasons(dataset)

    # save RAW to csv
    wet_dataset.to_csv(raw_out_prefix + road + '_2015_seasonWet.csv')
    dry_dataset.to_csv(raw_out_prefix + road + '_2015_seasonDry.csv')

    # The season frames may be selections taken from `dataset`; assigning
    # columns on such a selection raises pandas' SettingWithCopyWarning.
    # Work on explicit copies so the assignments below are unambiguous.
    wet_dataset = wet_dataset.copy()
    dry_dataset = dry_dataset.copy()

    # normalize every column independently within each season
    columns = dataset.columns
    for column in columns:
        wet_dataset[column] = normalize(wet_dataset[column])
        dry_dataset[column] = normalize(dry_dataset[column])

    # save NORMALIZED to csv
    wet_dataset.to_csv(normalized_out_prefix + road + '_2015_seasonWet.csv')
    dry_dataset.to_csv(normalized_out_prefix + road + '_2015_seasonDry.csv')
    