# Machine Learning Nanodegree
## Capstone Project: Electric Vehicle Detection 

### Setup

In [1]:
# Import libraries necessary for this project
import numpy as np
import pandas as pd
from IPython.display import display 

# Pretty display for notebooks
%matplotlib inline

### Data Clean Up

In [2]:
# Load the raw per-house interval readings and the matching per-interval label table.
ev_train_raw, ev_train_labels_raw = (
    pd.read_csv(csv_name) for csv_name in ('EV_train.csv', 'EV_train_labels.csv')
)

In [3]:
# Print the dimensions of both raw frames so they can be compared at a glance,
# then render the first rows of each (readings first, labels second).
for raw_frame in (ev_train_raw, ev_train_labels_raw):
    print(raw_frame.shape)
display(ev_train_raw.head(), ev_train_labels_raw.head())

(1590, 2881)
(1590, 2881)


Unnamed: 0,House ID,Interval_1,Interval_2,Interval_3,Interval_4,Interval_5,Interval_6,Interval_7,Interval_8,Interval_9,...,Interval_2871,Interval_2872,Interval_2873,Interval_2874,Interval_2875,Interval_2876,Interval_2877,Interval_2878,Interval_2879,Interval_2880
0,11655099,0.95,0.826,0.361,0.238,0.342,0.233,0.351,0.194,0.292,...,0.664,0.783,0.601,0.639,0.417,0.439,0.226,0.19,0.71,0.728
1,11633257,0.353,0.327,0.358,0.292,0.285,0.304,0.361,0.342,0.355,...,0.536,0.558,0.622,0.634,0.513,0.421,0.273,0.296,0.291,0.289
2,11651552,0.15,0.181,0.15,0.15,0.131,0.125,0.088,0.106,0.094,...,2.125,0.881,0.481,1.194,0.138,0.119,0.038,0.088,0.056,0.113
3,11636092,2.088,2.075,2.121,2.098,2.046,2.081,1.847,0.42,0.399,...,0.62,0.487,0.563,0.419,0.379,0.359,0.347,0.325,0.33,0.34
4,11647239,1.416,1.25,1.27,1.258,1.239,1.753105,4.609256,4.619256,4.075151,...,1.596,1.667,1.569,1.664,1.58,1.635,1.568,1.565,1.575,1.571


Unnamed: 0,House ID,Interval_1,Interval_2,Interval_3,Interval_4,Interval_5,Interval_6,Interval_7,Interval_8,Interval_9,...,Interval_2871,Interval_2872,Interval_2873,Interval_2874,Interval_2875,Interval_2876,Interval_2877,Interval_2878,Interval_2879,Interval_2880
0,11655099,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,11633257,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,11651552,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,11636092,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,11647239,0,0,0,0,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# Reshape the wide table (one row per house, one 'Interval_<n>' column per interval)
# into long form: one row per (House ID, Interval) pair.
#
# wide_to_long puts the numeric column suffix into the column named by `j` and the
# stacked values into a column named after the stub ('Interval_'). Naming `j`
# 'Interval' and renaming the stub column explicitly keeps every label truthful,
# instead of relying on a positional column overwrite to swap mislabelled names.
# No defensive copy of the raw frame is needed: wide_to_long returns a new frame.
ev_train_pivot = (
    pd.wide_to_long(ev_train_raw, stubnames='Interval_', i='House ID', j='Interval')
    .reset_index()
    .rename(columns={'Interval_': 'kWh'})
    [['House ID', 'Interval', 'kWh']]
    # Make sure the interval number is a compact integer dtype regardless of how
    # the installed pandas version parsed the column suffix.
    .assign(Interval=lambda long_df: pd.to_numeric(long_df['Interval'], downcast='integer'))
    # Order rows chronologically within each house and give them a clean 0..n-1
    # index; later cells align other frames against this index.
    .sort_values(['House ID', 'Interval'])
    .reset_index(drop=True)
)
ev_train_pivot[ev_train_pivot['House ID']==11655099].head()

Unnamed: 0,House ID,Interval,kWh
4533120,11655099,1,0.95
4533121,11655099,2,0.826
4533122,11655099,3,0.361
4533123,11655099,4,0.238
4533124,11655099,5,0.342


In [5]:
# Calendar layout of the interval axis: each interval is one half hour.
HALF_HOURS_PER_HOUR = 2
HOURS_PER_DAY = 24
HALF_HOURS_PER_DAY = HALF_HOURS_PER_HOUR * HOURS_PER_DAY

# Derive the data set dimensions from the data instead of hard-coding them, so the
# same cell works for a file with a different number of houses or days.
# int(...) converts numpy scalars to Python ints; the Interval column may be a
# narrow integer dtype that would overflow in the row-count product below.
n_houses = int(ev_train_pivot['House ID'].nunique())
n_days = int(ev_train_pivot['Interval'].max()) // HALF_HOURS_PER_DAY
expected_rows = HALF_HOURS_PER_DAY * n_days * n_houses

# Sanity check: the long frame must hold one row per house per half-hour interval.
print(ev_train_pivot.shape)
print(expected_rows)
assert len(ev_train_pivot) == expected_rows, "Unexpected number of rows in ev_train_pivot"

# Compute the temporal dimensions from each row's own Interval number rather than
# from tiled arrays, so the result does not depend on the row order of the frame.
interval_zero_based = np.asarray(ev_train_pivot['Interval'], dtype=np.int64) - 1
slot_of_day = interval_zero_based % HALF_HOURS_PER_DAY

temp_dim_dict = {
    "Day": interval_zero_based // HALF_HOURS_PER_DAY + 1,   # 1-based day number
    "Hour": slot_of_day // HALF_HOURS_PER_HOUR + 1,         # 1..24 within the day
    "Half Hour": slot_of_day + 1                            # 1..48 within the day
}

# Share ev_train_pivot's index so a later column-wise concat aligns row for row.
temp_dim_df = pd.DataFrame(temp_dim_dict, index=ev_train_pivot.index)

(4579200, 3)
4579200


In [6]:
# Attach the Day / Hour / Half Hour columns to the long readings frame (aligned on
# the row index) and put the columns in a reader-friendly order.
ev_train_pivot_temp_cols = ['House ID','Day','Hour','Half Hour','Interval', 'kWh']
ev_train_pivot_temp = pd.concat(
    [ev_train_pivot, temp_dim_df],
    axis=1,
)[ev_train_pivot_temp_cols]

In [7]:
## Checking Temporal Concatenation (pd.merge was failing)

# Per-house summary of the temporal columns: if the concatenation lined up, every
# house has exactly the same max / min / mean / count for Day, Hour and Half Hour.
check_aggs =['max', 'min','mean','count']

check_dict = {
    'Day' : check_aggs,
    'Hour': check_aggs,
    'Half Hour': check_aggs,
}

temp_merge_check_df = ev_train_pivot_temp.groupby('House ID').agg(check_dict)

unique_records = len(temp_merge_check_df.drop_duplicates())
total_records = len(temp_merge_check_df)
# The raw table has one row per house, so it provides the expected house count
# without hard-coding it.
expected_houses = len(ev_train_raw)

assert unique_records == 1, "Problem with temporal concatenation"
assert total_records == expected_houses, "Problem with temporal concatenation"

# The aggregate check above cannot see rows that are misaligned *within* a house,
# so also verify row by row that the temporal columns agree with Interval.
# Misaligned rows would also surface as NaN, which compares unequal and fails here.
half_hours_per_day = 48
rebuilt_interval = (
    (ev_train_pivot_temp['Day'] - 1) * half_hours_per_day
    + ev_train_pivot_temp['Half Hour']
)
assert (rebuilt_interval == ev_train_pivot_temp['Interval']).all(), \
    "Day / Half Hour do not match Interval"
assert (ev_train_pivot_temp['Hour'] == (ev_train_pivot_temp['Half Hour'] + 1) // 2).all(), \
    "Hour does not match Half Hour"

# Show a slice that crosses a day boundary, one house's first rows, and the summary.
display(ev_train_pivot_temp.iloc[45:50,])
display(ev_train_pivot_temp[ev_train_pivot_temp['House ID']==11655099].head())
display(temp_merge_check_df.head())

Unnamed: 0,House ID,Day,Hour,Half Hour,Interval,kWh
45,11628280,1,23,46,46,1.146
46,11628280,1,24,47,47,1.13
47,11628280,1,24,48,48,1.17
48,11628280,2,1,1,49,1.239
49,11628280,2,1,2,50,0.952


Unnamed: 0,House ID,Day,Hour,Half Hour,Interval,kWh
4533120,11655099,1,1,1,1,0.95
4533121,11655099,1,1,2,2,0.826
4533122,11655099,1,2,3,3,0.361
4533123,11655099,1,2,4,4,0.238
4533124,11655099,1,3,5,5,0.342


Unnamed: 0_level_0,Day,Day,Day,Day,Hour,Hour,Hour,Hour,Half Hour,Half Hour,Half Hour,Half Hour
Unnamed: 0_level_1,max,min,mean,count,max,min,mean,count,max,min,mean,count
House ID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
11628280,60,1,30.5,2880,24,1,12.5,2880,48,1,24.5,2880
11628291,60,1,30.5,2880,24,1,12.5,2880,48,1,24.5,2880
11628301,60,1,30.5,2880,24,1,12.5,2880,48,1,24.5,2880
11628319,60,1,30.5,2880,24,1,12.5,2880,48,1,24.5,2880
11628335,60,1,30.5,2880,24,1,12.5,2880,48,1,24.5,2880


### Validation Split

### Feature Engineering