# Applying Machine Learning and Deep Learning to identify home appliances consuming excess power

## Copyright (c) 2018, Faststream Technologies
## Author: Sudhanva Narayana

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.externals import joblib

### Import dataset (the first row of the CSV is used as the column header)

In [2]:
# Load the records to be scored. read_csv is called with its defaults, so the
# column names come from the file's first row.
test_csv_path = '../data/home_data_test.csv'
df = pd.read_csv(test_csv_path)

### Dataset

In [3]:
# Preview the first rows to sanity-check the loaded columns and values.
df.head()

Unnamed: 0,device,building,room,weather_type,date,from_time,to_time,time,no_of_people,time_stayed_mins
0,AC,1,101,very hot,2018-01-02,14:00:00,15:00:00,afternoon,7,27
1,AC,1,101,very hot,2018-01-03,01:00:00,02:00:00,midnight,11,32
2,AC,1,101,very hot,2018-01-03,02:00:00,03:00:00,midnight,0,0
3,AC,1,101,very hot,2018-01-03,11:00:00,12:00:00,morning,7,4
4,AC,1,101,very hot,2018-01-03,18:00:00,19:00:00,evening,2,17


### Selecting the feature columns

In [4]:
# Positional indices (into df) of the columns that are fed to the model.
# NOTE(review): judging by the preview above these look like device, date,
# from_time, to_time, time, no_of_people and time_stayed_mins — confirm against
# the CSV, since the selection is by position rather than by column name.
feature_positions = [0, 4, 5, 6, 7, 8, 9]
X = df.iloc[:, feature_positions].values

### Encoding Categorical Variables

In [5]:
# Integer-encode the first five feature columns, using one independent
# LabelEncoder per column. The encoders are fitted on this file's own values,
# so the resulting codes depend on which values happen to be present here.
# NOTE(review): presumably the codes must line up with the ones used when the
# regressor loaded further down was trained — confirm, or persist and reuse the
# encoders fitted at training time.
label_encoders = [LabelEncoder() for _ in range(5)]
for column_index, encoder in enumerate(label_encoders):
    X[:, column_index] = encoder.fit_transform(X[:, column_index])

# Keep the original per-column encoder names available to later cells.
(labelencoder_X_0, labelencoder_X_1, labelencoder_X_2,
 labelencoder_X_3, labelencoder_X_4) = label_encoders

# Expand the five encoded columns into one-hot indicator columns and convert
# the sparse result to a dense array.
# NOTE(review): the `categorical_features` argument exists only in older
# scikit-learn releases — confirm the pinned version before upgrading.
onehotencoder = OneHotEncoder(categorical_features=[0, 1, 2, 3, 4])
hot_X = onehotencoder.fit_transform(X).toarray()

### Avoiding the dummy variable trap

In [6]:
columns = df.columns
categories = [0, 1, 2, 3, 4]

# Build the list of one-hot column indices to drop, one per categorical
# feature. Each step adds (position * number of distinct values in df column
# `position`) to a running total and records that total as an index.
# NOTE(review): the cardinalities are read from df columns 0-4, not from the
# columns selected into X, and are weighted by the position itself — this may
# not be the intended "first dummy of each feature" offset. Left unchanged
# because the loaded model presumably expects exactly this column layout;
# verify against the training notebook.
dummies = []
dummies_sum = 0
for category in categories:
    distinct_count = len(df.iloc[:, category].unique())
    dummies_sum = dummies_sum + category * distinct_count
    dummies.append(dummies_sum)

# Drop the selected columns. hot_X is overwritten in place, so re-running this
# cell without re-running the encoding cell removes columns a second time.
hot_X = np.delete(hot_X, dummies, axis=1)

  # This is added back by InteractiveShellApp.init_path()


In [7]:
# The reduced one-hot matrix is the model input. Despite the name, it is only
# passed to predict() below; nothing is trained in this notebook.
X_train = hot_X

### Multiple Linear Regression

In [8]:
# Load the previously fitted regressor from disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load model files that come from a trusted source.
model_path = '../model/multiple_reg.pkl'
regressor = joblib.load(model_path)

In [9]:
# Score every row of the prepared feature matrix with the loaded regressor.
y_pred = regressor.predict(X_train)
# Display the predictions as the cell output.
y_pred

array([1497.1223527 , 1418.39733958, 1472.69758577, ...,  281.72848997,
        233.33928556,  208.47670077])

In [10]:
# Attach the predictions to the loaded frame as a new 'power' column and write
# the result to a CSV in the current working directory, without the row index.
output_csv = 'home_data_predict.csv'
df['power'] = y_pred
df.to_csv(output_csv, index=False)