# Title

## Copyright (c) 2018, Faststream Technologies
## Author: Sudhanva Narayana

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

### Import dataset ignoring headers

In [2]:
df = pd.read_csv('../data/home_final.csv')

### Dataset

In [3]:
df.head()

Unnamed: 0,device,building,floor,room,type,power,date,from_time,to_time,no_of_people,time_stayed_mins,target
0,AC,1,1,101,indoor,1363,01-01-2018,04:00:00,05:00:00,9,7,1
1,Computer,1,1,101,indoor,487,27-01-2018,08:00:00,09:00:00,0,0,0
2,Computer,1,1,101,indoor,464,29-01-2018,05:00:00,06:00:00,1,16,0
3,Computer,1,1,101,indoor,389,30-01-2018,01:00:00,02:00:00,8,27,1
4,Computer,1,1,101,indoor,338,30-01-2018,08:00:00,09:00:00,2,4,1


### Importing dataset

In [4]:
X = df.iloc[:, [0, 3, 4, 5, 6, 7, 9, 10]].values
y = df.iloc[:, 11].values

### Encoding Categorical Variables

In [5]:
# Encoding categorical data
labelencoder_X_0 = LabelEncoder()
X[:, 0] = labelencoder_X_0.fit_transform(X[:, 0])
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
labelencoder_X_4 = LabelEncoder()
X[:, 4] = labelencoder_X_4.fit_transform(X[:, 4])
labelencoder_X_5 = LabelEncoder()
X[:, 5] = labelencoder_X_5.fit_transform(X[:, 5])

onehotencoder = OneHotEncoder(categorical_features=[0, 1, 2, 4, 5])
hot_X = onehotencoder.fit_transform(X).toarray()

### Avoiding the dummy variable trap

In [6]:
columns = df.columns
dummies = []
dummies_sum = 0
categories = [0, 1, 2, 4, 5]

for category in categories:
    dummies_sum += category * (df.iloc[:, category].unique().size)
    dummies.append(dummies_sum)
    
# Removing dummy variables
hot_X = np.delete(hot_X, dummies, 1)

  # This is added back by InteractiveShellApp.init_path()


### Splitting the dataset into the Training set and Test set (75%, 25%)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

### Feature Scaling

In [8]:
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)



### Fitting classifier to the Training set

In [9]:
classifier = SVC(kernel='rbf', random_state=0)
classifier.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=0, shrinking=True,
  tol=0.001, verbose=False)

### Predicting the Test set results

In [10]:
y_pred = classifier.predict(X_test)

print(classifier.score(X_test, y_test))

0.7196620583717358


### The Confusion Matrix

In [11]:
print(confusion_matrix(y_test, y_pred))

[[461 193]
 [172 476]]


In [12]:
print(classification_report(y_test, y_pred))

             precision    recall  f1-score   support

          0       0.73      0.70      0.72       654
          1       0.71      0.73      0.72       648

avg / total       0.72      0.72      0.72      1302

