In [1]:
import pandas as pd
import numpy as np
import time
import pickle
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier

In [2]:
# reading data
IoT = pd.read_csv('../../data/test_garage.csv')
# IoT = IoT.iloc[0:500]  # uncomment to iterate quickly on a small subset
# DataFrame.info() prints its report itself and returns None, so it must not be
# wrapped in print() (doing so appended a stray "None" line to the output)
IoT.info()
print()
print(IoT.head())
# processing data ----------------------------------------------------
# drop NaN values
IoT = IoT.dropna()

# encode string data to numeric
# every column gets its own LabelEncoder so that the fitted classes_ of each
# column (in particular the target 'type') remain available afterwards, e.g.
# to translate predicted integers back into the original strings
encoders = {}
for column in ('type', 'state', 'sphone_signal'):
    encoders[column] = LabelEncoder()
    IoT[column] = encoders[column].fit_transform(IoT[column])
# the former shared `encoder` ended up fitted on 'sphone_signal'; keep that
# name bound to the equivalent object for any code that still refers to it
encoder = encoders['sphone_signal']
# --------------------------------------------------------------------

# dividing into input and output variables
# 'type' is the prediction target; 'label', 'date' and 'time' are excluded from
# the features, leaving the encoded 'state' and 'sphone_signal' columns
x = IoT.drop(['label', 'date', 'time', 'type'], axis=1)
y = IoT['type']

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102951 entries, 0 to 102950
Data columns (total 6 columns):
 #   Column         Non-Null Count   Dtype 
---  ------         --------------   ----- 
 0   date           102951 non-null  object
 1   time           102951 non-null  object
 2   state          102951 non-null  object
 3   sphone_signal  102951 non-null  object
 4   label          102951 non-null  object
 5   type           102951 non-null  object
dtypes: object(6)
memory usage: 4.7+ MB
None

       date      time   state sphone_signal label    type
0  1-Apr-19  20:53:44    open          true     0  normal
1  1-Apr-19  20:53:49  closed         false     0  normal
2  1-Apr-19  20:53:49    open          true     0  normal
3  1-Apr-19  20:53:54  closed         false     0  normal
4  1-Apr-19  20:53:54    open          true     0  normal


In [3]:
# Fit given model with the data and print required metrics
# start from an empty metrics log: opening in mode 'w' truncates the file, and
# the context manager guarantees the handle is closed again
with open('../metrics_summary.log', 'w') as log:
    pass

def run(x, y, model, name, test_size=0.2, random_state=42):
    """Fit `model` on a train split, persist it, and append its test metrics to the log.

    Parameters
    ----------
    x : feature table passed to ``train_test_split``.
    y : target values aligned with ``x``.
    model : estimator exposing ``fit`` and ``predict``; it is fitted in place.
    name : str
        Identifier used for the log section header and for the saved file
        ``../h5s/{name}.h5``.
    test_size : float, default 0.2
        Fraction of the data held out for evaluation.
    random_state : int or None, default 42
        Seed for the train/test split. A fixed seed makes the run reproducible
        and evaluates every model on the same held-out rows; pass ``None`` to
        get a different random split on every call.

    Returns
    -------
    None. Results are appended to ``../metrics_summary.log``.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state
    )

    # fitting model and calculating time consumed
    # perf_counter is monotonic, so the interval cannot be skewed by clock changes
    start = time.perf_counter()
    model.fit(x_train, y_train)
    end = time.perf_counter()
    time_consumed = end - start

    # save model -- only after fitting, so the stored object is the trained
    # estimator (note: despite the .h5 extension the file is a pickle)
    filename = f'../h5s/{name}.h5'
    with open(filename, 'wb') as model_file:
        pickle.dump(model, model_file)

    # prediction
    y_pred = model.predict(x_test)

    # calculate required metrics
    # NOTE(review): restricting `labels` to the classes that were actually
    # predicted leaves never-predicted classes out of the weighted averages;
    # kept as in the original, but it can make the scores look better than a
    # computation over all true classes would.
    predicted_labels = np.unique(y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', labels=predicted_labels)
    recall = recall_score(y_test, y_pred, average='weighted', labels=predicted_labels)
    f1_score_value = f1_score(y_test, y_pred, average='weighted', labels=predicted_labels)

    # print calculated metrics
    # the log is opened only once everything is computed and is closed by the
    # context manager even if a write fails
    with open('../metrics_summary.log', 'a') as log:
        log.write(f'{name}---->\n')
        log.write(f'Test accuracy: {accuracy}\n')
        log.write(f'Test precision: {precision}\n')
        log.write(f'Test recall: {recall}\n')
        log.write(f'Test f1_score: {f1_score_value}\n')
        log.write(f'Time taken in fitting: {time_consumed}\n')
        log.write('\n\n')

In [4]:
# # LSTM
# from keras.models import Sequential
# from keras.layers import Dense, Dropout, LSTM, Flatten

# lstm = Sequential()
# lstm.add(LSTM(units = 128, activation='tanh', return_sequences=True, input_shape = (features,1)))
# lstm.add(Dropout(0.2))
# lstm.add(LSTM(units = 64, activation='tanh', return_sequences=True))
# lstm.add(Dropout(0.2))
# lstm.add(Dense(1, activation='sigmoid')) 
# lstm.add(Flatten())
# lstm.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

# # predictions
# predictions = lstm.predict(x_test)
# y_pred = [round(x[0]) for x in predictions]

In [5]:
# # GRU
# from keras.models import Sequential
# from keras.layers import Dense, Dropout, GRU, Flatten

# gru = Sequential()
# gru.add(GRU(units = 128, activation='tanh', return_sequences=True, input_shape = (features,1)))
# gru.add(Dropout(0.2))
# gru.add(GRU(units = 64, activation='tanh', return_sequences=True))
# gru.add(Dropout(0.2))
# gru.add(Dense(1, activation='sigmoid'))
# gru.add(Flatten())
# gru.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

# # prediction
# predictions = gru.predict(x_test)
# y_pred = [round(x[0]) for x in predictions]

In [6]:
# CART
from sklearn.tree import DecisionTreeClassifier
# decision tree split on Gini impurity; random_state is fixed so that repeated
# runs build the same tree (the seed value itself is arbitrary)
cart = DecisionTreeClassifier(criterion='gini', random_state=42)

In [7]:
# kNN
# k-nearest-neighbours classifier that considers the 2 closest training samples
# NOTE(review): with an even k the two neighbours can disagree -- confirm that
# the library's tie-breaking is acceptable or consider an odd k
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=2)

In [8]:
# LDA
# linear discriminant analysis classifier, created with the library defaults
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis()

In [9]:
# Logistic Regression
# created with the library defaults (no solver / regularisation overrides)
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()

In [10]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier
# forest of 100 trees; random_state is fixed so the bootstrap samples and
# split choices -- and therefore the reported metrics -- are reproducible
# (the seed value itself is arbitrary)
rf = RandomForestClassifier(n_estimators=100, random_state=42)

In [11]:
# Support Vector Machine
# probability estimates are switched off; only predict() is called on this
# model in the notebook, and the ensemble below uses hard voting
from sklearn.svm import SVC
svc = SVC(probability=False)

In [12]:
# evaluate each base classifier in turn; the order determines the order of the
# sections appended to the metrics log
base_models = [
    ('CART', cart),
    ('kNN', knn),
    ('LDA', lda),
    ('LR', lr),
    ('RF', rf),
    ('SVC', svc),
]
for model_name, classifier in base_models:
    run(x, y, classifier, model_name)

In [13]:
# ensemble model
# hard (majority) voting over the six base classifiers defined in the cells above
ensemble_members = [
    ('RF', rf),
    ('LR', lr),
    ('LDA', lda),
    ('KNN', knn),
    ('CART', cart),
    ('SVC', svc),
]
voting = VotingClassifier(estimators=ensemble_members, voting='hard')

# fit, persist and score the ensemble exactly like the individual models
run(x, y, voting, 'VC')