This script is used to train and compare prediction models for Alcohol Consumption & Sleep Data

In [None]:
#Import all the required packages
from numpy.lib.twodim_base import diagflat
import pymongo
import csv
import pandas as pd
import numpy as np
from sklearn import svm, tree
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
import pickle

In [None]:
#Import data and process dataframe

#RSSI value substituted for a missing or malformed reading
MISSING_RSSI = -100
#Number of rows (one per minute) that make up one day of data
MINUTES_PER_DAY = 1440


def _read_rssi(row, index):
    """Return row[index] as an int, or MISSING_RSSI if it is absent or not an integer."""
    try:
        return int(row[index])
    except (IndexError, ValueError):
        #Only the failures a short or non-numeric row can produce are handled;
        #anything else (e.g. KeyboardInterrupt) is allowed to propagate
        return MISSING_RSSI


#Maps date -> [time in, living room minutes, bedroom minutes, kitchen minutes]
dimension_reduced_ble = {}

#Opens raw BLE Data
with open('/Users/pdmcguckian/Documents/IoT/BLEData.csv', 'r') as file:

    count = 0 #Keeps track of minute within the current day
    date = 12 #Keeps track of date
    bdrm = 0 #Daily bedroom minutes
    lvngrm = 0 #Daily living room minutes
    ktchn = 0 #Daily kitchen minutes
    timein = 0 #Minute of the first in-house reading; 0 if there was none that day
    real = False #Flag to note whether the house was entered on this date

    #Loops through every row of the data
    reader = csv.reader(file)
    for row in reader:

        #Increases minute counter
        count += 1

        #Missing or incorrect values fall back to MISSING_RSSI
        lvngrmRSSI = _read_rssi(row, 1)
        bdrmRSSI = _read_rssi(row, 2)
        ktchnRSSI = _read_rssi(row, 3)

        #A row where every room reads MISSING_RSSI is treated as "not in the house"
        in_house = not (
            bdrmRSSI == MISSING_RSSI
            and lvngrmRSSI == MISSING_RSSI
            and ktchnRSSI == MISSING_RSSI
        )

        if in_house:
            #The room with the strongest signal gets the minute;
            #ties resolve in the order bedroom, living room, kitchen
            strongest = max(bdrmRSSI, lvngrmRSSI, ktchnRSSI)
            if bdrmRSSI == strongest:
                bdrm += 1
            elif lvngrmRSSI == strongest:
                lvngrm += 1
            else:
                ktchn += 1

            #First value in the house is saved as the time in the door
            if not real:
                timein = count
                real = True

        #Once 1440 minutes have been processed, save the day's data and reset for the next day.
        #Doing this as soon as the day completes (rather than when the next row arrives)
        #means the final day in the file is stored as well.
        if count == MINUTES_PER_DAY:

            #Date 27 is never used as a key; its slot is stored as 28
            #(presumably no data was recorded on the 27th - confirm)
            if date == 27:
                date = 28

            dimension_reduced_ble[date] = [timein, lvngrm, bdrm, ktchn]
            count = 0
            bdrm = 0
            lvngrm = 0
            ktchn = 0
            date += 1
            timein = 0
            real = False

print(dimension_reduced_ble)

In [None]:
#Combine the Fitbit prediction data with the BLE summaries to create the dataset

#List that accumulates the final dataset, one six-value instance per kept day
dataset = []

#Walks through each day's entry in the prediction set
with open('/Users/pdmcguckian/Documents/IoT/PredictionData.csv', 'r') as file:
    reader = csv.reader(file)
    for row in reader:
        date, alcohol, sleep = int(row[0]), int(row[1]), int(row[2])

        #Rows with a sleep value of 0 are left out of the dataset
        if sleep == 0:
            continue

        #Sleep efficiency becomes a binary label: 1 when above 84, otherwise 0
        sleep_quality = 1 if sleep > 84 else 0

        #BLE summary for the same date: [time in, living room, bedroom, kitchen]
        ble = dimension_reduced_ble[date]

        #Complete six-value instance, appended to the dataset
        instance = [alcohol, sleep_quality, ble[0], ble[1], ble[2], ble[3]]
        dataset.append(instance)

In [None]:
#Build a dataframe from the dataset, one named column per instance value
column_names = ['Alcohol', 'Sleep', 'TimeIn', 'LivingRoom', 'Bedroom', 'Kitchen']
df = pd.DataFrame(dataset, columns=column_names)

#Shuffle every row with a fixed seed, then renumber the index from 0
df = df.sample(frac=1, random_state=42)
df = df.reset_index(drop=True)
print(df)

In [None]:
#Separate the two target columns (sleep and alcohol) from the feature columns

target_columns = ["Alcohol", "Sleep"]
y_a = df['Alcohol']
y_s = df['Sleep']

#Everything that is not a target is used as a feature
X = df.drop(target_columns, axis=1)

In [None]:
#Cross-validate a linear-kernel SVM on both targets

clf = svm.SVC(C=10, kernel='linear')

#3-fold cross-validation scores for the sleep and alcohol targets
scores_s = cross_val_score(clf, X, y_s, cv=3)
scores_a = cross_val_score(clf, X, y_a, cv=3)

#Report the mean and spread of the fold scores
sleep_mean, sleep_std = scores_s.mean(), scores_s.std()
alcohol_mean, alcohol_std = scores_a.mean(), scores_a.std()
print("Sleep - SVM: %0.2f accuracy with a standard deviation of %0.2f" % (sleep_mean, sleep_std))
print("Alcohol - SVM: %0.2f accuracy with a standard deviation of %0.2f" % (alcohol_mean, alcohol_std))


In [None]:
#Cross-validate a depth-2 decision tree on both targets

clf = tree.DecisionTreeClassifier(min_impurity_decrease=0.0, max_depth=2)

#3-fold cross-validation scores for the sleep and alcohol targets
scores_s = cross_val_score(clf, X, y_s, cv=3)
scores_a = cross_val_score(clf, X, y_a, cv=3)

#Report the mean and spread of the fold scores
sleep_mean, sleep_std = scores_s.mean(), scores_s.std()
alcohol_mean, alcohol_std = scores_a.mean(), scores_a.std()
print("Sleep - Decision Tree: %0.2f accuracy with a standard deviation of %0.2f" % (sleep_mean, sleep_std))
print("Alcohol - Decision Tree: %0.2f accuracy with a standard deviation of %0.2f" % (alcohol_mean, alcohol_std))

In [None]:
#Cross-validate logistic regression on both targets

clf = LogisticRegression(C=100, max_iter=1300)

#3-fold cross-validation scores for the sleep and alcohol targets
scores_s = cross_val_score(clf, X, y_s, cv=3)
scores_a = cross_val_score(clf, X, y_a, cv=3)

#Report the mean and spread of the fold scores
sleep_mean, sleep_std = scores_s.mean(), scores_s.std()
alcohol_mean, alcohol_std = scores_a.mean(), scores_a.std()
print("Sleep - Logistic Regressioin: %0.2f accuracy with a standard deviation of %0.2f" % (sleep_mean, sleep_std))
print("Alcohol - Logistic Regressioin: %0.2f accuracy with a standard deviation of %0.2f" % (alcohol_mean, alcohol_std))

In [None]:
#Cross-validate logistic regression on degree-2 polynomial features for both targets

#Expand the features with every degree-2 term (squares and interactions, no bias column)
poly = PolynomialFeatures(degree=2, include_bias=False, interaction_only=False)

#NOTE: X itself is replaced by the expanded features, so anything that runs after this
#cell (including a re-run of this cell) sees the polynomial version of X
X = poly.fit_transform(X)

clf = LogisticRegression(C=100, max_iter=1300)

#3-fold cross-validation scores for the sleep and alcohol targets
scores_s = cross_val_score(clf, X, y_s, cv=3)
scores_a = cross_val_score(clf, X, y_a, cv=3)

#Report the mean and spread of the fold scores
sleep_mean, sleep_std = scores_s.mean(), scores_s.std()
alcohol_mean, alcohol_std = scores_a.mean(), scores_a.std()
print("Sleep - Poly Logistic Regressioin: %0.2f accuracy with a standard deviation of %0.2f" % (sleep_mean, sleep_std))
print("Alcohol - Poly Logistic Regressioin: %0.2f accuracy with a standard deviation of %0.2f" % (alcohol_mean, alcohol_std))