In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
from pathlib import Path
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(current_dir, file_name)
    for current_dir, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Collect every file below the KETI root. rglob() yields entries in whatever
# order the filesystem returns them, so the result is sorted to give a stable
# room order (and therefore a stable row order in the combined frame).
paths = sorted(Path('/kaggle/input/smart-building-system/KETI/').rglob('*.*'))

# One list per sensor type. All five lists are filtered from the same sorted
# sequence, so entry i of each list comes from the same room directory as long
# as every room contains all five files.
light_paths = [path for path in paths if path.name == 'light.csv']
temperature_paths = [path for path in paths if path.name == 'temperature.csv']
co2_paths = [path for path in paths if path.name == 'co2.csv']
pir_paths = [path for path in paths if path.name == 'pir.csv']
humidity_paths = [path for path in paths if path.name == 'humidity.csv']

In [None]:
# Sensor name -> list of CSV paths, one path per room. The dict order defines
# the column order of the per-room frame.
sensor_path_lists = {
    'light': light_paths,
    'temperature': temperature_paths,
    'co2': co2_paths,
    'pir': pir_paths,
    'humidity': humidity_paths,
}

# zip() stops at the shortest list, so a missing sensor file would silently
# drop rooms or pair files from different rooms. Fail loudly instead.
path_counts = {sensor: len(sensor_paths) for sensor, sensor_paths in sensor_path_lists.items()}
if len(set(path_counts.values())) != 1:
    raise ValueError(f'Sensor file counts differ, cannot pair files by room: {path_counts}')

frames = []
for room_paths in zip(*sensor_path_lists.values()):
    # All five files of one iteration must live in the same room directory.
    room_dirs = {path.parent for path in room_paths}
    if len(room_dirs) != 1:
        raise ValueError(f'Sensor files from different rooms were paired: {sorted(map(str, room_paths))}')

    # Each CSV is read without a header row as (unix_time, <sensor value>) and
    # indexed by unix_time so the sensors can be aligned column-wise.
    readings = [
        pd.read_csv(path, names=['unix_time', sensor], index_col='unix_time')
        for sensor, path in zip(sensor_path_lists, room_paths)
    ]
    df = pd.concat(readings, axis=1)
    df['room'] = room_paths[0].parent.name
    frames.append(df)

if not frames:
    raise ValueError('No sensor CSV files were found; check the input directory.')

# Stack all rooms into one long frame (unix_time index, one column per sensor, plus room).
data = pd.concat(frames)

In [None]:
# Summary statistics for the numeric columns of the combined frame.
data.describe()

In [None]:
# Number of missing values in each column of the combined frame.
data.isnull().sum() 

In [None]:
# Pairwise correlation of the sensor columns. 'room' holds strings, and
# DataFrame.corr() raises on non-numeric columns in pandas >= 2.0, so the
# frame is restricted to numeric columns first.
data.select_dtypes(include='number').corr()

In [None]:
# Correlation matrix of the numeric sensor columns, drawn as a heatmap.
# The string column 'room' is excluded because DataFrame.corr() raises on
# non-numeric columns in pandas >= 2.0.
p = data.select_dtypes(include='number').corr()
sns.heatmap(p)

In [None]:
# Rows without a pir reading cannot be labelled, so they are dropped first.
data = data.dropna(subset=['pir'])
# Binary target: 0 where the pir reading is exactly 0, 1 for any other value.
# The vectorised comparison yields the same labels as a per-row lambda
# without calling Python once per row.
y = (data['pir'] != 0).astype('int64')
# Features are the remaining sensor columns; 'room' is a string label and
# 'pir' is the source of the target, so both are removed.
X = data.drop(['pir','room'], axis=1)

In [None]:
from sklearn.impute import SimpleImputer
# Fill the missing values left in the feature columns using SimpleImputer's
# default settings. X is rebound to the transformed result.
# NOTE(review): the imputer is fitted on every row before the train/test
# split performed in the next cell, so rows that later land in the test set
# contribute to the fill values. Consider fitting on the training split only.
my_imputer = SimpleImputer()
X = my_imputer.fit_transform(X)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out part of the data for evaluation (default split proportions).
# A fixed seed keeps the split, and every score reported below, reproducible
# across runs; 0 matches the seed used by the seeded classifiers later on.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score,accuracy_score

# Logistic regression with default settings: fit on the training split, then
# report F1 and accuracy (as a percentage) on the held-out split.
clf = LogisticRegression()
y_pred = clf.fit(X_train, y_train).predict(X_test)
print('F1 Score', f1_score(y_test, y_pred))
print('Accuracy', accuracy_score(y_test, y_pred)*100)


In [None]:
from xgboost import XGBClassifier

# Gradient-boosted trees with default settings, scored on the held-out split.
clf = XGBClassifier()
y_pred = clf.fit(X_train, y_train).predict(X_test)
print('F1 Score', f1_score(y_test, y_pred))
print('Accuracy', accuracy_score(y_test, y_pred)*100)

In [None]:
from sklearn import svm

# Support vector classifier with a linear kernel, scored on the held-out split.
# NOTE(review): SVC training cost grows quickly with the number of samples;
# confirm this cell finishes in reasonable time on the full training split.
clf = svm.SVC(kernel="linear")
y_pred = clf.fit(X_train, y_train).predict(X_test)
print('F1 Score', f1_score(y_test, y_pred))
print('Accuracy', accuracy_score(y_test, y_pred)*100)

In [None]:
from sklearn.linear_model import PassiveAggressiveClassifier

# Passive-aggressive linear classifier (seeded, at most 1000 iterations,
# tolerance 1e-3), scored on the held-out split.
clf = PassiveAggressiveClassifier(
    max_iter=1000,
    random_state=0,
    tol=1e-3,
)
y_pred = clf.fit(X_train, y_train).predict(X_test)
print('F1 Score', f1_score(y_test, y_pred))
print('Accuracy', accuracy_score(y_test, y_pred)*100)

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with default settings, scored on the held-out split.
classifier = GaussianNB()
y_pred = classifier.fit(X_train, y_train).predict(X_test)
print('F1 Score', f1_score(y_test, y_pred))
print('Accuracy', accuracy_score(y_test, y_pred)*100)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Shallow random forest (trees limited to depth 2, seeded), scored on the
# held-out split.
classifier = RandomForestClassifier(
    max_depth=2,
    random_state=0,
)
y_pred = classifier.fit(X_train, y_train).predict(X_test)
print('F1 Score', f1_score(y_test, y_pred))
print('Accuracy', accuracy_score(y_test, y_pred)*100)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Try every neighbourhood size from 1 to 19 and print the held-out accuracy
# obtained with each one.
for k in range(1, 20):
    knn2 = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    print("For k = %d accuracy is"%k, knn2.score(X_test, y_test))

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours with k fixed at 3; keeps the predictions and prints the
# held-out accuracy as a percentage.
knn1 = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
y_pred = knn1.predict(X_test)
print("Score is", knn1.score(X_test, y_test)*100)

In [None]:
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [None]:
# baseline model
def create_baseline(input_dim=None):
    """Build and compile the baseline binary classifier.

    The network has one hidden Dense layer of 60 ReLU units followed by a
    single sigmoid output unit. It is compiled with binary cross-entropy
    loss, the Adam optimizer and accuracy as the tracked metric.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features. When omitted, the width of the
        notebook-level feature matrix ``X`` is used, so the input layer
        matches the data being fitted. A hard-coded width of 60 does not
        match the four sensor feature columns prepared earlier.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    if input_dim is None:
        # The KerasClassifier wrapper is not given an input_dim, so this
        # function is called without arguments; read the width from X.
        input_dim = X.shape[1]

    # create model
    model = Sequential()
    model.add(Dense(60, input_dim=input_dim, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [None]:
# Evaluate the baseline network with shuffled, stratified 10-fold
# cross-validation. The features are used as prepared earlier in the
# notebook; no standardisation step is applied in this cell.
estimator = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)
# Seed the shuffle so the folds (and the reported score) are reproducible.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
# `y` already holds the 0/1 labels derived from the pir column, so it is used
# directly; `encoded_Y` is never defined in this notebook.
results = cross_val_score(estimator, X, y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))