In [42]:
# Import Modules
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
from pathlib import Path
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.preprocessing import StandardScaler

# Display all of the columns
pd.set_option('display.max_columns', None)

In [43]:
# Loading data
sleep_df = pd.read_csv("sleep_df_dummies.csv")

sleep_df.head()

Unnamed: 0,ID,Age,Gender,Bedtime Hour,Wakeup Time,Sleep duration,Sleep efficiency,REM sleep percentage,Deep sleep percentage,Light sleep percentage,Awakenings,Caffeine consumption,Alcohol consumption,Smoking status,Exercise frequency,Female,Male,Late Bedtime,Normal Bedtime,Really Late Bedtime,Non-Smoker,Smoker,Early Wakeup,Late Wakeup,Normal Wakeup,Very Early Wakeup
0,1,65,Female,Really Late Bedtime,Early Wakeup,7.5,Efficient,28.0,60.0,12.0,3.0,50,0,Non-Smoker,1,1,0,0,0,1,1,0,1,0,0,0
1,2,69,Male,Really Late Bedtime,Normal Wakeup,5.5,Inefficient,22.0,22.0,56.0,2.0,0,0,Non-Smoker,0,0,1,0,0,1,1,0,0,0,1,0
2,4,40,Female,Really Late Bedtime,Normal Wakeup,7.0,Efficient,25.0,55.0,20.0,1.0,0,5,Smoker,2,1,0,0,0,1,0,1,0,0,1,0
3,5,57,Male,Really Late Bedtime,Normal Wakeup,8.0,Inefficient,18.0,35.0,47.0,1.0,0,0,Smoker,3,0,1,0,0,1,0,1,0,0,1,0
4,6,36,Female,Normal Bedtime,Very Early Wakeup,9.0,Efficient,25.0,60.0,15.0,0.0,75,0,Smoker,2,1,0,0,1,0,0,1,0,0,0,1


In [44]:
# Drop redundant and not needed columns

sleep_df_clean = sleep_df.drop(columns= ["ID", "Age", "Gender","Bedtime Hour","Wakeup Time", "REM sleep percentage",
                                         "Deep sleep percentage", "Light sleep percentage",
                                         "Awakenings", "Smoking status"])

sleep_df_clean.tail()

Unnamed: 0,Sleep duration,Sleep efficiency,Caffeine consumption,Alcohol consumption,Exercise frequency,Female,Male,Late Bedtime,Normal Bedtime,Really Late Bedtime,Non-Smoker,Smoker,Early Wakeup,Late Wakeup,Normal Wakeup,Very Early Wakeup
388,5.0,Inefficient,95,3,0,0,1,0,0,1,1,0,1,0,0,0
389,5.0,Inefficient,95,0,3,1,0,0,0,1,1,0,0,0,1,0
390,6.0,Efficient,0,0,0,1,0,0,0,1,0,1,0,0,1,0
391,8.0,Efficient,0,0,4,0,1,0,0,1,0,1,0,0,1,0
392,8.0,Efficient,0,0,5,0,1,0,1,0,0,1,0,0,0,1


In [45]:
# Seperate the features, X,  from the target variable, y
y = sleep_df_clean['Sleep efficiency']
X = sleep_df_clean.drop(columns='Sleep efficiency')

In [46]:
# Split the dataset using train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [47]:
# Create the StandardScaler instance
scaler = StandardScaler()
# Fit the Standard Scaler with the training data
X_scaler = scaler.fit(X_train)
# Scale the training data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [68]:
# Instantiate the model with k = 3 neighbors
model = KNeighborsClassifier(n_neighbors=3)

In [69]:
 # Train the model
model.fit(X_train, y_train)

In [70]:
# Create predictions
y_pred = model.predict(X_test_scaled)

# Review the predictions
y_pred



array(['Efficient', 'Efficient', 'Inefficient', 'Efficient', 'Efficient',
       'Efficient', 'Efficient', 'Efficient', 'Efficient', 'Efficient',
       'Efficient', 'Efficient', 'Efficient', 'Efficient', 'Efficient',
       'Efficient', 'Efficient', 'Efficient', 'Efficient', 'Efficient',
       'Efficient', 'Efficient', 'Efficient', 'Efficient', 'Efficient',
       'Efficient', 'Efficient', 'Inefficient', 'Efficient', 'Efficient',
       'Efficient', 'Efficient', 'Efficient', 'Efficient', 'Efficient',
       'Efficient', 'Efficient', 'Inefficient', 'Efficient', 'Efficient',
       'Efficient', 'Efficient', 'Efficient', 'Efficient', 'Efficient',
       'Efficient', 'Efficient', 'Efficient', 'Inefficient', 'Efficient',
       'Efficient', 'Efficient', 'Efficient', 'Efficient', 'Efficient',
       'Efficient', 'Inefficient', 'Efficient', 'Efficient', 'Efficient',
       'Efficient', 'Efficient', 'Efficient', 'Efficient', 'Efficient',
       'Efficient', 'Efficient', 'Efficient', 'Efficie

In [71]:
# Print confusion matrix
confusion_matrix(y_pred,y_test)

array([[67, 26],
       [ 0,  6]], dtype=int64)

In [72]:
# Print classification report
print(classification_report(y_pred,y_test))

              precision    recall  f1-score   support

   Efficient       1.00      0.72      0.84        93
 Inefficient       0.19      1.00      0.32         6

    accuracy                           0.74        99
   macro avg       0.59      0.86      0.58        99
weighted avg       0.95      0.74      0.81        99

