In [19]:
# Import required libraries and dependencies
import pandas as pd
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import pydotplus
from IPython.display import Image
from sklearn.neighbors import KNeighborsClassifier

In [2]:
sleep_df = pd.read_csv('Sleep_health_and_lifestyle_dataset.csv')
sleep_df.fillna('None',inplace=True)

In [3]:
sleep_df['BMI Category'] = [category if category != 'Normal Weight' else 'Normal' for category in sleep_df['BMI Category']]
sleep_df['Systolic Pressure'] = [int(string[:3]) for string in sleep_df['Blood Pressure']]
sleep_df['Diastolic Pressure'] = [int(string[-2:]) for string in sleep_df['Blood Pressure']]
disorder = sleep_df['Sleep Disorder']
sleep_df.drop(columns=['Blood Pressure','Sleep Disorder','Occupation','Person ID'],inplace=True)

In [4]:
y = disorder.values
X = pd.get_dummies(sleep_df)
# Splitting into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [5]:
X_train_scaled = StandardScaler().fit_transform(X_train)
X_test_scaled = StandardScaler().fit_transform(X_test)

In [6]:
# Creating the decision tree classifier instance
model = tree.DecisionTreeClassifier()

# Fitting the model
model = model.fit(X_train_scaled, y_train)

# Making predictions using the testing data
predictions = model.predict(X_test_scaled)

In [7]:
predictions
pd.DataFrame(y_test).value_counts()

None           52
Sleep Apnea    24
Insomnia       18
Name: count, dtype: int64

In [8]:
cm = confusion_matrix(y_test, predictions)
cm

array([[16,  1,  1],
       [ 6, 45,  1],
       [ 3,  7, 14]])

In [15]:
# Create a random forest classifier
rf_model = RandomForestClassifier(n_estimators=1000)

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

# Making predictions using the testing data
predictions = rf_model.predict(X_test_scaled)

In [30]:
confusion_matrix(y_test, predictions)


array([[16,  1,  1],
       [ 0, 51,  1],
       [ 3,  2, 19]])

In [17]:
# Random Forests in sklearn will automatically calculate feature importance
importances = rf_model.feature_importances_
# We can sort the features by their importance
sorted(zip(rf_model.feature_importances_, X.columns), reverse=True)

[(0.14944740275754057, 'Diastolic Pressure'),
 (0.14204171426054504, 'BMI Category_Normal'),
 (0.1375580909257498, 'Systolic Pressure'),
 (0.10827749695330986, 'Sleep Duration'),
 (0.10230008623290848, 'Age'),
 (0.09287382587162991, 'BMI Category_Overweight'),
 (0.06432790927534757, 'Daily Steps'),
 (0.05857596593533887, 'Heart Rate'),
 (0.0575637499154994, 'Physical Activity Level'),
 (0.03636127627634627, 'Stress Level'),
 (0.032845493155802395, 'Quality of Sleep'),
 (0.0076958029292239355, 'Gender_Male'),
 (0.007059759350583153, 'Gender_Female'),
 (0.0030714261601748204, 'BMI Category_Obese')]

In [36]:
# Instantiate the model with k = 3 neighbors
model = KNeighborsClassifier(n_neighbors=3)

# Train the model
model.fit(X_train_scaled, y_train)

# Create predictions
y_pred = model.predict(X_test_scaled)

# Review the predictions
y_pred

array(['Sleep Apnea', 'Sleep Apnea', 'None', 'Sleep Apnea', 'Sleep Apnea',
       'None', 'None', 'Sleep Apnea', 'None', 'Insomnia', 'None',
       'Sleep Apnea', 'Sleep Apnea', 'None', 'None', 'Insomnia', 'None',
       'Sleep Apnea', 'None', 'Sleep Apnea', 'None', 'None', 'None',
       'Insomnia', 'None', 'Insomnia', 'None', 'None', 'None', 'None',
       'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None',
       'Sleep Apnea', 'Insomnia', 'None', 'None', 'Insomnia', 'None',
       'None', 'None', 'None', 'Insomnia', 'None', 'Insomnia', 'None',
       'None', 'None', 'Insomnia', 'None', 'Sleep Apnea', 'None',
       'Insomnia', 'Sleep Apnea', 'None', 'Sleep Apnea', 'Sleep Apnea',
       'None', 'None', 'None', 'None', 'None', 'Sleep Apnea', 'None',
       'None', 'Insomnia', 'Insomnia', 'None', 'None', 'None', 'Insomnia',
       'Insomnia', 'Sleep Apnea', 'Insomnia', 'None', 'None', 'None',
       'Insomnia', 'None', 'Insomnia', 'Insomnia', 'None', 'None',
       'Sleep 

In [37]:
confusion_matrix(y_test,y_pred)

array([[17,  1,  0],
       [ 0, 51,  1],
       [ 3,  3, 18]])