In [5]:
#importing the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Sleep Disorder Prediction
The aim of this project is to analyze a person's lifestyle and medical variables, such as age, BMI, physical activity, sleep duration, blood pressure, and more, in order to predict whether the person has a sleep disorder and, if so, which type.

## Data Dictionary

| Column Name | Description |
| --- | --- |
| Person ID | Unique ID assigned to each person |
| Gender | The gender of the person (Male/Female) |
| Age | Age of the person in years |
| Occupation | The occupation of the person |
| Sleep Duration | The duration of sleep of the person in hours |
| Quality of Sleep | A subjective rating of the quality of sleep, ranging from 1 to 10 |
| Physical Activity Level | The level of physical activity of the person, recorded as a numeric value (e.g. 30, 42, 60) |
| Stress Level | A subjective rating of the stress level, ranging from 1 to 10 |
| BMI Category | The BMI category of the person (Underweight/Normal/Overweight/Obese) |
| Blood Pressure | The blood pressure of the person in mmHg, recorded as systolic/diastolic (e.g. 126/83) |
| Heart Rate | The heart rate of the person in beats per minute |
| Daily Steps | The number of steps taken by the person per day |
| Sleep Disorder | The presence or absence of a sleep disorder in the person (None, Insomnia, Sleep Apnea) |


In [7]:
# Read the sleep dataset from the CSV file into the DataFrame `df`,
# then preview its first five rows.
df = pd.read_csv(filepath_or_buffer='SleepDataSet.csv')
df.head(n=5)

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea


In [None]:
# Count the missing (null) entries in every column of the DataFrame `df`.

df.isna().sum()  # one entry per column: how many of its values are missing

Person ID                    0
Gender                       0
Age                          0
Occupation                   0
Sleep Duration               0
Quality of Sleep             0
Physical Activity Level      0
Stress Level                 0
BMI Category                 0
Blood Pressure               0
Heart Rate                   0
Daily Steps                  0
Sleep Disorder             219
dtype: int64

In [9]:
# Replace the null values in the 'Sleep Disorder' column with the string 'None'.
# The filled Series is assigned back to the column instead of calling
# fillna(inplace=True) on the `df[...]` selection: that chained in-place form
# emits a FutureWarning in recent pandas 2.x releases and does not update `df`
# when Copy-on-Write is enabled.
df['Sleep Disorder'] = df['Sleep Disorder'].fillna('None')

## Data Preprocessing Part 1

In [11]:
# Split the 'Blood Pressure' strings (formatted like '126/83') on '/' into two
# numeric columns. The vectorised .str.split replaces the per-row lambdas, and
# the integer cast stores the readings as numbers rather than leaving them as
# text, so they can be compared, plotted and fed to a model as numeric features.
bp_parts = df['Blood Pressure'].str.split('/', expand=True).astype(int)

df['systolic_bp'] = bp_parts[0]   # value before the '/'
df['diastolic_bp'] = bp_parts[1]  # value after the '/'

# Drop the original combined column now that both parts are stored separately.
# Rebinding `df` (instead of inplace=True) keeps the step explicit.
df = df.drop(columns='Blood Pressure')


In [12]:
#set style of graph
sns.set_theme(style = "darkgrid")
%matplotlib inline

In [13]:
from sklearn.model_selection import train_test_split

# Split the data into training and test sets: every column except
# 'Sleep Disorder' is a feature, 'Sleep Disorder' is the target.
# 30% of the rows are held out for testing; random_state=42 makes the
# shuffle repeatable.
# NOTE(review): the feature frame keeps every non-target column present in
# `df` at this point (e.g. 'Person ID') - confirm identifier columns are
# meant to be model inputs.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns='Sleep Disorder'),
    df['Sleep Disorder'],
    test_size=0.3,
    random_state=42,
)


## Model Building

To predict sleep disorders using classification algorithms, we will use:
1. Decision Tree Classifier
2. Random Forest Classifier


### Decision Tree Classifier

In [14]:
from sklearn.tree import DecisionTreeClassifier

# Create the decision tree classifier and store it in `dtree`.
# random_state is fixed (same seed as the train/test split) so that fitting
# the tree gives the same result on every Restart & Run All; without it,
# tie-breaking between equally good splits can differ between runs.
dtree = DecisionTreeClassifier(random_state=42)
dtree  # display the (still unfitted) estimator and its parameters