In [74]:
import torch
from torch import nn
## nn has all building blocks of pytorch, like layers, loss functions, etc.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split

# Display the installed PyTorch version string as the cell output.
torch.__version__

'2.0.1+cu117'

1: "data (prepare and load)",

2: "build model",

3: "fitting the model to data (training)",

4: "making predictions and evaluating a model (inference)",

5: "saving and loading a model",

6: "putting it all together"


In [61]:
# Load the raw dataset from the CSV file next to the notebook.
data = pd.read_csv('Sleep_health_and_lifestyle_dataset.csv')

# pandas >= 2.0 parses the literal string "None" as a missing value, which
# would turn the "no disorder" class of the target into NaN and break the
# label encoding further down. Restore it as an explicit category; on older
# pandas versions (where "None" is kept as a string) this is a no-op.
# NOTE(review): this also labels genuinely blank cells as 'None' -- confirm
# the file has no truly missing targets.
data['Sleep Disorder'] = data['Sleep Disorder'].fillna('None')

# Peek at ten random rows as the cell output.
data.sample(10)

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
339,340,Female,55,Nurse,8.1,9,75,4,Overweight,140/95,72,5000,Sleep Apnea
64,65,Male,32,Doctor,6.2,6,30,8,Normal,125/80,72,5000,
115,116,Female,37,Accountant,7.2,8,60,4,Normal,115/75,68,7000,
69,70,Female,33,Scientist,6.2,6,50,6,Overweight,128/85,76,5500,
296,297,Female,50,Nurse,6.1,6,90,8,Overweight,140/95,75,10000,Sleep Apnea
240,241,Female,44,Teacher,6.5,7,45,4,Overweight,135/90,65,6000,Insomnia
265,266,Female,48,Nurse,5.9,6,90,8,Overweight,140/95,75,10000,Sleep Apnea
67,68,Male,33,Doctor,6.0,6,30,8,Normal,125/80,72,5000,Insomnia
114,115,Female,37,Accountant,7.2,8,60,4,Normal,115/75,68,7000,
16,17,Female,29,Nurse,6.5,5,40,7,Normal Weight,132/87,80,4000,Sleep Apnea


In [62]:
# Print the column names, non-null counts and dtypes of the raw table.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 374 entries, 0 to 373
Data columns (total 13 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Person ID                374 non-null    int64  
 1   Gender                   374 non-null    object 
 2   Age                      374 non-null    int64  
 3   Occupation               374 non-null    object 
 4   Sleep Duration           374 non-null    float64
 5   Quality of Sleep         374 non-null    int64  
 6   Physical Activity Level  374 non-null    int64  
 7   Stress Level             374 non-null    int64  
 8   BMI Category             374 non-null    object 
 9   Blood Pressure           374 non-null    object 
 10  Heart Rate               374 non-null    int64  
 11  Daily Steps              374 non-null    int64  
 12  Sleep Disorder           374 non-null    object 
dtypes: float64(1), int64(7), object(5)
memory usage: 38.1+ KB


In [63]:
# Sort the object-typed (categorical) columns into two groups:
#   encode1 -- columns with exactly two distinct values
#   encode2 -- columns with any other number of distinct values
# Both lists are consumed by the later pd.get_dummies cells.

# Object columns that must not be dummy-encoded here: 'Sleep Disorder' is the
# target and 'Blood Pressure' is split into two numeric columns in a later
# cell. Using an exclusion set instead of list.remove() means the cell no
# longer raises ValueError when one of these columns is absent from encode2
# (e.g. when the cell is re-run after 'Blood Pressure' has been dropped).
excluded_cols = {"Sleep Disorder", "Blood Pressure"}

encode1 = []
encode2 = []
for col in data.columns:
    if data[col].dtype != 'object':
        continue
    levels = data[col].unique()
    # Every object column is still printed, excluded ones included.
    print(col, levels)
    if col in excluded_cols:
        continue
    if len(levels) == 2:
        encode1.append(col)
    else:
        encode2.append(col)
print("\nLinear encoding",encode1)
print("One hot encoding",encode2)

Gender ['Male' 'Female']
Occupation ['Software Engineer' 'Doctor' 'Sales Representative' 'Teacher' 'Nurse'
 'Engineer' 'Accountant' 'Scientist' 'Lawyer' 'Salesperson' 'Manager']
BMI Category ['Overweight' 'Normal' 'Obese' 'Normal Weight']
Blood Pressure ['126/83' '125/80' '140/90' '120/80' '132/87' '130/86' '117/76' '118/76'
 '128/85' '131/86' '128/84' '115/75' '135/88' '129/84' '130/85' '115/78'
 '119/77' '121/79' '125/82' '135/90' '122/80' '142/92' '140/95' '139/91'
 '118/75']
Sleep Disorder ['None' 'Sleep Apnea' 'Insomnia']

Linear encoding ['Gender']
One hot encoding ['Occupation', 'BMI Category']


In [64]:
# Replace the "systolic/diastolic" text column with two integer columns.
bp_cols = ["Systolic BP", "Diastolic BP"]
if 'Blood Pressure' in data.columns:
    # Parse and convert first, assign afterwards: if any value fails the int
    # conversion, `data` is left untouched instead of half-mutated with
    # string-typed columns.
    bp_parts = data['Blood Pressure'].str.split('/', expand=True).astype('int')
    # Raises ValueError if the split did not yield exactly two parts.
    data[bp_cols] = bp_parts
    data.drop('Blood Pressure',axis=1,inplace=True)
elif not set(bp_cols).issubset(data.columns):
    # Neither the raw column nor its parsed form is available.
    raise KeyError("'Blood Pressure' column not found and it has not been split yet")
# else: the cell was already executed on this frame -- nothing left to do.

data.sample(5)

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder,Systolic BP,Diastolic BP
281,282,Female,50,Nurse,6.1,6,90,8,Overweight,75,10000,Sleep Apnea,140,95
24,25,Male,30,Doctor,7.8,7,75,6,Normal,70,8000,,120,80
248,249,Male,44,Salesperson,6.4,6,45,7,Overweight,72,6000,,130,85
349,350,Female,57,Nurse,8.1,9,75,3,Overweight,68,7000,Sleep Apnea,140,95
318,319,Female,53,Engineer,8.4,9,30,3,Normal,65,5000,,125,80


In [65]:
# One-hot encode the columns listed in `encode2`, dropping the first level of
# each so the dummy columns are not perfectly collinear.
# The dtype is pinned to uint8: pandas < 2.0 produced uint8 dummies by default
# while pandas >= 2.0 produces bool, which changes the 0/1 display, removes
# the columns from describe() and makes the frame's combined array
# object-typed when it is later converted to a tensor.
data = pd.get_dummies(data, columns=encode2, drop_first=True, dtype='uint8')
data.sample(5)

Unnamed: 0,Person ID,Gender,Age,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,Heart Rate,Daily Steps,Sleep Disorder,...,Occupation_Manager,Occupation_Nurse,Occupation_Sales Representative,Occupation_Salesperson,Occupation_Scientist,Occupation_Software Engineer,Occupation_Teacher,BMI Category_Normal Weight,BMI Category_Obese,BMI Category_Overweight
203,204,Male,43,6.9,6,47,7,69,6800,,...,0,0,0,0,0,0,0,1,0,0
109,110,Male,37,7.4,8,60,5,68,8000,,...,0,0,0,0,0,0,0,0,0,0
46,47,Male,31,7.7,7,75,6,70,8000,,...,0,0,0,0,0,0,0,0,0,0
154,155,Male,39,7.2,8,60,5,68,8000,,...,0,0,0,0,0,0,0,0,0,0
110,111,Female,37,7.2,8,60,4,68,7000,,...,0,0,0,0,0,0,0,0,0,0


In [66]:
# Encode the columns listed in `encode1` as a single 0/1 indicator each
# (drop_first=True keeps only one of the two levels).
# The dtype is pinned to uint8 so the result is the same on pandas < 2.0
# (uint8 default) and pandas >= 2.0 (bool default).
data = pd.get_dummies(data, columns=encode1, drop_first=True, dtype='uint8')
data.sample(5)

Unnamed: 0,Person ID,Age,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,Heart Rate,Daily Steps,Sleep Disorder,Systolic BP,...,Occupation_Nurse,Occupation_Sales Representative,Occupation_Salesperson,Occupation_Scientist,Occupation_Software Engineer,Occupation_Teacher,BMI Category_Normal Weight,BMI Category_Obese,BMI Category_Overweight,Gender_Male
53,54,32,7.6,7,75,6,70,8000,,120,...,0,0,0,0,0,0,0,0,0,1
364,365,59,8.0,9,75,3,68,7000,Sleep Apnea,140,...,1,0,0,0,0,0,0,0,1,0
367,368,59,8.0,9,75,3,68,7000,Sleep Apnea,140,...,1,0,0,0,0,0,0,0,1,0
300,301,51,8.5,9,30,3,65,5000,,125,...,0,0,0,0,0,0,0,0,0,0
7,8,29,7.8,7,75,6,70,8000,,120,...,0,0,0,0,0,0,0,0,0,1


In [67]:
# Remove the 'Person ID' column in place (presumably just a row identifier,
# not a feature), then print the resulting schema.
data.drop(columns=['Person ID'], inplace=True)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 374 entries, 0 to 373
Data columns (total 24 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Age                              374 non-null    int64  
 1   Sleep Duration                   374 non-null    float64
 2   Quality of Sleep                 374 non-null    int64  
 3   Physical Activity Level          374 non-null    int64  
 4   Stress Level                     374 non-null    int64  
 5   Heart Rate                       374 non-null    int64  
 6   Daily Steps                      374 non-null    int64  
 7   Sleep Disorder                   374 non-null    object 
 8   Systolic BP                      374 non-null    int64  
 9   Diastolic BP                     374 non-null    int64  
 10  Occupation_Doctor                374 non-null    uint8  
 11  Occupation_Engineer              374 non-null    uint8  
 12  Occupation_Lawyer     

In [68]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
data.describe()

Unnamed: 0,Age,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,Heart Rate,Daily Steps,Systolic BP,Diastolic BP,Occupation_Doctor,...,Occupation_Nurse,Occupation_Sales Representative,Occupation_Salesperson,Occupation_Scientist,Occupation_Software Engineer,Occupation_Teacher,BMI Category_Normal Weight,BMI Category_Obese,BMI Category_Overweight,Gender_Male
count,374.0,374.0,374.0,374.0,374.0,374.0,374.0,374.0,374.0,374.0,...,374.0,374.0,374.0,374.0,374.0,374.0,374.0,374.0,374.0,374.0
mean,42.184492,7.132086,7.312834,59.171123,5.385027,70.165775,6816.84492,128.553476,84.649733,0.18984,...,0.195187,0.005348,0.085561,0.010695,0.010695,0.106952,0.05615,0.026738,0.395722,0.505348
std,8.673133,0.795657,1.196956,20.830804,1.774526,4.135676,1617.915679,7.748118,6.161611,0.392699,...,0.396876,0.073029,0.28009,0.103001,0.103001,0.309466,0.230519,0.161533,0.48966,0.500641
min,27.0,5.8,4.0,30.0,3.0,65.0,3000.0,115.0,75.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,35.25,6.4,6.0,45.0,4.0,68.0,5600.0,125.0,80.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,43.0,7.2,7.0,60.0,5.0,70.0,7000.0,130.0,85.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
75%,50.0,7.8,8.0,75.0,7.0,72.0,8000.0,135.0,90.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
max,59.0,8.5,9.0,90.0,8.0,86.0,10000.0,142.0,95.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [69]:
# Count the missing values in each column.
data.isnull().sum()

Age                                0
Sleep Duration                     0
Quality of Sleep                   0
Physical Activity Level            0
Stress Level                       0
Heart Rate                         0
Daily Steps                        0
Sleep Disorder                     0
Systolic BP                        0
Diastolic BP                       0
Occupation_Doctor                  0
Occupation_Engineer                0
Occupation_Lawyer                  0
Occupation_Manager                 0
Occupation_Nurse                   0
Occupation_Sales Representative    0
Occupation_Salesperson             0
Occupation_Scientist               0
Occupation_Software Engineer       0
Occupation_Teacher                 0
BMI Category_Normal Weight         0
BMI Category_Obese                 0
BMI Category_Overweight            0
Gender_Male                        0
dtype: int64

In [70]:
# Inspecting Target Variable
# A bare expression in the middle of a cell is never displayed, so the class
# distribution is printed explicitly (NaN included, to surface parsing issues).
print(data['Sleep Disorder'].value_counts(dropna=False))

# Encoding Target variable
target_codes = {'None':0, 'Insomnia':1, 'Sleep Apnea':2}
# Only encode while the column still holds labels: running .map() on the
# already-encoded integers would turn every value into NaN.
if not pd.api.types.is_integer_dtype(data['Sleep Disorder']):
    # pandas >= 2.0 reads the CSV text "None" as NaN; treat it as the
    # "no disorder" class again before mapping.
    labels = data['Sleep Disorder'].fillna('None')
    encoded = labels.map(target_codes)
    if encoded.isna().any():
        # Fail loudly instead of carrying NaN targets into training.
        unknown = sorted(labels[encoded.isna()].astype(str).unique())
        raise ValueError(f"Unmapped 'Sleep Disorder' labels: {unknown}")
    data['Sleep Disorder'] = encoded.astype('int64')
data.sample(20)

Unnamed: 0,Age,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,Heart Rate,Daily Steps,Sleep Disorder,Systolic BP,Diastolic BP,...,Occupation_Nurse,Occupation_Sales Representative,Occupation_Salesperson,Occupation_Scientist,Occupation_Software Engineer,Occupation_Teacher,BMI Category_Normal Weight,BMI Category_Obese,BMI Category_Overweight,Gender_Male
236,44,6.4,6,45,7,72,6000,1,130,85,...,0,0,1,0,0,0,0,0,1,1
190,43,6.7,7,45,4,65,6000,1,135,90,...,0,0,0,0,0,1,0,0,1,0
234,44,6.6,7,45,4,65,6000,1,135,90,...,0,0,0,0,0,1,0,0,1,0
82,35,6.7,7,40,5,70,5600,0,128,84,...,0,0,0,0,0,1,0,0,1,1
237,44,6.5,7,45,4,65,6000,1,135,90,...,0,0,0,0,0,1,0,0,1,0
71,33,6.1,6,30,8,72,5000,0,125,80,...,0,0,0,0,0,0,0,0,0,1
226,44,6.6,7,45,4,65,6000,1,135,90,...,0,0,0,0,0,1,0,0,1,0
182,42,7.8,8,90,5,70,8000,0,130,85,...,0,0,0,0,0,0,0,0,0,1
171,41,7.7,8,90,5,70,8000,0,130,85,...,0,0,0,0,0,0,0,0,0,1
13,29,6.0,6,30,8,70,8000,0,120,80,...,0,0,0,0,0,0,0,0,0,1


In [73]:
# Show the dtype of every column after the encoding steps.
data.dtypes

Age                                  int64
Sleep Duration                     float64
Quality of Sleep                     int64
Physical Activity Level              int64
Stress Level                         int64
Heart Rate                           int64
Daily Steps                          int64
Sleep Disorder                       int64
Systolic BP                          int64
Diastolic BP                         int64
Occupation_Doctor                    uint8
Occupation_Engineer                  uint8
Occupation_Lawyer                    uint8
Occupation_Manager                   uint8
Occupation_Nurse                     uint8
Occupation_Sales Representative      uint8
Occupation_Salesperson               uint8
Occupation_Scientist                 uint8
Occupation_Software Engineer         uint8
Occupation_Teacher                   uint8
BMI Category_Normal Weight           uint8
BMI Category_Obese                   uint8
BMI Category_Overweight              uint8
Gender_Male

### Data split

In [80]:
# Separate the target column from the feature columns.
y = data['Sleep Disorder']
X = data.drop('Sleep Disorder',axis=1)

# Splitting the data into train and test
# 75/25 split with a fixed seed; stratify=y keeps the class proportions of
# the target the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=69, stratify=y)


print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)


# Converting to tensors
# Cast to an explicit NumPy dtype first: with mixed column dtypes (e.g. bool
# dummy columns on pandas >= 2.0) `.values` is an object array that the legacy
# torch.FloatTensor constructor cannot consume. torch.tensor then infers
# float32 / int64 from the arrays, i.e. the same tensor dtypes as
# FloatTensor / LongTensor.
X_train = torch.tensor(X_train.to_numpy(dtype=np.float32))
X_test = torch.tensor(X_test.to_numpy(dtype=np.float32))
# int64 class indices; the cast raises if the target still contains NaN.
y_train = torch.tensor(y_train.to_numpy(dtype=np.int64))
y_test = torch.tensor(y_test.to_numpy(dtype=np.int64))


print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)


(280, 23) (94, 23) (280,) (94,)
torch.Size([280, 23]) torch.Size([94, 23]) torch.Size([280]) torch.Size([94])
