# Sleep Quality Predictors.Model Classification

In [1]:
import pandas as pd
df=pd.read_csv('sleep_cycle_productivity.csv')
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder,LabelEncoder,OrdinalEncoder
from sklearn.preprocessing import MinMaxScaler,StandardScaler,RobustScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split,KFold, cross_val_score

In [2]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 15 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Date                           5000 non-null   object 
 1   Person_ID                      5000 non-null   int64  
 2   Age                            5000 non-null   int64  
 3   Gender                         5000 non-null   object 
 4   Sleep Start Time               5000 non-null   float64
 5   Sleep End Time                 5000 non-null   float64
 6   Total Sleep Hours              5000 non-null   float64
 7   Sleep Quality                  5000 non-null   int64  
 8   Exercise (mins/day)            5000 non-null   int64  
 9   Caffeine Intake (mg)           5000 non-null   int64  
 10  Screen Time Before Bed (mins)  5000 non-null   int64  
 11  Work Hours (hrs/day)           5000 non-null   float64
 12  Productivity Score             5000 non-null   i

In [3]:
df.head()

Unnamed: 0,Date,Person_ID,Age,Gender,Sleep Start Time,Sleep End Time,Total Sleep Hours,Sleep Quality,Exercise (mins/day),Caffeine Intake (mg),Screen Time Before Bed (mins),Work Hours (hrs/day),Productivity Score,Mood Score,Stress Level
0,2024-04-12,1860,32,Other,23.33,4.61,5.28,3,86,87,116,8.80892,8,3,6
1,2024-11-04,1769,41,Female,21.02,2.43,5.41,5,32,21,88,6.329833,10,3,7
2,2024-08-31,2528,20,Male,22.1,3.45,5.35,7,17,88,59,8.506306,10,9,10
3,2024-02-22,8041,37,Other,23.1,6.65,7.55,8,46,34,80,6.07024,8,4,2
4,2024-02-23,4843,46,Other,21.42,4.17,6.75,10,61,269,94,11.374994,8,7,9


In [4]:
df.isnull().sum()         # tushib qolgan qiymatlar yo'q

Date                             0
Person_ID                        0
Age                              0
Gender                           0
Sleep Start Time                 0
Sleep End Time                   0
Total Sleep Hours                0
Sleep Quality                    0
Exercise (mins/day)              0
Caffeine Intake (mg)             0
Screen Time Before Bed (mins)    0
Work Hours (hrs/day)             0
Productivity Score               0
Mood Score                       0
Stress Level                     0
dtype: int64

In [5]:
print(df.columns)

Index(['Date', 'Person_ID', 'Age', 'Gender', 'Sleep Start Time',
       'Sleep End Time', 'Total Sleep Hours', 'Sleep Quality',
       'Exercise (mins/day)', 'Caffeine Intake (mg)',
       'Screen Time Before Bed (mins)', 'Work Hours (hrs/day)',
       'Productivity Score', 'Mood Score', 'Stress Level'],
      dtype='object')


In [6]:
df.drop('Date', axis=1, inplace=True)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 14 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Person_ID                      5000 non-null   int64  
 1   Age                            5000 non-null   int64  
 2   Gender                         5000 non-null   object 
 3   Sleep Start Time               5000 non-null   float64
 4   Sleep End Time                 5000 non-null   float64
 5   Total Sleep Hours              5000 non-null   float64
 6   Sleep Quality                  5000 non-null   int64  
 7   Exercise (mins/day)            5000 non-null   int64  
 8   Caffeine Intake (mg)           5000 non-null   int64  
 9   Screen Time Before Bed (mins)  5000 non-null   int64  
 10  Work Hours (hrs/day)           5000 non-null   float64
 11  Productivity Score             5000 non-null   int64  
 12  Mood Score                     5000 non-null   i

In [8]:
def categorize_sleep_quality(score):
    if score <= 2:
        return 'Poor'
    elif score <= 5:
        return 'Okay'
    elif score <= 7:
        return 'Good'
    else:
        return 'Excellent'


In [9]:
df['Sleep Quality Category'] = df['Sleep Quality'].apply(categorize_sleep_quality)

In [10]:
df.head()

Unnamed: 0,Person_ID,Age,Gender,Sleep Start Time,Sleep End Time,Total Sleep Hours,Sleep Quality,Exercise (mins/day),Caffeine Intake (mg),Screen Time Before Bed (mins),Work Hours (hrs/day),Productivity Score,Mood Score,Stress Level,Sleep Quality Category
0,1860,32,Other,23.33,4.61,5.28,3,86,87,116,8.80892,8,3,6,Okay
1,1769,41,Female,21.02,2.43,5.41,5,32,21,88,6.329833,10,3,7,Okay
2,2528,20,Male,22.1,3.45,5.35,7,17,88,59,8.506306,10,9,10,Good
3,8041,37,Other,23.1,6.65,7.55,8,46,34,80,6.07024,8,4,2,Excellent
4,4843,46,Other,21.42,4.17,6.75,10,61,269,94,11.374994,8,7,9,Excellent


In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 15 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Person_ID                      5000 non-null   int64  
 1   Age                            5000 non-null   int64  
 2   Gender                         5000 non-null   object 
 3   Sleep Start Time               5000 non-null   float64
 4   Sleep End Time                 5000 non-null   float64
 5   Total Sleep Hours              5000 non-null   float64
 6   Sleep Quality                  5000 non-null   int64  
 7   Exercise (mins/day)            5000 non-null   int64  
 8   Caffeine Intake (mg)           5000 non-null   int64  
 9   Screen Time Before Bed (mins)  5000 non-null   int64  
 10  Work Hours (hrs/day)           5000 non-null   float64
 11  Productivity Score             5000 non-null   int64  
 12  Mood Score                     5000 non-null   i

In [12]:
df['Gender'].value_counts()

Gender
Male      1718
Female    1675
Other     1607
Name: count, dtype: int64

In [13]:
df['Sleep Quality'].value_counts()

Sleep Quality
5     521
10    521
7     517
4     508
2     503
8     491
3     490
6     489
9     480
1     480
Name: count, dtype: int64

In [14]:
# Encoding
label=LabelEncoder()

In [15]:
df['Gender']=label.fit_transform(df['Gender'])

In [16]:
ordinal=OrdinalEncoder

In [17]:
encoder=OrdinalEncoder(categories=[['Poor','Okay','Good','Excellent']])

In [18]:
df[['Sleep Quality Category']] = encoder.fit_transform(df[['Sleep Quality Category']])

In [19]:
df['Sleep Quality Category'] = df['Sleep Quality Category'].astype(int)
df['Gender'] = df['Gender'].astype(int)

In [20]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 15 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Person_ID                      5000 non-null   int64  
 1   Age                            5000 non-null   int64  
 2   Gender                         5000 non-null   int64  
 3   Sleep Start Time               5000 non-null   float64
 4   Sleep End Time                 5000 non-null   float64
 5   Total Sleep Hours              5000 non-null   float64
 6   Sleep Quality                  5000 non-null   int64  
 7   Exercise (mins/day)            5000 non-null   int64  
 8   Caffeine Intake (mg)           5000 non-null   int64  
 9   Screen Time Before Bed (mins)  5000 non-null   int64  
 10  Work Hours (hrs/day)           5000 non-null   float64
 11  Productivity Score             5000 non-null   int64  
 12  Mood Score                     5000 non-null   i

In [21]:
df.head()

Unnamed: 0,Person_ID,Age,Gender,Sleep Start Time,Sleep End Time,Total Sleep Hours,Sleep Quality,Exercise (mins/day),Caffeine Intake (mg),Screen Time Before Bed (mins),Work Hours (hrs/day),Productivity Score,Mood Score,Stress Level,Sleep Quality Category
0,1860,32,2,23.33,4.61,5.28,3,86,87,116,8.80892,8,3,6,1
1,1769,41,0,21.02,2.43,5.41,5,32,21,88,6.329833,10,3,7,1
2,2528,20,1,22.1,3.45,5.35,7,17,88,59,8.506306,10,9,10,2
3,8041,37,2,23.1,6.65,7.55,8,46,34,80,6.07024,8,4,2,3
4,4843,46,2,21.42,4.17,6.75,10,61,269,94,11.374994,8,7,9,3


In [22]:
# Feature Engineering
import numpy as np

# Nap Duration
df['Nap Duration'] = (8 - df['Total Sleep Hours']) * 10 + df['Stress Level'] * 5 - df['Mood Score'] * 3 - df['Work Hours (hrs/day)'] * 2
df['Nap Duration'] = np.maximum(df['Nap Duration'], 0)  # Manfiy qiymatlarni 0ga olib kelish

# Room Environment
df['Room Environment'] = df['Sleep Quality'] * 10 - df['Screen Time Before Bed (mins)'] * 0.3 - df['Caffeine Intake (mg)'] * 0.05
df['Room Environment'] = df['Room Environment'].clip(0, 100)  # 0-100 oraliqqa cheklash

In [23]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 17 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Person_ID                      5000 non-null   int64  
 1   Age                            5000 non-null   int64  
 2   Gender                         5000 non-null   int64  
 3   Sleep Start Time               5000 non-null   float64
 4   Sleep End Time                 5000 non-null   float64
 5   Total Sleep Hours              5000 non-null   float64
 6   Sleep Quality                  5000 non-null   int64  
 7   Exercise (mins/day)            5000 non-null   int64  
 8   Caffeine Intake (mg)           5000 non-null   int64  
 9   Screen Time Before Bed (mins)  5000 non-null   int64  
 10  Work Hours (hrs/day)           5000 non-null   float64
 11  Productivity Score             5000 non-null   int64  
 12  Mood Score                     5000 non-null   i

In [24]:
df[['Nap Duration','Room Environment']].describe()

Unnamed: 0,Nap Duration,Room Environment
count,5000.0,5000.0
mean,12.472526,26.02871
std,14.928831,25.749406
min,0.0,0.0
25%,0.0,0.0
50%,6.348696,20.6
75%,22.051567,45.6625
max,69.575729,98.9


In [25]:
df.head()

Unnamed: 0,Person_ID,Age,Gender,Sleep Start Time,Sleep End Time,Total Sleep Hours,Sleep Quality,Exercise (mins/day),Caffeine Intake (mg),Screen Time Before Bed (mins),Work Hours (hrs/day),Productivity Score,Mood Score,Stress Level,Sleep Quality Category,Nap Duration,Room Environment
0,1860,32,2,23.33,4.61,5.28,3,86,87,116,8.80892,8,3,6,1,30.58216,0.0
1,1769,41,0,21.02,2.43,5.41,5,32,21,88,6.329833,10,3,7,1,39.240334,22.55
2,2528,20,1,22.1,3.45,5.35,7,17,88,59,8.506306,10,9,10,2,32.487389,47.9
3,8041,37,2,23.1,6.65,7.55,8,46,34,80,6.07024,8,4,2,3,0.0,54.3
4,4843,46,2,21.42,4.17,6.75,10,61,269,94,11.374994,8,7,9,3,13.750012,58.35


In [26]:
x = df.drop(['Sleep Quality', 'Sleep Quality Category'], axis=1)

In [27]:
y = df['Sleep Quality Category']

In [28]:
x_train,x_temp,y_train,y_temp=train_test_split(x,y,test_size=0.2,random_state=42)
x_test,x_val,y_test,y_val=train_test_split(x_temp,y_temp,test_size=0.5,random_state=42)

In [29]:
dt_model=DecisionTreeClassifier(random_state=42)

In [30]:
dt_model.fit(x_train,y_train)

In [31]:
y_pred=dt_model.predict(x_test)

In [32]:
score1=accuracy_score(y_test,y_pred)


In [33]:
score1

0.846