In [14]:
# !pip install hmmlearn

Collecting hmmlearn
  Downloading hmmlearn-0.3.3-cp312-cp312-macosx_10_9_universal2.whl.metadata (3.0 kB)
Downloading hmmlearn-0.3.3-cp312-cp312-macosx_10_9_universal2.whl (196 kB)
Installing collected packages: hmmlearn
Successfully installed hmmlearn-0.3.3


In [37]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from hmmlearn.hmm import GaussianHMM

In [6]:
sleep = pd.read_csv('student_sleep_patterns.csv')

# Keep relevant columns: 5 observations and 1 hidden state
sleep = sleep[['Sleep_Duration', 'Study_Hours', 'Screen_Time', 'Caffeine_Intake', 'Physical_Activity', 'Sleep_Quality']]
sleep

Unnamed: 0,Sleep_Duration,Study_Hours,Screen_Time,Caffeine_Intake,Physical_Activity,Sleep_Quality
0,7.7,7.9,3.4,2,37,10
1,6.3,6.0,1.9,5,74,2
2,5.1,6.7,3.9,5,53,5
3,6.3,8.6,2.8,4,55,9
4,4.7,2.7,2.7,0,85,3
...,...,...,...,...,...,...
495,5.1,9.3,1.9,4,110,4
496,8.9,7.7,3.5,3,40,4
497,5.7,6.4,3.9,1,68,10
498,4.9,0.5,3.5,0,12,2


**Columns:**
- Sleep_Duration: Total hours of sleep per night (float).
- Study_Hours: Average number of hours spent studying per day (float).
- Screen_Time: Average number of hours spent on screens (excluding studying) per day (float).
- Caffeine_Intake: Average number of caffeinated beverages consumed per day (integer).
- Physical_Activity: Average minutes spent on physical activity per day (integer).
- Sleep_Quality: Subjective rating of sleep quality on a scale of 1 to 10 (1 being the worst, 10 being the best) (integer).

In [9]:
# Data types are numeric
sleep.dtypes

Sleep_Duration       float64
Study_Hours          float64
Screen_Time          float64
Caffeine_Intake        int64
Physical_Activity      int64
Sleep_Quality          int64
dtype: object

In [12]:
# No missing values
sleep.isna().sum().sum()

np.int64(0)

In [48]:
# Cluster sleep-quality into 3 levels
def sleep_level(x):
    if x <= 4: return 0
    elif x <= 7: return 1
    else: return 2

sleep['Sleep_Quality'] = sleep['Sleep_Quality'].apply(sleep_level)

In [49]:
# Standardize observed data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(sleep[['Sleep_Duration', 'Study_Hours', 
                                       'Screen_Time', 'Caffeine_Intake', 
                                       'Physical_Activity']])

In [50]:
# Observations O_t
X = sleep[['Sleep_Duration', 'Study_Hours', 'Screen_Time',
           'Caffeine_Intake', 'Physical_Activity']].values
X

array([[ 7.7,  7.9,  3.4,  2. , 37. ],
       [ 6.3,  6. ,  1.9,  5. , 74. ],
       [ 5.1,  6.7,  3.9,  5. , 53. ],
       ...,
       [ 5.7,  6.4,  3.9,  1. , 68. ],
       [ 4.9,  0.5,  3.5,  0. , 12. ],
       [ 7.9, 11.6,  1. ,  0. , 86. ]], shape=(500, 5))

In [51]:
# Hidden state labels
states = sleep['Sleep_Quality'].values
n_states = len(np.unique(states))

np.unique(states)

array([0, 1, 2])

In [52]:
# Initialize HMM
model = GaussianHMM(
    n_components=n_states,       # Sleep quality states (0-2)
    covariance_type="full",      # Full covariance Gaussian emissions
    n_iter=200,                  # EM iterations (Baum-Welch)
    init_params=""               # We will manually initialize parameters
)

In [53]:
# Set initial distribution = uniform
model.startprob_ = np.ones(n_states) / n_states

In [54]:
# Random transition matrix
A = np.random.rand(n_states, n_states)
A = A / A.sum(axis=1, keepdims=True)
model.transmat_ = A

In [55]:
# Initialize Gaussian emission parameters
means = []
covs = []

for s in np.unique(states):
    X_s = X[states == s]
    means.append(X_s.mean(axis=0))
    covs.append(np.cov(X_s.T) + 1e-6 * np.eye(X_s.shape[1]))  # stabilize covariance

means = np.array(means)
covs = np.array(covs)

model.means_ = means
model.covars_ = covs

In [56]:
# Run Forwards-Backwards alg
model.fit(X)

In [57]:
# Run Viterbi alg
log_prob, viterbi_states = model.decode(X, algorithm="viterbi")

In [58]:
# Make states 1–10 instead of 0–9
sleep['Predicted_Sleep_Quality'] = viterbi_states + 1
sleep

Unnamed: 0,Sleep_Duration,Study_Hours,Screen_Time,Caffeine_Intake,Physical_Activity,Sleep_Quality,Predicted_Sleep_Quality,Sleep_Level
0,7.7,7.9,3.4,2,37,2,1,2
1,6.3,6.0,1.9,5,74,0,2,0
2,5.1,6.7,3.9,5,53,1,2,1
3,6.3,8.6,2.8,4,55,2,2,2
4,4.7,2.7,2.7,0,85,0,2,0
...,...,...,...,...,...,...,...,...
495,5.1,9.3,1.9,4,110,0,2,0
496,8.9,7.7,3.5,3,40,0,1,0
497,5.7,6.4,3.9,1,68,2,2,2
498,4.9,0.5,3.5,0,12,0,2,0


In [59]:
accuracy = np.mean(sleep['Sleep_Quality'] == sleep['Predicted_Sleep_Quality'])
print("Viterbi reconstruction accuracy:", accuracy)

Viterbi reconstruction accuracy: 0.268


In [60]:
mae = np.mean(np.abs(sleep['Sleep_Quality'] - sleep['Predicted_Sleep_Quality']))
print("MAE:", mae)

MAE: 1.028
