In [25]:
# imports
import numpy as np
import matplotlib.pyplot as olt
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
# splits dataset into training and testing data
from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import mean_absolute_error, mean_squared_error

from mord import OrdinalRidge

In [31]:
# Load the CSV and turn the fatigue labels into integer codes.
# The category order given to the encoder fixes the mapping to 0..3
# ('Not fatigued at all' -> 0, ..., 'Very fatigued' -> 3).

file = pd.read_csv('final_avg_hr_data_updated.csv')
df = pd.DataFrame(file)

# Predictors and the raw (string) label column.
X = df[['heartrate', 'steps']]
y = df[['promis score']]

encoder = OrdinalEncoder(
    categories=[[
        'Not fatigued at all',
        'A little bit fatigued',
        'Somewhat fatigued',
        'Very fatigued',
    ]]
)
# The encoder takes a 2-D column; the result is flattened to a 1-D int vector.
label_column = y.values.reshape(-1, 1)
y_encoded = encoder.fit_transform(label_column).ravel().astype(int)

# Lookup table: label text side by side with its integer code.
y_names = encoder.inverse_transform(y_encoded.reshape(-1, 1))
df_y_names = pd.DataFrame(y_names)
df_y_encoded = pd.DataFrame(y_encoded)
map_data = [df_y_names, df_y_encoded]
result = pd.concat(map_data, axis=1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)

In [7]:
# One-vs-rest baseline: OneVsRestClassifier wrapping a logistic regression.
# Fitted and evaluated on the full dataset (X, y_encoded), not on the
# train/test split.

from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

base_estimator = LogisticRegression()
model = OneVsRestClassifier(base_estimator).fit(X, y_encoded)

# Per-class probabilities and hard class predictions for every row of X.
probabilities = model.predict_proba(X)
predictions = model.predict(X)

print("Predictions shape:", predictions.shape)
print("Probabilities shape:", probabilities.shape)

Predictions shape: (14204,)
Probabilities shape: (14204, 4)


In [None]:
# Ordinal logistic regression (mord LogisticIT) on degree-2 polynomial
# features of the predictors in X.

from mord import LogisticIT
from sklearn.preprocessing import PolynomialFeatures

# degree=2 adds squared terms and the pairwise interaction;
# include_bias=False leaves out the constant column.
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)

# NOTE(review): the expanded columns may differ in scale by many orders of
# magnitude (e.g. steps**2 vs heartrate) -- consider standardizing; confirm
# that the optimizer converges.
model = LogisticIT()
# Fit on the integer-encoded labels. `y` is a one-column DataFrame of label
# strings, which the ordinal model cannot use as class codes.
model.fit(X_poly, y_encoded)

predictions = model.predict(X_poly)

print("Original feature shape:", X.shape)
print("Polynomial feature shape:", X_poly.shape)
print("Predictions shape:", predictions.shape)

In [None]:
# Evaluate an OrdinalRidge model on the held-out test split.
# Follow-ups: could import SMOTENC, check the class distribution, and plot
# the coefficients of a model.

ord_reg = OrdinalRidge()
ord_reg.fit(X_train, y_train)
y_pred = ord_reg.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred)

# Report the scalar metrics too; previously they were computed but never shown.
print(f"Accuracy: {accuracy:.4f}")
print(f"Mean absolute error: {mae:.4f}")
print(cm)

In [None]:
# Heatmap of the confusion matrix `cm`, each cell shown as a share of all
# evaluated samples (cm divided by its grand total).
import seaborn as sns

ax = sns.heatmap(cm / np.sum(cm), annot=True, fmt='.2%', cmap='GnBu')
# sklearn's confusion_matrix puts true labels on rows and predictions on
# columns, so label the axes accordingly.
ax.set(
    xlabel='Predicted fatigue level (encoded)',
    ylabel='True fatigue level (encoded)',
    title='Confusion matrix (share of all samples)',
);

In [45]:
# Ordinal logistic regression (mord LogisticIT) fitted on the training split,
# scored on both splits, then used for one hand-written observation.
from mord import LogisticIT

model = LogisticIT()
model.fit(X_train, y_train)

# Score the fitted model on the data it saw and on the held-out data.
train_accuracy = model.score(X_train, y_train)
test_accuracy = model.score(X_test, y_test)
print(f"Train Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

# One fitted coefficient per input column, in column order.
for feature, coef in zip(X.columns, model.coef_):
    print(f"{feature}: {coef:.4f}")

# Single new observation; the predicted integer code indexes into the label
# list, whose order matches the category order used when encoding.
fatigue_levels = [
    'Not fatigued at all',
    'A little bit fatigued',
    'Somewhat fatigued',
    'Very fatigued',
]
new_log = np.array([[160, 10000]])  # columns: heartrate, steps
predicted_level = model.predict(new_log)
print(f"Predicted Fatigue Level: {fatigue_levels[predicted_level[0]]}")

Train Accuracy: 0.3900
Test Accuracy: 0.3837
heartrate: -0.0017
steps: 0.0000
Predicted Fatigue Level: A little bit fatigued
