In [57]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score
import numpy as np
from imblearn.over_sampling import RandomOverSampler

# Read the mood dataset from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer='dataset_mood_bfill_aggregated.csv')

# A row without a mood value has no target to learn from, so remove it in place
df.dropna(axis='index', subset=['mood'], inplace=True)

# Map mood to broader categories
def categorize_mood(mood):
    """Return the coarse category label for a numeric mood score.

    Scores up to and including 6 map to 'Decent', scores above 6 up to and
    including 8 map to 'Good', and everything else maps to 'Excellent'.
    A value for which both comparisons are false (e.g. NaN) therefore also
    ends up as 'Excellent'.
    """
    if mood <= 6:
        return 'Decent'
    return 'Good' if mood <= 8 else 'Excellent'

# Derive the categorical target column from the numeric mood score, row by row
df['mood_category'] = df['mood'].map(categorize_mood)

# Oversample the minority classes so every mood category is equally represented
# NOTE(review): X_resampled / y_resampled are not referenced again in the
# visible part of this file — confirm whether a later step is meant to use them.
feature_columns = df.drop(columns=['mood', 'mood_category', 'id', 'time'])
target_labels = df['mood_category']
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(feature_columns, target_labels)

# Number of consecutive rows (per user) that make up one sample
window_size = 30

# Columns kept out of the feature matrix: the raw target, the derived label,
# and the identifier / timestamp columns
non_feature_columns = ['mood', 'mood_category', 'id', 'time']

# Build one flattened sample per sliding window of `window_size` rows, per user.
# The label of each sample is the mood category of the window's last row.
# NOTE(review): windows follow the row order within each user; this assumes the
# file is already ordered chronologically per user — confirm.
X_rolled = []
y_rolled = []
for _, data in df.groupby('id'):
    # Drop the non-feature columns once per user instead of once per window
    user_features = data.drop(columns=non_feature_columns).values
    user_labels = data['mood_category'].values
    # Users with fewer than `window_size` rows give an empty range and are skipped
    for start in range(len(data) - window_size + 1):
        stop = start + window_size
        X_rolled.append(user_features[start:stop].flatten())
        y_rolled.append(user_labels[stop - 1])

# Convert lists to numpy arrays
X_rolled = np.array(X_rolled)
y_rolled = np.array(y_rolled)

# Hold out 20% of the windows for testing; the fixed seed keeps the split repeatable
# NOTE(review): consecutive windows of the same user share all but one row, so
# a random split places near-duplicate samples on both sides and the test
# scores are likely optimistic — consider splitting by user or by time.
X_train, X_test, y_train, y_test = train_test_split(
    X_rolled,
    y_rolled,
    test_size=0.2,
    random_state=42,
)

# Initialize Random Forest classifier.
# random_state is fixed so repeated runs build the same forest, matching the
# seeded oversampler and train/test split used elsewhere in this script.
rf_classifier = RandomForestClassifier(
    n_estimators=150,
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=42,
)

# Train the model on the training windows
rf_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred_test = rf_classifier.predict(X_test)

# Evaluate performance on the test set
accuracy = accuracy_score(y_test, y_pred_test)
try:
    # One-vs-rest macro AUC is computed from class probabilities, not hard labels
    auc = roc_auc_score(
        y_test,
        rf_classifier.predict_proba(X_test),
        average='macro',
        multi_class='ovr',
    )
except ValueError as err:
    # Best effort: carry on without an AUC, but report why it is missing
    # instead of silently printing None below.
    print(f'AUC could not be computed: {err}')
    auc = None
f1 = f1_score(y_test, y_pred_test, average='weighted')

print(f'Accuracy: {accuracy}')
print(f'AUC: {auc}')
print(f'F1-score: {f1}')


Accuracy: 0.8392857142857143
AUC: 0.8467574931019352
F1-score: 0.7659500693481276
