In [1]:

# Step 0: Imports
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib
import os


In [3]:
# Step 1: Load engineered dataset
df_path = "../data/combined_engineered.csv"
df = pd.read_csv(df_path)
print("✅ Loaded engineered dataset")
print("Shape:", df.shape)
print("Label distribution:\n", df['label'].value_counts())


✅ Loaded engineered dataset
Shape: (1572, 50)
Label distribution:
 label
2    670
1    455
0    447
Name: count, dtype: int64


In [4]:
# Step 2: Define features and labels
X = df.drop(columns=['label', 'Key', 'Participant_ID'])
y = df['label']

In [5]:
print("Number of features:", X.shape[1])

Number of features: 47


In [6]:
# Step 3: Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")

Train shape: (1257, 47), Test shape: (315, 47)


In [7]:
# Step 4: Train Random Forest classifier
model = RandomForestClassifier(class_weight="balanced", random_state=42)
model.fit(X_train, y_train)
print("✅ Random Forest trained")

✅ Random Forest trained


In [8]:
# Step 5: Evaluate model
y_pred = model.predict(X_test)
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))


Classification Report:

              precision    recall  f1-score   support

           0       0.65      0.74      0.69        91
           1       0.71      0.49      0.58        97
           2       0.72      0.82      0.77       127

    accuracy                           0.70       315
   macro avg       0.69      0.68      0.68       315
weighted avg       0.70      0.70      0.69       315



In [9]:
# Step 6: Save the trained model
os.makedirs("../models", exist_ok=True)
model_path = "../models/randomforest_sentiment.pkl"
joblib.dump(model, model_path)
print(f"✅ Model saved at {model_path}")

✅ Model saved at ../models/randomforest_sentiment.pkl
