In [20]:
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [22]:
# Read the training CSV into a DataFrame and preview its first rows.
# Original note: this file holds the Training + PublicTest rows combined.
train_csv = "data/train.csv"
data = pd.read_csv(train_csv)
data.head()

Unnamed: 0,emotion,pixels,Usage,id
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,Training,0
1,0,151 150 147 155 148 133 111 140 170 174 182 15...,Training,1
2,2,231 212 156 164 174 138 161 173 182 200 106 38...,Training,2
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,Training,3
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,Training,4


In [23]:
# Convert pixels string to array
def pixels_to_array(pixels_str):
    """Parse a whitespace-separated string of integer tokens into a 1-D NumPy array.

    Parameters
    ----------
    pixels_str : str
        Integer tokens separated by any run of whitespace.

    Returns
    -------
    numpy.ndarray
        One element per token, in the order they appear in the string.

    Raises
    ------
    ValueError
        If any token cannot be parsed by ``int``.
    """
    tokens = pixels_str.split()
    return np.array(list(map(int, tokens)))

# Parse each row's pixel string, then stack the per-row vectors into one 2-D matrix.
pixel_vectors = [pixels_to_array(row) for row in data['pixels']]
X = np.stack(pixel_vectors)  # expected shape (n_samples, 2304) per the original note
# Labels come straight from the 'emotion' column.
y = data['emotion'].values

In [24]:
# Scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [25]:
# Split for validation (optional)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)

In [26]:
# Random-forest settings gathered in one place; the fixed random_state keeps
# repeated runs comparable.
rf_params = dict(n_estimators=200, max_depth=30, random_state=42, n_jobs=-1)
model = RandomForestClassifier(**rf_params)
model.fit(X_train, y_train)

In [27]:
# Validate
# Predict labels for the held-out split and compare them with the true labels.
y_val_pred = model.predict(X_val)
acc = accuracy_score(y_true=y_val, y_pred=y_val_pred)
print(f"Validation Accuracy: {acc*100:.2f}%")

Validation Accuracy: 47.24%


In [29]:
# Load private test features
test_df = pd.read_csv("data/test.csv")
# Parse the pixel strings the same way as for the training data.
X_test_raw = np.stack([pixels_to_array(row) for row in test_df['pixels']])
# Reuse the already-fitted scaler; it is only applied here, never re-fitted.
X_test = scaler.transform(X_test_raw)
# Predict
y_pred = model.predict(X_test)

In [30]:
# Save submission
# `os` is imported in the notebook's top imports cell rather than mid-notebook.
# A single directory constant keeps the created folder and the written file in sync.
SUBMISSION_DIR = "submissions"
os.makedirs(SUBMISSION_DIR, exist_ok=True)  # no error if the folder already exists

# One row per test sample: its id alongside the predicted label.
submission = pd.DataFrame({
    "id": test_df['id'],
    "predicted_label": y_pred
})
submission_path = os.path.join(SUBMISSION_DIR, "submission.csv")
submission.to_csv(submission_path, index=False)  # index=False: do not write the row index
print("Submission saved!")

Submission saved!
