# Imports

In [1]:
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score


### Loading data

In [2]:
data = pd.read_csv('../data/decisions.csv')
data = data.drop(columns=['screenshot_title'])

features = data[['activity', 'hearts', 'light_lvl', 'in_hand_item', 'target_mob']]
targets = data[['decision_activity', 'decision_hearts', 'decision_light', 'decision_mob']]

features_train, features_test, targets_train, targets_test = train_test_split(features, targets, test_size=0.2)


### Preprocessing pipeline

In [3]:
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), ['activity', 'light_lvl', 'in_hand_item', 'target_mob']),
        ('num', StandardScaler(), ['hearts'])
    ])

clf = MultiOutputClassifier(RandomForestClassifier())


pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', clf)
])


### Training and saving model

In [4]:
pipeline.fit(features_train, targets_train)
joblib.dump(pipeline, "decisions_model.pkl")


['decisions_model.pkl']

### Evaluating model

In [5]:
print("Model Score:", pipeline.score(features_test, targets_test))


Model Score: 0.8702791461412152


In [6]:
# Detailed model insight
per_column_accuracies = {}
predictions = pipeline.predict(features_test)
pred_df = pd.DataFrame(predictions, columns=targets_test.columns, index=targets_test.index)

for col in targets_test.columns:
    print(f"\nClassification Report for: {col}")
    print(classification_report(targets_test[col], pred_df[col]))
    per_column_accuracies[col] = accuracy_score(targets_test[col], pred_df[col])

avg_accuracy = sum(per_column_accuracies.values()) / len(per_column_accuracies)
print("\nAccuracy per target:", per_column_accuracies)
print(f"Average Accuracy across all outputs: {avg_accuracy:.4f}")



Classification Report for: decision_activity
                      precision    recall  f1-score   support

          give_haste       1.00      1.00      1.00       131
     give_jump_boost       0.97      1.00      0.99        99
     give_resistance       1.00      0.98      0.99       377
          give_speed       1.00      1.00      1.00       100
       give_strength       0.99      1.00      1.00       307
give_water_breathing       1.00      1.00      1.00       204

            accuracy                           1.00      1218
           macro avg       0.99      1.00      1.00      1218
        weighted avg       1.00      1.00      1.00      1218


Classification Report for: decision_hearts
                        precision    recall  f1-score   support

   give_regeneration_1       0.96      0.93      0.95       117
   give_regeneration_2       0.95      0.97      0.96       124
   give_regeneration_3       0.98      1.00      0.99       248
   give_regeneration_4       1