In [None]:
# ===============================
# Match Winner Prediction + Hyperparameter Loop
# ===============================

from itertools import product

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# 1. Load dataset
file_path = "/content/every_match.csv"   # update if needed

# --- Data Cleaning ---
# Read the CSV, then discard every row that has a missing value in any
# column, and finally every row that exactly duplicates an earlier one.
df = pd.read_csv(file_path).dropna().drop_duplicates()

# 2. Target column (encode to numbers)
# LabelEncoder assigns integer codes in sorted label order, so if the result
# codes are "A"/"D"/"H" the mapping is A -> 0, D -> 1, H -> 2. Check
# le_target.classes_ for the order actually learned from the data.
le_target = LabelEncoder()
y = le_target.fit_transform(df["FullTimeResult"])

# 3. Drop leakage + irrelevant columns
# Score columns and the half-time result are removed together with the target
# itself and the season/date columns.
# NOTE(review): any other in-match statistics left in X would leak the result
# too -- confirm against the CSV schema.
leakage_cols = [
    "FullTimeHomeGoals",
    "FullTimeAwayGoals",
    "HalfTimeHomeGoals",
    "HalfTimeAwayGoals",
    "HalfTimeResult",
]
X = df.drop(columns=["Season", "MatchDate", "FullTimeResult", *leakage_cols])

# 4. Encode categorical features (teams)
# Every text column is replaced by integer codes. The fitted encoder for each
# column is kept in `feature_encoders` so the same mapping can later be applied
# to new rows (`.transform`) or reversed (`.inverse_transform`); previously
# each encoder was overwritten by the next one and lost.
feature_encoders = {}
for col in X.columns:
    # Accept both classic object columns and pandas' dedicated string dtype;
    # a bare `dtype == "object"` test skips the latter and would leave raw
    # strings in X.
    if pd.api.types.is_object_dtype(X[col]) or pd.api.types.is_string_dtype(X[col]):
        le = LabelEncoder()
        X[col] = le.fit_transform(X[col])
        feature_encoders[col] = le

# 5. Train-test split
# 20% of the rows are held out for testing; stratifying on y keeps the class
# proportions the same in both parts, and the fixed seed makes the split
# reproducible.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# 6. Hyperparameter grid
# Maps each tuned random-forest setting to the list of candidate values
# that will be tried for it.
param_grid = dict(
    n_estimators=[100, 200],
    max_depth=[10, 20, None],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 4],
    max_features=["sqrt", "log2"],
)

# 7. Hyperparameter search
#
# Candidates are compared on a validation split carved out of the training
# data, so the test set is never consulted while choosing hyperparameters.
# Picking the winner by test accuracy would make the reported test score
# optimistically biased.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.25, random_state=42, stratify=y_train
)

# Settings shared by every candidate and by the final model.
fixed_kwargs = {"random_state": 42, "n_jobs": -1, "class_weight": "balanced"}

best_val_score = -1.0  # below any possible accuracy: first candidate always wins
best_params = {}

# product() walks the grid in the same order as nested loops over the keys.
param_names = list(param_grid)
for values in product(*(param_grid[name] for name in param_names)):
    params = dict(zip(param_names, values))

    candidate = RandomForestClassifier(**params, **fixed_kwargs)
    candidate.fit(X_fit, y_fit)
    acc = accuracy_score(y_val, candidate.predict(X_val))

    print(
        f"Params: n={params['n_estimators']}, depth={params['max_depth']}, "
        f"split={params['min_samples_split']}, leaf={params['min_samples_leaf']}, "
        f"feat={params['max_features']} --> Val Acc={acc:.4f}"
    )

    # Track best (strict '>' keeps the earliest candidate on ties)
    if acc > best_val_score:
        best_val_score = acc
        best_params = params

# 8. Refit the selected configuration on the full training set and score it
#    exactly once on the untouched test set.
clf = RandomForestClassifier(**best_params, **fixed_kwargs)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
best_score = accuracy_score(y_test, y_pred)

# 9. Final best parameters + accuracy
print("======================================")
print(" Best Hyperparameters:", best_params)
print(f" Best Validation Accuracy: {best_val_score:.4f}")
print(f" Best Test Accuracy: {best_score:.4f}")
print("======================================")


Params: n=100, depth=10, split=2, leaf=1, feat=sqrt --> Acc=0.5501
Params: n=100, depth=10, split=2, leaf=1, feat=log2 --> Acc=0.5501
Params: n=100, depth=10, split=2, leaf=4, feat=sqrt --> Acc=0.5400
Params: n=100, depth=10, split=2, leaf=4, feat=log2 --> Acc=0.5400
Params: n=100, depth=10, split=5, leaf=1, feat=sqrt --> Acc=0.5480
Params: n=100, depth=10, split=5, leaf=1, feat=log2 --> Acc=0.5480
Params: n=100, depth=10, split=5, leaf=4, feat=sqrt --> Acc=0.5400
Params: n=100, depth=10, split=5, leaf=4, feat=log2 --> Acc=0.5400
Params: n=100, depth=20, split=2, leaf=1, feat=sqrt --> Acc=0.5666
Params: n=100, depth=20, split=2, leaf=1, feat=log2 --> Acc=0.5666
Params: n=100, depth=20, split=2, leaf=4, feat=sqrt --> Acc=0.5474
Params: n=100, depth=20, split=2, leaf=4, feat=log2 --> Acc=0.5474
Params: n=100, depth=20, split=5, leaf=1, feat=sqrt --> Acc=0.5640
Params: n=100, depth=20, split=5, leaf=1, feat=log2 --> Acc=0.5640
Params: n=100, depth=20, split=5, leaf=4, feat=sqrt --> Acc=0.