In [37]:
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

from tqdm import tqdm

import seaborn as sns
import matplotlib.pyplot as plt

In [38]:
df = pd.read_csv('../../Leagues/1st_2nd_tiers_top_5_leagues.csv')
df.drop(columns=["Unnamed: 0"], inplace=True)
df = df[df['league'] == 'LaLiga2']

df

Unnamed: 0,year,league,tier,team,squad_depth,avg_age,foreigners,avg_market_value,market_value,has_relegated,has_promoted,has_won_titles
960,2010,LaLiga2,2,Real Betis Balompié,36,25.8,8,1200000,43100000,False,False,False
961,2010,LaLiga2,2,FC Barcelona B,36,21.1,4,1020000,36600000,False,True,False
962,2010,LaLiga2,2,Real Valladolid CF,37,26.0,6,854000,31600000,True,False,False
963,2010,LaLiga2,2,CD Tenerife,32,26.4,8,803000,25700000,False,False,False
964,2010,LaLiga2,2,Rayo Vallecano,31,26.3,5,790000,24500000,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
1285,2024,LaLiga2,2,Racing Ferrol,25,29.4,4,484000,12100000,False,False,False
1286,2024,LaLiga2,2,CD Eldense,25,28.5,6,468000,11700000,False,False,False
1287,2024,LaLiga2,2,FC Cartagena,21,28.9,5,514000,10800000,False,False,False
1288,2024,LaLiga2,2,CD Mirandés,21,24.6,4,424000,8900000,False,False,False


In [39]:
df['has_relegated'].value_counts()

has_relegated
False    289
True      41
Name: count, dtype: int64

In [40]:
X = df.drop(columns=['league', 'team', 'has_relegated'])  # Elimina la columna objetivo del conjunto de características
y = df['has_relegated']  # Define la variable objetivo

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1337, stratify=y)

TESTS BELOW

In [59]:
criterios = ['gini', 'entropy', 'log_loss']
profundidades = [3, 5, 10, 15, 25, 50, 100, 500]
mejor_clf = {'profundidad':'',
              'criterio':'',
              'accuracy':'',
              'precision':'',
              'recall':'',
              'f1':''}
mejor_recall = 0

# Asignamos un peso mayor para la clase 'False' y un peso menor para la clase 'True'
pesos_personalizados = {0: 1, 1: 7}

In [60]:
for criterio in tqdm(criterios):
    for profundidad in profundidades:
        # Crear el modelo de RandomForest
        rfc = RandomForestClassifier(criterion=criterio, max_depth=profundidad, random_state=1337, class_weight=pesos_personalizados)
        
        # Entrenar el modelo
        rfc.fit(X_train, y_train)
        
        # Realizar las predicciones
        predicciones = rfc.predict(X_test)
        
        # Calcular las métricas
        acc = accuracy_score(y_test, predicciones)
        prec = precision_score(y_test, predicciones)
        rec = recall_score(y_test, predicciones)
        f1 = f1_score(y_test, predicciones)
        
        if rec >= mejor_recall:
            mejor_recall = rec
            mejor_clf['profundidad'] = profundidad
            mejor_clf['criterio'] = criterio
            mejor_clf['accuracy'] = acc
            mejor_clf['precision'] = prec
            mejor_clf['recall'] = rec
            mejor_clf['f1'] = f1

100%|██████████| 3/3 [00:02<00:00,  1.22it/s]


In [61]:
mejor_clf

{'profundidad': 5,
 'criterio': 'log_loss',
 'accuracy': 0.8787878787878788,
 'precision': 0.5,
 'recall': 0.625,
 'f1': 0.5555555555555556}

MODEL TEST

In [62]:
clf = RandomForestClassifier(criterion='gini', max_depth=3, random_state=1337, class_weight='balanced')
clf

In [42]:
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [43]:
report = classification_report(y_test, y_pred, output_dict=True)
report_df = pd.DataFrame(report).transpose()
print(report_df[['precision', 'recall', 'f1-score', 'support']])

              precision    recall  f1-score   support
False          0.941176  0.827586  0.880734  58.00000
True           0.333333  0.625000  0.434783   8.00000
accuracy       0.803030  0.803030  0.803030   0.80303
macro avg      0.637255  0.726293  0.657758  66.00000
weighted avg   0.867499  0.803030  0.826679  66.00000
