<a href="https://colab.research.google.com/github/marcelloq2/BasedeDadosFutebol/blob/main/PyCaret_V%C3%ADdeo_03_Testando_V%C3%A1rios_Modelos_de_Classifica%C3%A7%C3%A3o.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Instalando o PyCaret

In [None]:
!pip install pycaret

### Importando as Bibliotecas e Funções

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pycaret.classification import *

import warnings
warnings.filterwarnings('ignore')

def drop_reset_index(df):
    """Return a copy of ``df`` without NaN rows, re-indexed from 1.

    Rows containing at least one missing value are removed, the old index is
    discarded, and the remaining rows are numbered 1..len(result). The frame
    passed in is not modified.
    """
    cleaned = df.dropna().reset_index(drop=True)
    # Shift the fresh 0-based index so that row labels start at 1.
    cleaned.index = cleaned.index + 1
    return cleaned

### Importando a Base de Dados

In [None]:
# Match results and odds file (BRA.csv) published by football-data.co.uk.
url = "https://www.football-data.co.uk/new/BRA.csv"

# Source column -> name used in the rest of the notebook. Only these columns
# are kept, in this order.
column_names = {
    'Date': 'Date',
    'Home': 'Home',
    'Away': 'Away',
    'HG': 'Goals_H',
    'AG': 'Goals_A',
    'Res': 'Result',
    'PH': 'Odd_H',
    'PD': 'Odd_D',
    'PA': 'Odd_A',
}
df = pd.read_csv(url)[list(column_names)].rename(columns=column_names)

# The file stores dates as day/month/year strings; parse them directly instead
# of splitting the string and re-assembling it as year-month-day.
df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y')

# Discard matches with any missing field and number the rows from 1.
df = drop_reset_index(df)
display(df)

### Criando as Variáveis

In [None]:
# Rolling-window length: how many previous matches feed each statistic.
n = 5

# Inverse of the decimal odds.
df['p_H'] = 1 / df['Odd_H']
df['p_D'] = 1 / df['Odd_D']
df['p_A'] = 1 / df['Odd_A']

# Goal difference seen from each side, weighted by that side's inverse odds.
df['SG_H_Pond'] = (df['Goals_H'] - df['Goals_A']) * df['p_H']
df['SG_A_Pond'] = (df['Goals_A'] - df['Goals_H']) * df['p_A']


def _lagged_rolling(values, window, stat):
    """Rolling mean or population std of ``values``, shifted down by one row.

    Parameters
    ----------
    values : pandas.Series
        Observations of a single team, in the frame's row order.
    window : int
        Number of rows per window; windows with fewer rows yield NaN.
    stat : str
        'mean' for the rolling mean, anything else for the rolling standard
        deviation with ``ddof=0``.

    Returns
    -------
    pandas.Series
        Same index as ``values``. Because of the one-row shift, each row holds
        the statistic of the ``window`` rows before it and never its own value.
    """
    roller = values.rolling(window=window, min_periods=window)
    rolled = roller.mean() if stat == 'mean' else roller.std(ddof=0)
    return rolled.shift(1)


# Home statistics use only each team's home matches, away statistics only its
# away matches. Rows are taken in the frame's current order.
# NOTE(review): this assumes the frame is in chronological order — confirm.
home_sg = df.groupby('Home')['SG_H_Pond']
away_sg = df.groupby('Away')['SG_A_Pond']

df['Media_SG_H'] = home_sg.transform(lambda s: _lagged_rolling(s, n, 'mean'))
df['Media_SG_A'] = away_sg.transform(lambda s: _lagged_rolling(s, n, 'mean'))

df['DesvPad_SG_H'] = home_sg.transform(lambda s: _lagged_rolling(s, n, 'std'))
df['DesvPad_SG_A'] = away_sg.transform(lambda s: _lagged_rolling(s, n, 'std'))

# Coefficient of variation: standard deviation relative to the mean.
df['CV_SG_H'] = df['DesvPad_SG_H'] / df['Media_SG_H']
df['CV_SG_A'] = df['DesvPad_SG_A'] / df['Media_SG_A']

# A zero denominator yields +inf or -inf; map both to NaN so the affected rows
# are removed together with the warm-up rows that lack a full window.
df = df.replace([np.inf, -np.inf], np.nan)
df = drop_reset_index(df)

# Intermediate columns are not needed once the features exist.
df = df.drop(columns=['SG_H_Pond', 'SG_A_Pond', 'DesvPad_SG_H', 'DesvPad_SG_A'])

### Inicializando o Modelo

In [None]:
# Target column: 1 when the match result is a home win ('H'), otherwise 0.
home_win = df['Result'] == 'H'
df['Back_Home'] = home_win.astype('int64')

# Profit of a one-unit back bet on the home side: odds minus the stake when it
# wins, -1 otherwise. The column is built as float from the start, so no float
# values are written into an integer column afterwards.
df['Profit'] = np.where(home_win, df['Odd_H'] - 1, -1.0)

In [None]:
# Split by calendar year: matches dated up to 2022 form the training set,
# matches from 2023 onwards form the test set.
match_year = df['Date'].dt.year
train_df = df.loc[match_year <= 2022]
test_df = df.loc[match_year >= 2023]

In [None]:
# Model inputs: lagged rolling mean and coefficient of variation of the
# weighted goal difference, for the home and the away side.
features = [
    'Media_SG_H',
    'Media_SG_A',
    'CV_SG_H',
    'CV_SG_A',
]

# Name of the binary target column.
label = 'Back_Home'

In [None]:
# Set up the PyCaret environment with the training features and target.
# session_id is fixed to a constant (presumably the experiment's random
# seed — confirm in the PyCaret docs).
clf = setup(
    data=train_df[features],
    target=train_df[label],
    session_id=123,
    verbose=False,
)

In [None]:
# Compare models to find the best one. The return value is not stored; it is
# only shown as the output of this cell.
compare_models()

In [None]:
# The index entries of the models() table are the identifiers later passed to
# create_model(); collect them into a plain list and show it.
clf_models = models()
modelos = list(clf_models.index)
modelos

In [None]:
# Backtest every listed model: train it, predict on the test matches, and
# report the result of backing the home side whenever the model predicts 1.
for i in modelos:
    modelo = create_model(i, verbose=False)
    df0 = predict_model(modelo, data=test_df, verbose=False)

    # Matches the model selected as bets.
    filtro = df0['prediction_label'] == 1
    Entradas = df0[filtro]

    try:
        Entradas = drop_reset_index(Entradas)

        # A model that never predicts 1 has nothing to plot or summarise;
        # say so explicitly instead of failing on an empty frame.
        if Entradas.empty:
            print("")
            print("Modelo: "+i)
            print("Nenhuma entrada selecionada no período de teste")
            continue

        # NOTE(review): this relies on predict_model() carrying the 'Profit'
        # column of test_df into its output — confirm for the installed
        # PyCaret version. If it is missing, the error is reported below.
        Entradas['Profit_acu'] = Entradas['Profit'].cumsum()
        lucro_total = Entradas['Profit_acu'].iloc[-1]
        profit = round(float(lucro_total), 2)
        # Return per unit staked, as a percentage (one unit per selected match).
        ROI = round(float(lucro_total / len(Entradas) * 100), 2)

        fig, ax = plt.subplots()
        ax.plot(Entradas['Profit_acu'])
        ax.set_title(f"Profit Acumulado - Modelo {i}")
        ax.set_xlabel("Número de Jogos")
        ax.set_ylabel("Profit Acumulado")
        plt.show()
        # Release the figure so one open figure per model does not accumulate.
        plt.close(fig)

        print("")
        print("Modelo: "+i)
        print("Profit:",profit,"stakes em", len(Entradas),"jogos")
        print("ROI:",ROI,"%")
    except Exception as erro:
        # Keep evaluating the remaining models, but make the failure visible
        # instead of silently skipping the model.
        print("")
        print("Modelo: "+i)
        print("Erro ao avaliar o modelo:", repr(erro))