In [None]:
# @title
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from IPython.display import display_html

# Funções utilitárias

def preprocess_data(df, columns_to_remove, categorical_columns, encoder, encoders=None):
    """
    Preprocess a DataFrame for training or inference.

    Drops the unwanted columns, adds home-vs-away rating difference features
    and label-encodes the categorical columns. The input frame is left
    untouched; a new DataFrame is returned.

    Args:
        df (pd.DataFrame): DataFrame to preprocess. Must contain the
            OffRtg/DefRtg/NetRtg/Pace ``_HOME`` and ``_AWAY`` columns.
        columns_to_remove (list): Columns to drop; missing ones are ignored.
        categorical_columns (list): Columns to label-encode.
        encoder (LabelEncoder): Encoder for the categorical columns.
        encoders (dict, optional): Per-column cache of fitted encoders.
            With the default ``None`` the single ``encoder`` is re-fitted on
            every column of every call, so the integer codes depend only on
            the values present in ``df`` and codes produced for training
            data may not match codes produced for inference data. Pass the
            same dict to the training call and to the inference calls to
            fit one copy of ``encoder`` per column on first use and reuse
            it afterwards.

    Returns:
        pd.DataFrame: Preprocessed DataFrame.
    """
    import copy

    df = df.drop(columns=columns_to_remove, errors='ignore')
    df['OffRtg_Diff'] = df['OffRtg_HOME'] - df['OffRtg_AWAY']
    df['OffRtg_Diff_HOME'] = df['OffRtg_HOME'] - df['DefRtg_AWAY']
    df['OffRtg_Diff_AWAY'] = df['OffRtg_AWAY'] - df['DefRtg_HOME']
    df['DefRtg_Diff'] = df['DefRtg_HOME'] - df['DefRtg_AWAY']
    df['NetRtg_Diff'] = df['NetRtg_HOME'] - df['NetRtg_AWAY']
    df['PaceRtg_Diff'] = df['Pace_HOME'] - df['Pace_AWAY']

    if encoders is None:
        # Legacy behaviour, kept so existing callers see no change.
        df[categorical_columns] = df[categorical_columns].apply(encoder.fit_transform)
        return df

    for column in categorical_columns:
        if column not in encoders:
            # Fit an independent copy so each column keeps its own mapping.
            fitted = copy.deepcopy(encoder)
            fitted.fit(df[column])
            encoders[column] = fitted
        df[column] = encoders[column].transform(df[column])
    return df

def predict_and_process(model, dftp_, categorical_columns, encoder):
    """
    Predict both scores for every game and derive total points and winner.

    The input frame is not modified, so the same frame can be passed to
    several models in turn.

    Args:
        model: Trained model whose ``predict`` returns two columns
            (home score, away score) per input row.
        dftp_ (pd.DataFrame): Input DataFrame for inference.
        categorical_columns (list): Categorical columns to encode.
        encoder (LabelEncoder): Encoder for the categorical columns.

    Returns:
        pd.DataFrame: Columns 'Home Team', 'Away Team', 'Result_HOME',
        'Result_AWAY', 'TP' and 'Winner', indexed like ``dftp_``.
    """
    features = preprocess_data(
        dftp_,
        columns_to_remove=['Unnamed: 0', 'Date', 'Location', 'AB_HOME', 'AB_AWAY', 'GOW_HOME', 'GOW_AWAY', 'Result'],
        categorical_columns=categorical_columns,
        encoder=encoder
    )
    pred = model.predict(features)

    # Build the output on a copy: writing the predictions into dftp_ would
    # leak them into the feature set of any later call with the same frame.
    results = dftp_[['Home Team', 'Away Team']].copy()
    results[['Result_HOME', 'Result_AWAY']] = pred
    results['Result_HOME'] = results['Result_HOME'].round(2)
    results['Result_AWAY'] = results['Result_AWAY'].round(2)
    # TP (total points) is computed from the rounded scores.
    results['TP'] = results['Result_HOME'] + results['Result_AWAY']
    # A tie in the predicted scores is attributed to the home side.
    results.loc[results['Result_HOME'] >= results['Result_AWAY'], 'Winner'] = 'HOME'
    results.loc[results['Result_HOME'] < results['Result_AWAY'], 'Winner'] = 'AWAY'
    return results[['Home Team', 'Away Team', 'Result_HOME', 'Result_AWAY', 'TP', 'Winner']]

def display_side_by_side(*dfs):
    """
    Display several DataFrames side by side.

    Each frame is rendered with ``to_html`` and every opening ``<table`` tag
    receives an inline ``display:inline`` style.

    Args:
        *dfs: DataFrames to display.
    """
    html_str = ''.join(df.to_html() for df in dfs)
    # Patch opening tags only: replacing every "table" substring would also
    # turn "</table>" into an invalid closing tag and alter cell text that
    # happens to contain the word.
    display_html(html_str.replace('<table', '<table style="display:inline"'), raw=True)

# Initial configuration
columns_to_remove = ['Unnamed: 0', 'Date', 'Location', 'AB_HOME', 'AB_AWAY', 'GOW_HOME', 'GOW_AWAY', 'Result']
categorical_columns = ['Season', 'Home Team', 'Away Team']
encoder = LabelEncoder()

# Load and preprocess the training data
df = pd.read_csv('/content/drive/MyDrive/Datasets/NBA/ALL.csv')
df = preprocess_data(df, columns_to_remove, categorical_columns, encoder)
X = df.drop(columns=['Result_HOME', 'Result_AWAY'])
y = df[['Result_HOME', 'Result_AWAY']]
# Weights grow linearly from 1 (first row) to 2 (last row).
# presumably rows are ordered oldest to newest so recent games count more — TODO confirm
sample_weights = np.linspace(1, 2, len(X))

# Train the models
model_rf = MultiOutputRegressor(RandomForestRegressor(random_state=42))
model_rf.fit(X, y, sample_weight=sample_weights)

model_xgbr = MultiOutputRegressor(
    XGBRegressor(objective='reg:squarederror', random_state=42)
)
model_xgbr.fit(X, y, sample_weight=sample_weights)


# Inference: read the file once and hand each model its own copy, because
# predict_and_process may add prediction columns to the frame it receives.
dftp = pd.read_csv('/content/drive/MyDrive/Datasets/NBA/NBA_TP.csv')

# RandomForest predictions
r1 = predict_and_process(model_rf, dftp.copy(), categorical_columns, encoder)

# XGBoost predictions
r2 = predict_and_process(model_xgbr, dftp.copy(), categorical_columns, encoder)

# Show the results side by side
display_side_by_side(r1, r2)

Unnamed: 0,Home Team,Away Team,Result_HOME,Result_AWAY,TP,Winner
0,Houston Rockets,Golden State Warriors,116.09,106.97,223.06,HOME
1,New Orleans Pelicans,Sacramento Kings,111.83,117.71,229.54,AWAY
2,Dallas Mavericks,Miami Heat,115.17,110.72,225.89,HOME
3,Minnesota Timberwolves,Oklahoma City Thunder,111.78,111.38,223.16,HOME
4,Utah Jazz,LA Clippers,109.13,114.98,224.11,AWAY

Unnamed: 0,Home Team,Away Team,Result_HOME,Result_AWAY,TP,Winner
0,Houston Rockets,Golden State Warriors,110.730003,107.599998,218.330002,HOME
1,New Orleans Pelicans,Sacramento Kings,110.690002,114.519997,225.209991,AWAY
2,Dallas Mavericks,Miami Heat,120.82,108.440002,229.26001,HOME
3,Minnesota Timberwolves,Oklahoma City Thunder,96.970001,111.739998,208.709991,AWAY
4,Utah Jazz,LA Clippers,108.739998,113.43,222.169998,AWAY


In [None]:
# @title
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from xgboost import XGBRegressor
from IPython.display import display_html

# Funções utilitárias

def preprocess_data(df, columns_to_remove, categorical_columns, encoder, encoders=None):
    """
    Preprocess a DataFrame for training or inference.

    Drops the unwanted columns, adds home-vs-away rating difference features
    and label-encodes the categorical columns. The input frame is left
    untouched; a new DataFrame is returned.

    Args:
        df (pd.DataFrame): DataFrame to preprocess. Must contain the
            OffRtg/DefRtg/NetRtg/Pace ``_HOME`` and ``_AWAY`` columns.
        columns_to_remove (list): Columns to drop; missing ones are ignored.
        categorical_columns (list): Columns to label-encode.
        encoder (LabelEncoder): Encoder for the categorical columns.
        encoders (dict, optional): Per-column cache of fitted encoders.
            With the default ``None`` the single ``encoder`` is re-fitted on
            every column of every call, so the integer codes depend only on
            the values present in ``df`` and codes produced for training
            data may not match codes produced for inference data (this
            matters most when ``df`` holds a single row). Pass the same dict
            to the training call and to the inference calls to fit one copy
            of ``encoder`` per column on first use and reuse it afterwards.

    Returns:
        pd.DataFrame: Preprocessed DataFrame.
    """
    import copy

    df = df.drop(columns=columns_to_remove, errors='ignore')
    df['OffRtg_Diff'] = df['OffRtg_HOME'] - df['OffRtg_AWAY']
    df['OffRtg_Diff_HOME'] = df['OffRtg_HOME'] - df['DefRtg_AWAY']
    df['OffRtg_Diff_AWAY'] = df['OffRtg_AWAY'] - df['DefRtg_HOME']
    df['DefRtg_Diff'] = df['DefRtg_HOME'] - df['DefRtg_AWAY']
    df['NetRtg_Diff'] = df['NetRtg_HOME'] - df['NetRtg_AWAY']
    df['PaceRtg_Diff'] = df['Pace_HOME'] - df['Pace_AWAY']

    if encoders is None:
        # Legacy behaviour, kept so existing callers see no change.
        df[categorical_columns] = df[categorical_columns].apply(encoder.fit_transform)
        return df

    for column in categorical_columns:
        if column not in encoders:
            # Fit an independent copy so each column keeps its own mapping.
            fitted = copy.deepcopy(encoder)
            fitted.fit(df[column])
            encoders[column] = fitted
        df[column] = encoders[column].transform(df[column])
    return df

def predict_and_process_row(model, row, categorical_columns, encoder):
    """
    Predict the scores for a single game and derive total points and winner.

    The team columns of the result carry the values found in ``row`` (the
    team names), not the label-encoded codes used as model features.

    Args:
        model: Trained model whose ``predict`` returns two columns
            (home score, away score) per input row.
        row (pd.DataFrame): DataFrame holding a single row for inference.
        categorical_columns (list): Categorical columns to encode.
        encoder (LabelEncoder): Encoder for the categorical columns.

    Returns:
        pd.DataFrame: One-row frame with 'Home Team', 'Away Team',
        'Result_HOME', 'Result_AWAY', 'TP' and 'Winner'.
    """
    features = preprocess_data(
        row,
        columns_to_remove=['Unnamed: 0', 'Date', 'Location', 'AB_HOME', 'AB_AWAY', 'GOW_HOME', 'GOW_AWAY', 'Result'],
        categorical_columns=categorical_columns,
        encoder=encoder
    )
    pred = model.predict(features)

    # Start from the untouched input so the team columns keep their names.
    result = row[['Home Team', 'Away Team']].copy()
    result[['Result_HOME', 'Result_AWAY']] = pred
    result['Result_HOME'] = result['Result_HOME'].round(2)
    result['Result_AWAY'] = result['Result_AWAY'].round(2)
    # TP (total points) is computed from the rounded scores.
    result['TP'] = result['Result_HOME'] + result['Result_AWAY']
    # Only the first row is inspected; a tie goes to the home side.
    result['Winner'] = 'HOME' if result['Result_HOME'].iloc[0] >= result['Result_AWAY'].iloc[0] else 'AWAY'
    return result[['Home Team', 'Away Team', 'Result_HOME', 'Result_AWAY', 'TP', 'Winner']]

def display_side_by_side(*dfs):
    """
    Display several DataFrames side by side.

    Each frame is rendered with ``to_html`` and every opening ``<table`` tag
    receives an inline ``display:inline`` style.

    Args:
        *dfs: DataFrames to display.
    """
    html_str = ''.join(df.to_html() for df in dfs)
    # Patch opening tags only: replacing every "table" substring would also
    # turn "</table>" into an invalid closing tag and alter cell text that
    # happens to contain the word.
    display_html(html_str.replace('<table', '<table style="display:inline"'), raw=True)

# Initial configuration
columns_to_remove = ['Unnamed: 0', 'Date', 'Location', 'AB_HOME', 'AB_AWAY', 'GOW_HOME', 'GOW_AWAY', 'Result']
categorical_columns = ['Season', 'Home Team', 'Away Team']
encoder = LabelEncoder()

# Load and preprocess the training data
df = pd.read_csv('/content/drive/MyDrive/Datasets/NBA/ALL.csv')
df = preprocess_data(df, columns_to_remove, categorical_columns, encoder)
X = df.drop(columns=['Result_HOME', 'Result_AWAY'])
y = df[['Result_HOME', 'Result_AWAY']]
# Weights grow linearly from 1 (first row) to 2 (last row).
# presumably rows are ordered oldest to newest so recent games count more — TODO confirm
sample_weights = np.linspace(1, 2, len(X))

# Train the models
model_rf = MultiOutputRegressor(RandomForestRegressor(random_state=42))
model_rf.fit(X, y, sample_weight=sample_weights)

model_xgbr = MultiOutputRegressor(
    XGBRegressor(objective='reg:squarederror', random_state=42)
)
model_xgbr.fit(X, y, sample_weight=sample_weights)


# Inference, one game at a time
dftp = pd.read_csv('/content/drive/MyDrive/Datasets/NBA/NBA_TP.csv')

# Per-row results are collected and concatenated once after the loop; this
# avoids re-copying the accumulated frame on every iteration and avoids
# concatenating onto an empty DataFrame.
rf_rows = []
xgbr_rows = []

# NOTE(review): preprocess_data fits the encoder on the frame it is given, so
# on a one-row frame the categorical codes do not depend on the training
# data — confirm this per-row variant is intentional.
for _, row in dftp.iterrows():
    row_df = pd.DataFrame([row])
    result_rf = predict_and_process_row(model_rf, row_df, categorical_columns, encoder)
    result_xgbr = predict_and_process_row(model_xgbr, row_df, categorical_columns, encoder)
    # Make sure the output carries the original team names, not encoded values.
    result_rf[['Home Team']], result_rf[['Away Team']] = row_df[['Home Team']], row_df[['Away Team']]
    result_xgbr[['Home Team']], result_xgbr[['Away Team']] = row_df[['Home Team']], row_df[['Away Team']]
    rf_rows.append(result_rf)
    xgbr_rows.append(result_xgbr)

# pd.concat rejects an empty list, so fall back to an empty frame.
results_rf = pd.concat(rf_rows, ignore_index=True) if rf_rows else pd.DataFrame()
results_xgbr = pd.concat(xgbr_rows, ignore_index=True) if xgbr_rows else pd.DataFrame()

# Show the results side by side
display_side_by_side(results_rf, results_xgbr)


Unnamed: 0,Home Team,Away Team,Result_HOME,Result_AWAY,TP,Winner
0,Houston Rockets,Golden State Warriors,116.32,107.14,223.46,HOME
1,New Orleans Pelicans,Sacramento Kings,113.52,116.4,229.92,AWAY
2,Dallas Mavericks,Miami Heat,114.72,110.28,225.0,HOME
3,Minnesota Timberwolves,Oklahoma City Thunder,111.25,111.35,222.6,AWAY
4,Utah Jazz,LA Clippers,110.29,112.39,222.68,AWAY

Unnamed: 0,Home Team,Away Team,Result_HOME,Result_AWAY,TP,Winner
0,Houston Rockets,Golden State Warriors,112.089996,111.410004,223.5,HOME
1,New Orleans Pelicans,Sacramento Kings,115.870003,116.110001,231.980011,AWAY
2,Dallas Mavericks,Miami Heat,115.470001,108.809998,224.279999,HOME
3,Minnesota Timberwolves,Oklahoma City Thunder,105.360001,110.32,215.679993,AWAY
4,Utah Jazz,LA Clippers,118.82,110.07,228.889999,HOME


In [None]:
# @title
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from IPython.display import display_html

# Funções utilitárias

def preprocess_data(df, columns_to_remove, categorical_columns, encoder, encoders=None):
    """
    Preprocess a DataFrame for training or inference.

    Drops the unwanted columns, adds home-vs-away rating difference features
    and label-encodes the categorical columns. The input frame is left
    untouched; a new DataFrame is returned.

    Args:
        df (pd.DataFrame): DataFrame to preprocess. Must contain the
            OffRtg/DefRtg/NetRtg/Pace ``_HOME`` and ``_AWAY`` columns.
        columns_to_remove (list): Columns to drop; missing ones are ignored.
        categorical_columns (list): Columns to label-encode.
        encoder (LabelEncoder): Encoder for the categorical columns.
        encoders (dict, optional): Per-column cache of fitted encoders.
            With the default ``None`` the single ``encoder`` is re-fitted on
            every column of every call, so the integer codes depend only on
            the values present in ``df`` and codes produced for training
            data may not match codes produced for inference data. Pass the
            same dict to the training call and to the inference calls to
            fit one copy of ``encoder`` per column on first use and reuse
            it afterwards.

    Returns:
        pd.DataFrame: Preprocessed DataFrame.
    """
    import copy

    df = df.drop(columns=columns_to_remove, errors='ignore')
    df['OffRtg_Diff'] = df['OffRtg_HOME'] - df['OffRtg_AWAY']
    df['OffRtg_Diff_HOME'] = df['OffRtg_HOME'] - df['DefRtg_AWAY']
    df['OffRtg_Diff_AWAY'] = df['OffRtg_AWAY'] - df['DefRtg_HOME']
    df['DefRtg_Diff'] = df['DefRtg_HOME'] - df['DefRtg_AWAY']
    df['NetRtg_Diff'] = df['NetRtg_HOME'] - df['NetRtg_AWAY']
    df['PaceRtg_Diff'] = df['Pace_HOME'] - df['Pace_AWAY']

    if encoders is None:
        # Legacy behaviour, kept so existing callers see no change.
        df[categorical_columns] = df[categorical_columns].apply(encoder.fit_transform)
        return df

    for column in categorical_columns:
        if column not in encoders:
            # Fit an independent copy so each column keeps its own mapping.
            fitted = copy.deepcopy(encoder)
            fitted.fit(df[column])
            encoders[column] = fitted
        df[column] = encoders[column].transform(df[column])
    return df

def predict_and_process(model, dftp_, categorical_columns, encoder):
    """
    Predict both scores for every game and derive total points and winner.

    The input frame is not modified, so the same frame can be passed to
    several models in turn.

    Args:
        model: Trained model whose ``predict`` returns two columns
            (home score, away score) per input row.
        dftp_ (pd.DataFrame): Input DataFrame for inference.
        categorical_columns (list): Categorical columns to encode.
        encoder (LabelEncoder): Encoder for the categorical columns.

    Returns:
        pd.DataFrame: Columns 'Home Team', 'Away Team', 'Result_HOME',
        'Result_AWAY', 'TP' and 'Winner', indexed like ``dftp_``.
    """
    features = preprocess_data(
        dftp_,
        columns_to_remove=['Unnamed: 0', 'Date', 'Location', 'AB_HOME', 'AB_AWAY', 'GOW_HOME', 'GOW_AWAY', 'Result'],
        categorical_columns=categorical_columns,
        encoder=encoder
    )
    pred = model.predict(features)

    # Build the output on a copy: writing the predictions into dftp_ would
    # leak them into the feature set of any later call with the same frame.
    results = dftp_[['Home Team', 'Away Team']].copy()
    results[['Result_HOME', 'Result_AWAY']] = pred
    results['Result_HOME'] = results['Result_HOME'].round(2)
    results['Result_AWAY'] = results['Result_AWAY'].round(2)
    # TP (total points) is computed from the rounded scores.
    results['TP'] = results['Result_HOME'] + results['Result_AWAY']
    # A tie in the predicted scores is attributed to the home side.
    results.loc[results['Result_HOME'] >= results['Result_AWAY'], 'Winner'] = 'HOME'
    results.loc[results['Result_HOME'] < results['Result_AWAY'], 'Winner'] = 'AWAY'
    return results[['Home Team', 'Away Team', 'Result_HOME', 'Result_AWAY', 'TP', 'Winner']]

def display_side_by_side(*dfs):
    """
    Display several DataFrames side by side.

    Each frame is rendered with ``to_html`` and every opening ``<table`` tag
    receives an inline ``display:inline`` style.

    Args:
        *dfs: DataFrames to display.
    """
    html_str = ''.join(df.to_html() for df in dfs)
    # Patch opening tags only: replacing every "table" substring would also
    # turn "</table>" into an invalid closing tag and alter cell text that
    # happens to contain the word.
    display_html(html_str.replace('<table', '<table style="display:inline"'), raw=True)

# Initial configuration
columns_to_remove = ['Unnamed: 0', 'Date', 'Location', 'AB_HOME', 'AB_AWAY', 'GOW_HOME', 'GOW_AWAY', 'Result']
categorical_columns = ['Season', 'Home Team', 'Away Team']
encoder = LabelEncoder()

# Load and preprocess the training data
df = pd.read_csv('/content/drive/MyDrive/Datasets/NBA/ALL.csv')
df = preprocess_data(df, columns_to_remove, categorical_columns, encoder)
X = df.drop(columns=['Result_HOME', 'Result_AWAY'])
y = df[['Result_HOME', 'Result_AWAY']]

# Sample weights follow log(x) + x for x evenly spaced over [1, 2],
# so they increase from the first row to the last.
sample_weights_ = np.linspace(1, 2, len(X))
sample_weights_ = np.log(sample_weights_) + sample_weights_

# Min-max rescale the weights back onto [1, 2].
# NOTE(review): max == min when X has a single row, which divides by zero.
sample_weights_ = 1 + (sample_weights_ - sample_weights_.min()) / (sample_weights_.max() - sample_weights_.min())
sample_weights = sample_weights_

# Train the models
model_rf = MultiOutputRegressor(RandomForestRegressor(random_state=42))
model_rf.fit(X, y, sample_weight=sample_weights)

model_xgbr = MultiOutputRegressor(
    XGBRegressor(objective='reg:squarederror', random_state=42)
)
model_xgbr.fit(X, y, sample_weight=sample_weights)


# Inference: read the file once and hand each model its own copy, because
# predict_and_process may add prediction columns to the frame it receives.
dftp = pd.read_csv('/content/drive/MyDrive/Datasets/NBA/NBA_TP.csv')

# RandomForest predictions
r1_ = predict_and_process(model_rf, dftp.copy(), categorical_columns, encoder)

# XGBoost predictions
r2_ = predict_and_process(model_xgbr, dftp.copy(), categorical_columns, encoder)

# Show the results side by side
display_side_by_side(r1_, r2_)

Unnamed: 0,Home Team,Away Team,Result_HOME,Result_AWAY,TP,Winner
0,Houston Rockets,Golden State Warriors,115.85,107.67,223.52,HOME
1,New Orleans Pelicans,Sacramento Kings,112.34,117.59,229.93,AWAY
2,Dallas Mavericks,Miami Heat,113.87,111.73,225.6,HOME
3,Minnesota Timberwolves,Oklahoma City Thunder,110.4,112.16,222.56,AWAY
4,Utah Jazz,LA Clippers,109.05,115.9,224.95,AWAY

Unnamed: 0,Home Team,Away Team,Result_HOME,Result_AWAY,TP,Winner
0,Houston Rockets,Golden State Warriors,110.029999,104.589996,214.619995,HOME
1,New Orleans Pelicans,Sacramento Kings,114.57,123.279999,237.850006,AWAY
2,Dallas Mavericks,Miami Heat,118.089996,112.830002,230.919998,HOME
3,Minnesota Timberwolves,Oklahoma City Thunder,109.709999,107.699997,217.410004,HOME
4,Utah Jazz,LA Clippers,114.400002,114.260002,228.660004,HOME


In [None]:
# @title
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from xgboost import XGBRegressor
from IPython.display import display_html

# Funções utilitárias

def preprocess_data(df, columns_to_remove, categorical_columns, encoder, encoders=None):
    """
    Preprocess a DataFrame for training or inference.

    Drops the unwanted columns, adds home-vs-away rating difference features
    and label-encodes the categorical columns. The input frame is left
    untouched; a new DataFrame is returned.

    Args:
        df (pd.DataFrame): DataFrame to preprocess. Must contain the
            OffRtg/DefRtg/NetRtg/Pace ``_HOME`` and ``_AWAY`` columns.
        columns_to_remove (list): Columns to drop; missing ones are ignored.
        categorical_columns (list): Columns to label-encode.
        encoder (LabelEncoder): Encoder for the categorical columns.
        encoders (dict, optional): Per-column cache of fitted encoders.
            With the default ``None`` the single ``encoder`` is re-fitted on
            every column of every call, so the integer codes depend only on
            the values present in ``df`` and codes produced for training
            data may not match codes produced for inference data (this
            matters most when ``df`` holds a single row). Pass the same dict
            to the training call and to the inference calls to fit one copy
            of ``encoder`` per column on first use and reuse it afterwards.

    Returns:
        pd.DataFrame: Preprocessed DataFrame.
    """
    import copy

    df = df.drop(columns=columns_to_remove, errors='ignore')
    df['OffRtg_Diff'] = df['OffRtg_HOME'] - df['OffRtg_AWAY']
    df['OffRtg_Diff_HOME'] = df['OffRtg_HOME'] - df['DefRtg_AWAY']
    df['OffRtg_Diff_AWAY'] = df['OffRtg_AWAY'] - df['DefRtg_HOME']
    df['DefRtg_Diff'] = df['DefRtg_HOME'] - df['DefRtg_AWAY']
    df['NetRtg_Diff'] = df['NetRtg_HOME'] - df['NetRtg_AWAY']
    df['PaceRtg_Diff'] = df['Pace_HOME'] - df['Pace_AWAY']

    if encoders is None:
        # Legacy behaviour, kept so existing callers see no change.
        df[categorical_columns] = df[categorical_columns].apply(encoder.fit_transform)
        return df

    for column in categorical_columns:
        if column not in encoders:
            # Fit an independent copy so each column keeps its own mapping.
            fitted = copy.deepcopy(encoder)
            fitted.fit(df[column])
            encoders[column] = fitted
        df[column] = encoders[column].transform(df[column])
    return df

def predict_and_process_row(model, row, categorical_columns, encoder):
    """
    Predict the scores for a single game and derive total points and winner.

    The team columns of the result carry the values found in ``row`` (the
    team names), not the label-encoded codes used as model features.

    Args:
        model: Trained model whose ``predict`` returns two columns
            (home score, away score) per input row.
        row (pd.DataFrame): DataFrame holding a single row for inference.
        categorical_columns (list): Categorical columns to encode.
        encoder (LabelEncoder): Encoder for the categorical columns.

    Returns:
        pd.DataFrame: One-row frame with 'Home Team', 'Away Team',
        'Result_HOME', 'Result_AWAY', 'TP' and 'Winner'.
    """
    features = preprocess_data(
        row,
        columns_to_remove=['Unnamed: 0', 'Date', 'Location', 'AB_HOME', 'AB_AWAY', 'GOW_HOME', 'GOW_AWAY', 'Result'],
        categorical_columns=categorical_columns,
        encoder=encoder
    )
    pred = model.predict(features)

    # Start from the untouched input so the team columns keep their names.
    result = row[['Home Team', 'Away Team']].copy()
    result[['Result_HOME', 'Result_AWAY']] = pred
    result['Result_HOME'] = result['Result_HOME'].round(2)
    result['Result_AWAY'] = result['Result_AWAY'].round(2)
    # TP (total points) is computed from the rounded scores.
    result['TP'] = result['Result_HOME'] + result['Result_AWAY']
    # Only the first row is inspected; a tie goes to the home side.
    result['Winner'] = 'HOME' if result['Result_HOME'].iloc[0] >= result['Result_AWAY'].iloc[0] else 'AWAY'
    return result[['Home Team', 'Away Team', 'Result_HOME', 'Result_AWAY', 'TP', 'Winner']]

def display_side_by_side(*dfs):
    """
    Display several DataFrames side by side.

    Each frame is rendered with ``to_html`` and every opening ``<table`` tag
    receives an inline ``display:inline`` style.

    Args:
        *dfs: DataFrames to display.
    """
    html_str = ''.join(df.to_html() for df in dfs)
    # Patch opening tags only: replacing every "table" substring would also
    # turn "</table>" into an invalid closing tag and alter cell text that
    # happens to contain the word.
    display_html(html_str.replace('<table', '<table style="display:inline"'), raw=True)

# Initial configuration
columns_to_remove = ['Unnamed: 0', 'Date', 'Location', 'AB_HOME', 'AB_AWAY', 'GOW_HOME', 'GOW_AWAY', 'Result']
categorical_columns = ['Season', 'Home Team', 'Away Team']
encoder = LabelEncoder()

# Load and preprocess the training data
df = pd.read_csv('/content/drive/MyDrive/Datasets/NBA/ALL.csv')
df = preprocess_data(df, columns_to_remove, categorical_columns, encoder)
X = df.drop(columns=['Result_HOME', 'Result_AWAY'])
y = df[['Result_HOME', 'Result_AWAY']]

# Sample weights follow log(x) + x for x evenly spaced over [1, 2],
# so they increase from the first row to the last.
sample_weights_ = np.linspace(1, 2, len(X))
sample_weights_ = np.log(sample_weights_) + sample_weights_

# Min-max rescale the weights back onto [1, 2].
# NOTE(review): max == min when X has a single row, which divides by zero.
sample_weights_ = 1 + (sample_weights_ - sample_weights_.min()) / (sample_weights_.max() - sample_weights_.min())
sample_weights = sample_weights_

# Train the models
model_rf = MultiOutputRegressor(RandomForestRegressor(random_state=42))
model_rf.fit(X, y, sample_weight=sample_weights)

model_xgbr = MultiOutputRegressor(
    XGBRegressor(objective='reg:squarederror', random_state=42)
)
model_xgbr.fit(X, y, sample_weight=sample_weights)


# Inference, one game at a time
dftp = pd.read_csv('/content/drive/MyDrive/Datasets/NBA/NBA_TP.csv')

# Per-row results are collected and concatenated once after the loop; this
# avoids re-copying the accumulated frame on every iteration and avoids
# concatenating onto an empty DataFrame.
rf_rows = []
xgbr_rows = []

# NOTE(review): preprocess_data fits the encoder on the frame it is given, so
# on a one-row frame the categorical codes do not depend on the training
# data — confirm this per-row variant is intentional.
for _, row in dftp.iterrows():
    row_df = pd.DataFrame([row])
    result_rf = predict_and_process_row(model_rf, row_df, categorical_columns, encoder)
    result_xgbr = predict_and_process_row(model_xgbr, row_df, categorical_columns, encoder)
    # Make sure the output carries the original team names, not encoded values.
    result_rf[['Home Team']], result_rf[['Away Team']] = row_df[['Home Team']], row_df[['Away Team']]
    result_xgbr[['Home Team']], result_xgbr[['Away Team']] = row_df[['Home Team']], row_df[['Away Team']]
    rf_rows.append(result_rf)
    xgbr_rows.append(result_xgbr)

# pd.concat rejects an empty list, so fall back to an empty frame.
results_rf_ = pd.concat(rf_rows, ignore_index=True) if rf_rows else pd.DataFrame()
results_xgbr_ = pd.concat(xgbr_rows, ignore_index=True) if xgbr_rows else pd.DataFrame()

# Show the results side by side
display_side_by_side(results_rf_, results_xgbr_)


Unnamed: 0,Home Team,Away Team,Result_HOME,Result_AWAY,TP,Winner
0,Houston Rockets,Golden State Warriors,116.17,107.85,224.02,HOME
1,New Orleans Pelicans,Sacramento Kings,113.93,117.18,231.11,AWAY
2,Dallas Mavericks,Miami Heat,113.24,111.47,224.71,HOME
3,Minnesota Timberwolves,Oklahoma City Thunder,110.36,112.45,222.81,AWAY
4,Utah Jazz,LA Clippers,109.07,114.55,223.62,AWAY

Unnamed: 0,Home Team,Away Team,Result_HOME,Result_AWAY,TP,Winner
0,Houston Rockets,Golden State Warriors,112.5,105.709999,218.209991,HOME
1,New Orleans Pelicans,Sacramento Kings,116.790001,112.93,229.720001,HOME
2,Dallas Mavericks,Miami Heat,113.099998,112.790001,225.889999,HOME
3,Minnesota Timberwolves,Oklahoma City Thunder,109.769997,110.169998,219.940002,AWAY
4,Utah Jazz,LA Clippers,131.440002,112.0,243.440002,HOME


In [None]:
# @title
def calcular_estatisticas(df):
    """
    Summarise each row of ``df`` across its columns.

    Args:
        df (pd.DataFrame): Numeric frame; every column is one series of
            values to aggregate.

    Returns:
        pd.DataFrame: Indexed like ``df`` with the columns 'media' (mean),
        'mediana' (median), 'minimo' (min) and 'maximo' (max).
    """
    # axis=1 aggregates across columns, giving one value per row.
    return pd.DataFrame({
        'media': df.mean(axis=1),
        'mediana': df.median(axis=1),
        'minimo': df.min(axis=1),
        'maximo': df.max(axis=1),
    })

# The eight prediction tables built by the regression cells
# (two models, batch and per-row inference, two weighting schemes).
dataframes = [r1, r2, results_rf, results_xgbr, r1_, r2_, results_rf_, results_xgbr_]

# Put the 'Result_HOME' column of every table side by side; each table is
# re-indexed from 0 first so the rows line up by position.
df_tp = pd.concat(
    [frame.reset_index(drop=True)[['Result_HOME']] for frame in dataframes],
    axis=1,
)

# Row-wise mean / median / min / max of the predicted home score
estatisticas = calcular_estatisticas(df_tp)

display_side_by_side(r1[['Home Team', 'Away Team']], estatisticas)

Unnamed: 0,Home Team,Away Team
0,Houston Rockets,Golden State Warriors
1,New Orleans Pelicans,Sacramento Kings
2,Dallas Mavericks,Miami Heat
3,Minnesota Timberwolves,Oklahoma City Thunder
4,Utah Jazz,LA Clippers

Unnamed: 0,media,mediana,minimo,maximo
0,113.7225,114.175,110.029999,116.32
1,113.692501,113.725,110.690002,116.790001
2,115.559999,114.945,113.099998,120.82
3,108.2,110.064998,96.970001,111.78
4,113.8675,109.71,108.739998,131.440002


In [None]:
# @title
def calcular_estatisticas(df):
    """
    Summarise each row of ``df`` across its columns.

    Args:
        df (pd.DataFrame): Numeric frame; every column is one series of
            values to aggregate.

    Returns:
        pd.DataFrame: Indexed like ``df`` with the columns 'media' (mean),
        'mediana' (median), 'minimo' (min) and 'maximo' (max).
    """
    # axis=1 aggregates across columns, giving one value per row.
    return pd.DataFrame({
        'media': df.mean(axis=1),
        'mediana': df.median(axis=1),
        'minimo': df.min(axis=1),
        'maximo': df.max(axis=1),
    })

# The eight prediction tables built by the regression cells
# (two models, batch and per-row inference, two weighting schemes).
dataframes = [r1, r2, results_rf, results_xgbr, r1_, r2_, results_rf_, results_xgbr_]

# Put the 'Result_AWAY' column of every table side by side; each table is
# re-indexed from 0 first so the rows line up by position.
df_tp = pd.concat(
    [frame.reset_index(drop=True)[['Result_AWAY']] for frame in dataframes],
    axis=1,
)

# Row-wise mean / median / min / max of the predicted away score
estatisticas = calcular_estatisticas(df_tp)

display_side_by_side(r1[['Home Team', 'Away Team']], estatisticas)

Unnamed: 0,Home Team,Away Team
0,Houston Rockets,Golden State Warriors
1,New Orleans Pelicans,Sacramento Kings
2,Dallas Mavericks,Miami Heat
3,Minnesota Timberwolves,Oklahoma City Thunder
4,Utah Jazz,LA Clippers

Unnamed: 0,media,mediana,minimo,maximo
0,107.3675,107.369999,104.589996,111.410004
1,116.965,116.79,112.93,123.279999
2,110.88375,111.095,108.440002,112.830002
3,110.908749,111.365,107.699997,112.45
4,113.4475,113.845001,110.07,115.9


In [None]:
# @title
def calcular_estatisticas(df):
    """
    Summarise each row of ``df`` across its columns.

    Args:
        df (pd.DataFrame): Numeric frame; every column is one series of
            values to aggregate.

    Returns:
        pd.DataFrame: Indexed like ``df`` with the columns 'media' (mean),
        'mediana' (median), 'minimo' (min) and 'maximo' (max).
    """
    # axis=1 aggregates across columns, giving one value per row.
    return pd.DataFrame({
        'media': df.mean(axis=1),
        'mediana': df.median(axis=1),
        'minimo': df.min(axis=1),
        'maximo': df.max(axis=1),
    })

# The eight prediction tables built by the regression cells
# (two models, batch and per-row inference, two weighting schemes).
dataframes = [r1, r2, results_rf, results_xgbr, r1_, r2_, results_rf_, results_xgbr_]

# Put the 'TP' (total points) column of every table side by side; each
# table is re-indexed from 0 first so the rows line up by position.
df_tp = pd.concat(
    [frame.reset_index(drop=True)[['TP']] for frame in dataframes],
    axis=1,
)

# Row-wise mean / median / min / max of the predicted total points
estatisticas = calcular_estatisticas(df_tp)

display_side_by_side(r1[['Home Team', 'Away Team']], estatisticas)

Unnamed: 0,Home Team,Away Team
0,Houston Rockets,Golden State Warriors
1,New Orleans Pelicans,Sacramento Kings
2,Dallas Mavericks,Miami Heat
3,Minnesota Timberwolves,Oklahoma City Thunder
4,Utah Jazz,LA Clippers

Unnamed: 0,media,mediana,minimo,maximo
0,221.089999,223.26,214.619995,224.02
1,230.657501,229.925,225.209991,237.850006
2,226.443751,225.745,224.279999,230.919998
3,219.108749,221.250001,208.709991,223.16
4,227.315,224.53,222.169998,243.440002


In [None]:
# @title
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score
from IPython.display import display_html

# Funções utilitárias

def preprocess_data(df, columns_to_remove, categorical_columns, encoder, encoders=None):
    """
    Preprocess a DataFrame for training or inference.

    Drops the unwanted columns, adds home-vs-away rating difference
    features, label-encodes the categorical columns and, when both score
    columns are present, adds the 'Outcome' target. The input frame is left
    untouched; a new DataFrame is returned.

    Args:
        df (pd.DataFrame): DataFrame to preprocess. Must contain the
            OffRtg/DefRtg/NetRtg/Pace ``_HOME`` and ``_AWAY`` columns.
        columns_to_remove (list): Columns to drop; missing ones are ignored.
        categorical_columns (list): Columns to label-encode.
        encoder (LabelEncoder): Encoder for the categorical columns.
        encoders (dict, optional): Per-column cache of fitted encoders.
            With the default ``None`` the single ``encoder`` is re-fitted on
            every column of every call, so the integer codes depend only on
            the values present in ``df`` and codes produced for training
            data may not match codes produced for inference data. Pass the
            same dict to the training call and to the inference calls to
            fit one copy of ``encoder`` per column on first use and reuse
            it afterwards.

    Returns:
        pd.DataFrame: Preprocessed DataFrame. 'Outcome' is 1 when
        Result_HOME > Result_AWAY and 2 otherwise.
    """
    import copy

    df = df.drop(columns=columns_to_remove, errors='ignore')
    df['OffRtg_Diff'] = df['OffRtg_HOME'] - df['OffRtg_AWAY']
    df['OffRtg_Diff_HOME'] = df['OffRtg_HOME'] - df['DefRtg_AWAY']
    df['OffRtg_Diff_AWAY'] = df['OffRtg_AWAY'] - df['DefRtg_HOME']
    df['DefRtg_Diff'] = df['DefRtg_HOME'] - df['DefRtg_AWAY']
    df['NetRtg_Diff'] = df['NetRtg_HOME'] - df['NetRtg_AWAY']
    df['PaceRtg_Diff'] = df['Pace_HOME'] - df['Pace_AWAY']

    if encoders is None:
        # Legacy behaviour, kept so existing callers see no change.
        df[categorical_columns] = df[categorical_columns].apply(encoder.fit_transform)
    else:
        for column in categorical_columns:
            if column not in encoders:
                # Fit an independent copy so each column keeps its own mapping.
                fitted = copy.deepcopy(encoder)
                fitted.fit(df[column])
                encoders[column] = fitted
            df[column] = encoders[column].transform(df[column])

    # Classification target; only available when the scores are known.
    if 'Result_HOME' in df.columns and 'Result_AWAY' in df.columns:
        df['Outcome'] = np.where(df['Result_HOME'] > df['Result_AWAY'], 1, 2)
    return df

def display_side_by_side(*dfs):
    """
    Display several DataFrames side by side.

    Each frame is rendered with ``to_html`` and every opening ``<table`` tag
    receives an inline ``display:inline`` style.

    Args:
        *dfs: DataFrames to display.
    """
    html_str = ''.join(df.to_html() for df in dfs)
    # Patch opening tags only: replacing every "table" substring would also
    # turn "</table>" into an invalid closing tag and alter cell text that
    # happens to contain the word.
    display_html(html_str.replace('<table', '<table style="display:inline"'), raw=True)

# Initial configuration
columns_to_remove = ['Unnamed: 0', 'Date', 'Location', 'AB_HOME', 'AB_AWAY', 'GOW_HOME', 'GOW_AWAY', 'Result']
categorical_columns = ['Season', 'Home Team', 'Away Team']
encoder = LabelEncoder()

# Load the training data and run it through the shared preprocessing
df = preprocess_data(
    pd.read_csv('/content/drive/MyDrive/Datasets/NBA/ALL.csv'),
    columns_to_remove,
    categorical_columns,
    encoder,
)

# Target is the Outcome label; the raw scores are excluded from the features
# because Outcome is derived from them.
y = df['Outcome']
X = df.drop(columns=['Outcome', 'Result_HOME', 'Result_AWAY'])

# Fit both classifiers on the full training set (fit returns the estimator)
rf_model = RandomForestClassifier(random_state=42).fit(X, y)
gb_model = GradientBoostingClassifier(random_state=42).fit(X, y)

# Inference: keep the raw frame for display, preprocess a copy for the models
dftp = pd.read_csv('/content/drive/MyDrive/Datasets/NBA/NBA_TP.csv')
X_infer = preprocess_data(dftp.copy(), columns_to_remove, categorical_columns, encoder)

# Attach one prediction column per model
dftp = dftp.assign(
    RF_Predicted_Outcome=rf_model.predict(X_infer),
    GB_Predicted_Outcome=gb_model.predict(X_infer),
)

# Show teams and both predictions
display_side_by_side(dftp[['Home Team', 'Away Team', 'RF_Predicted_Outcome', 'GB_Predicted_Outcome']])


Unnamed: 0,Home Team,Away Team,RF_Predicted_Outcome,GB_Predicted_Outcome
0,Houston Rockets,Golden State Warriors,1,1
1,New Orleans Pelicans,Sacramento Kings,2,2
2,Dallas Mavericks,Miami Heat,1,1
3,Minnesota Timberwolves,Oklahoma City Thunder,1,2
4,Utah Jazz,LA Clippers,2,2


In [None]:
# @title
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score
from IPython.display import display_html

# Funções utilitárias

def preprocess_data(df, columns_to_remove, categorical_columns, encoder, encoders=None):
    """
    Preprocess a DataFrame for training or inference.

    Drops the unwanted columns, adds home-vs-away rating difference
    features, label-encodes the categorical columns and, when both score
    columns are present, adds the 'Outcome' target. The input frame is left
    untouched; a new DataFrame is returned.

    Args:
        df (pd.DataFrame): DataFrame to preprocess. Must contain the
            OffRtg/DefRtg/NetRtg/Pace ``_HOME`` and ``_AWAY`` columns.
        columns_to_remove (list): Columns to drop; missing ones are ignored.
        categorical_columns (list): Columns to label-encode.
        encoder (LabelEncoder): Encoder for the categorical columns.
        encoders (dict, optional): Per-column cache of fitted encoders.
            With the default ``None`` the single ``encoder`` is re-fitted on
            every column of every call, so the integer codes depend only on
            the values present in ``df`` and codes produced for training
            data may not match codes produced for inference data (this
            matters most when ``df`` holds a single row). Pass the same dict
            to the training call and to the inference calls to fit one copy
            of ``encoder`` per column on first use and reuse it afterwards.

    Returns:
        pd.DataFrame: Preprocessed DataFrame. 'Outcome' is 1 when
        Result_HOME > Result_AWAY and 2 otherwise.
    """
    import copy

    df = df.drop(columns=columns_to_remove, errors='ignore')
    df['OffRtg_Diff'] = df['OffRtg_HOME'] - df['OffRtg_AWAY']
    df['OffRtg_Diff_HOME'] = df['OffRtg_HOME'] - df['DefRtg_AWAY']
    df['OffRtg_Diff_AWAY'] = df['OffRtg_AWAY'] - df['DefRtg_HOME']
    df['DefRtg_Diff'] = df['DefRtg_HOME'] - df['DefRtg_AWAY']
    df['NetRtg_Diff'] = df['NetRtg_HOME'] - df['NetRtg_AWAY']
    df['PaceRtg_Diff'] = df['Pace_HOME'] - df['Pace_AWAY']

    if encoders is None:
        # Legacy behaviour, kept so existing callers see no change.
        df[categorical_columns] = df[categorical_columns].apply(encoder.fit_transform)
    else:
        for column in categorical_columns:
            if column not in encoders:
                # Fit an independent copy so each column keeps its own mapping.
                fitted = copy.deepcopy(encoder)
                fitted.fit(df[column])
                encoders[column] = fitted
            df[column] = encoders[column].transform(df[column])

    # Classification target; only available when the scores are known.
    if 'Result_HOME' in df.columns and 'Result_AWAY' in df.columns:
        df['Outcome'] = np.where(df['Result_HOME'] > df['Result_AWAY'], 1, 2)
    return df

def display_side_by_side(*dfs):
    """
    Display several DataFrames side by side.

    Each frame is rendered with ``to_html`` and every opening ``<table`` tag
    receives an inline ``display:inline`` style.

    Args:
        *dfs: DataFrames to display.
    """
    html_str = ''.join(df.to_html() for df in dfs)
    # Patch opening tags only: replacing every "table" substring would also
    # turn "</table>" into an invalid closing tag and alter cell text that
    # happens to contain the word.
    display_html(html_str.replace('<table', '<table style="display:inline"'), raw=True)

# Initial configuration
columns_to_remove = ['Unnamed: 0', 'Date', 'Location', 'AB_HOME', 'AB_AWAY', 'GOW_HOME', 'GOW_AWAY', 'Result']
categorical_columns = ['Season', 'Home Team', 'Away Team']
encoder = LabelEncoder()

# Load and preprocess the training data
df = pd.read_csv('/content/drive/MyDrive/Datasets/NBA/ALL.csv')
df = preprocess_data(df, columns_to_remove, categorical_columns, encoder)

# Split features and target; the raw scores are excluded from the features
# because Outcome is derived from them.
X = df.drop(columns=['Outcome', 'Result_HOME', 'Result_AWAY'])
y = df['Outcome']

# Train the RandomForestClassifier
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X, y)

# Train the GradientBoostingClassifier
gb_model = GradientBoostingClassifier(random_state=42)
gb_model.fit(X, y)

# Inference, one game at a time
dftp = pd.read_csv('/content/drive/MyDrive/Datasets/NBA/NBA_TP.csv')

# Per-row results are collected and concatenated once after the loop; this
# avoids re-copying the accumulated frame on every iteration and avoids
# concatenating onto an empty DataFrame.
rf_rows = []
gb_rows = []

# NOTE(review): preprocess_data fits the encoder on the frame it is given, so
# on a one-row frame the categorical codes do not depend on the training
# data — confirm this per-row variant is intentional.
for _, row in dftp.iterrows():
    row_df = pd.DataFrame([row])
    X_infer = row_df.copy()
    X_infer = preprocess_data(X_infer, columns_to_remove, categorical_columns, encoder)
    # Team names come from the raw row, predictions from the encoded features.
    result_rf = row_df[['Home Team', 'Away Team']].copy()
    result_gb = row_df[['Home Team', 'Away Team']].copy()
    result_rf['RF_Predicted_Outcome'] = rf_model.predict(X_infer)
    result_gb['GB_Predicted_Outcome'] = gb_model.predict(X_infer)
    rf_rows.append(result_rf)
    gb_rows.append(result_gb)

# pd.concat rejects an empty list, so fall back to an empty frame.
# NOTE: results_rf is also the name the statistics cells read a 'TP' column
# from; re-run the regression cells before re-running those.
results_rf = pd.concat(rf_rows, ignore_index=True) if rf_rows else pd.DataFrame()
results_gb = pd.concat(gb_rows, ignore_index=True) if gb_rows else pd.DataFrame()

# Merge both predictions into one table and display it
results_rf['GB_Predicted_Outcome'] = results_gb['GB_Predicted_Outcome']
display_side_by_side(results_rf)

Unnamed: 0,Home Team,Away Team,RF_Predicted_Outcome,GB_Predicted_Outcome
0,Houston Rockets,Golden State Warriors,1,1
1,New Orleans Pelicans,Sacramento Kings,2,2
2,Dallas Mavericks,Miami Heat,1,1
3,Minnesota Timberwolves,Oklahoma City Thunder,1,1
4,Utah Jazz,LA Clippers,2,2
