In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.calibration import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score, accuracy_score, classification_report
import time

# Load the prepared player/team table and turn the two text flags into
# integers in one pass: playoff Y/N -> 1/0 and conference EA/WE -> 0/1.
players = pd.read_csv("data_prepared/players_teams.csv").assign(
    playoff=lambda frame: frame['playoff'].map({'Y': 1, 'N': 0}),
    confID=lambda frame: frame['confID'].map({'EA': 0, 'WE': 1}),
)

def encode_categorical_columns(df):
    """Replace every object-dtype column of ``df`` with integer label codes.

    The frame is modified in place and is also returned. Columns named
    ``playoff`` and ``confID`` are left untouched even if they are still of
    object dtype.
    """
    skipped = ('playoff', 'confID')
    encoder = LabelEncoder()
    text_columns = df.select_dtypes(include=['object']).columns
    for name in text_columns:
        if name not in skipped:
            # fit_transform refits the shared encoder, so each column gets
            # its own independent set of codes.
            df[name] = encoder.fit_transform(df[name])
    return df

# Integer-encode the remaining object-dtype columns of `players` in place.
# The call also returns the frame; it is not assigned here.
encode_categorical_columns(players)

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, roc_auc_score
import time

def second(data, year, n_playoff_teams=4):
    """Train a decision tree on earlier years and score one year's playoff picks.

    Rows whose ``year`` is below the given value form the training set and
    rows whose ``year`` equals it form the test set. Every column except
    ``playoff`` is used as a feature. Row-level probabilities of the positive
    class are averaged per team (``tmID``), the ``n_playoff_teams`` teams
    with the highest mean are labelled as playoff teams, and that team-level
    result is broadcast back to every test row before the metrics are
    printed.

    Args:
        data: DataFrame with at least ``year``, ``tmID`` and ``playoff``
            columns. The remaining columns are fed to the classifier as-is
            (presumably already numerically encoded by the caller).
        year: Value of the ``year`` column to hold out for testing.
        n_playoff_teams: How many teams to mark as qualified. Defaults to 4,
            the value that was previously hard-coded.

    Returns:
        None. The timing and metrics are printed.

    Raises:
        ValueError: If there are no rows to train on or no rows to test on.
    """
    train = data[data['year'] < year]
    # Copy the test slice: a column is added to it below, and writing into a
    # filtered view of ``data`` is the classic SettingWithCopy hazard.
    test = data[data['year'] == year].copy()

    if train.empty or test.empty:
        raise ValueError(
            f"No rows to train/test on for year={year!r} "
            f"(train rows: {len(train)}, test rows: {len(test)})"
        )

    X_train = train.drop("playoff", axis=1)
    Y_train = train["playoff"]

    X_test = test.drop("playoff", axis=1)
    Y_test = test["playoff"]

    # perf_counter is monotonic, so the elapsed time cannot go negative if
    # the wall clock is adjusted during fitting.
    start_time = time.perf_counter()
    decision_tree = DecisionTreeClassifier(random_state=42)
    decision_tree.fit(X_train, Y_train)
    end_time = time.perf_counter()

    # Column 1 of predict_proba is the probability of the positive class.
    test['probability'] = decision_tree.predict_proba(X_test)[:, 1]

    # Aggregate player rows to one mean probability per team.
    team_probs = (
        test.groupby(['tmID', 'year'])['probability']
        .mean()
        .rename('team_probability')
        .reset_index()
    )
    team_probs['predicted_playoff'] = 0
    top_teams = team_probs['team_probability'].nlargest(n_playoff_teams).index
    team_probs.loc[top_teams, 'predicted_playoff'] = 1

    # Broadcast the team-level result back to the player rows. A left merge
    # on unique team keys keeps the row order of ``test``, so the columns
    # line up positionally with Y_test.
    scored = test.merge(team_probs, on=['tmID', 'year'], how='left')
    y_pred = scored['predicted_playoff']
    y_score = scored['team_probability']

    print(f"Time: {(end_time - start_time):.2f} segundos")
    print(f"Precision: {precision_score(Y_test, y_pred):.2f}")
    print(f"Recall: {recall_score(Y_test, y_pred):.2f}")
    print(f"F1: {f1_score(Y_test, y_pred):.2f}")
    print(f"Accuracy: {accuracy_score(Y_test, y_pred):.2f}")
    # AUC is a ranking metric: feed it the continuous team score, not the
    # thresholded 0/1 labels, which would collapse the ROC curve to a single
    # operating point.
    print(f"AUC: {roc_auc_score(Y_test, y_score):.2f}")
    
def first(year):
    """Run the playoff evaluation separately for each conference.

    Splits the module-level ``players`` frame on ``confID`` (0 first, then
    1) and calls ``second`` on each part with ``year`` as the test year,
    printing a header line before each report.
    """
    sections = (
        ("--- First Conference ---\n", 0),
        ("\n--- Second Conference ---\n", 1),
    )
    for header, conf_id in sections:
        print(header)
        second(players[players['confID'] == conf_id], year)

# Evaluate both conferences, holding out the rows with year == 10 as the
# test set and training on all rows with a smaller year.
first(10)