In [27]:
import pandas as pd
import numpy as np
from IPython.display import display
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.impute import SimpleImputer

# Load the two raw inputs: the All-Star selections and the season statistics
# (the statistics file is semicolon-delimited).
df_als = pd.read_csv("nbaallstargames.csv")
df_stat = pd.read_csv("correct_data.csv", sep=";")

# Keep only rows from 1980 onwards whose MP value is at least 300
recent_enough = df_stat["Year"] >= 1980
enough_minutes = df_stat["MP"] >= 300
df_stat = df_stat[recent_enough & enough_minutes]

# Columns that are removed before modelling. The labels are passed to drop() as-is,
# and drop() raises on an unknown label, so every spelling here (including "blanl")
# has to match the header of the statistics file exactly.
deletedColumns = [
    "Column1", "Tm", "GS", "PER", "FTr",
    "ORB%", "DRB%", "TRB%", "AST%", "STL%", "BLK%", "TOV%",
    "blanl", "OWS", "DWS", "WS", "WS/48", "blank2",
    "OBPM", "DBPM", "BPM", "VORP",
    "FG", "FGA", "3P", "3PA", "2P", "2PA", "eFG%",
    "FT", "FTA", "ORB", "DRB",
]
df_stat = df_stat.drop(columns=deletedColumns)

# New per-game column -> season-total column it is derived from.
# The dict order is the order in which the new columns are appended.
season_total_by_feature = {
    "PointsPerGame": "PTS",
    "Rebounds": "TRB",
    "Assists": "AST",
    "Steals": "STL",
    "Blocks": "BLK",
    "Turnovers": "TOV",
    "PersonalFouls": "PF",
}

# Divide every season total by the G column and round to one decimal
games_played = df_stat["G"]
per_game_features = {
    feature: (df_stat[total] / games_played).round(1)
    for feature, total in season_total_by_feature.items()
}

# MinutesPlayed and Age are stored as integers; Age already exists, so it keeps its position
df_stat = df_stat.assign(
    MinutesPlayed=df_stat["MP"].astype(int),
    **per_game_features,
    Age=df_stat["Age"].astype(int),
)

# The raw totals are no longer needed once the per-game columns exist
df_stat = df_stat.drop(columns=["G", "MP", *season_total_by_feature.values()])

# Season statistics: combine player name and year into one "<player>-<year>" key
# called Season. A literal "*" in the player name is removed before the key is built.
stat_season_key = (
    df_stat["Player"].str.replace("*", '', regex=False).astype(str)
    + "-"
    + df_stat["Year"].astype(int).astype(str)
)
df_stat = df_stat.assign(Season=stat_season_key).drop(columns=["Year", "Player"])
# Move Season to the front, leaving the other columns in their existing order
df_stat = df_stat[["Season"] + [col for col in df_stat.columns if col != "Season"]]

# All-Star data: build the same "<player>-<year>" key and move it to the first column
als_season_key = df_als["Player"].astype(str) + "-" + df_als["Year"].astype(int).astype(str)
df_als = df_als.assign(Season=als_season_key).drop(columns=["Player", "Year"])
df_als = df_als[["Season"] + [col for col in df_als.columns if col != "Season"]]

# Numeric class for each All-Star "Role". Roles that are not listed here, and
# player seasons that do not appear in the All-Star data at all, end up as 0.
role_mapping = {
    "Starter": 2,
    "Reserve": 1,
    "DNP": 1,  # DNP should be treated the same as "Reserve"
}

# Collapse the All-Star table to exactly one numeric label per Season before joining.
# A left merge emits one output row per matching right-hand row, so a repeated Season
# key in df_als would otherwise silently duplicate that player's statistics row (and
# the duplicates could land on both sides of the train/validation split).
# If a key does repeat, the highest status wins; max() ignores unmapped (NaN) roles.
als_status = (
    df_als.assign(AllStarStatus=df_als["Role"].map(role_mapping))
    .groupby("Season", as_index=False)["AllStarStatus"]
    .max()
)

# Attach the label to every player season. The right side is unique per Season by
# construction; validate= makes that assumption explicit, so the row count of
# df_merged always equals the row count of df_stat.
df_merged = df_stat.merge(als_status, on="Season", how="left", validate="many_to_one")

# Seasons without an All-Star entry (or with only unmapped roles) become class 0
df_merged["AllStarStatus"] = df_merged["AllStarStatus"].fillna(0).astype(int)

# Fixed seed so the shuffle, and therefore the train/validation/test membership,
# is identical on every run of this cell from a fresh kernel.
SPLIT_SEED = 42

# Shuffle the rows, then split them 60:20:20 into train, validation and test sets
df_merged = df_merged.sample(frac=1, random_state=SPLIT_SEED).reset_index(drop=True)
train_size = int(0.6 * len(df_merged))
val_size = int(0.2 * len(df_merged))

# Positional slices of the shuffled frame; the three parts are disjoint and cover every row
train_data = df_merged.iloc[:train_size]  # First 60%
val_data = df_merged.iloc[train_size:train_size + val_size]  # Next 20%
test_data = df_merged.iloc[train_size + val_size:]  # Remaining rows (about 20%)

# Project-local module that provides BasketballModel; its implementation is not part of this cell.
import Model

# Build the model and hand it the training and validation splits.
# The recorded output of this cell shows a validation accuracy and a classification
# report being printed before the message below, so that reporting presumably happens
# inside train_and_validate_model.
# NOTE(review): test_data is never used in this cell; confirm that a final held-out
# evaluation happens somewhere else.
# NOTE(review): the frames passed in still contain the "Season" identifier column;
# presumably the model drops or ignores it. Verify in Model.
model = Model.BasketballModel()
model.train_and_validate_model(train_data, val_data)

# NOTE(review): the message says the model was saved, but no save call is visible here;
# presumably train_and_validate_model persists it. Confirm.
print("Model trained, validated, and saved.")

# Example usage: one hand-written player season passed to the trained model.
# MinutesPlayed and the per-game keys (PointsPerGame ... PersonalFouls) use the same names
# as the columns engineered above; the remaining keys are columns kept from the statistics file.
# NOTE(review): presumably the model expects exactly these keys; verify against Model.
exampleStats = {
    'Age': 28,
    'TS%': 0.600,
    '3PAr': 0.400,
    'USG%': 28.5,
    'FG%': 0.480,
    '3P%': 0.380,
    '2P%': 0.520,
    'FT%': 0.850,
    'MinutesPlayed': 2500,
    'PointsPerGame': 20.0,
    'Rebounds': 8.0,
    'Assists': 6.0,
    'Steals': 1.5,
    'Blocks': 6.0,  # NOTE(review): 6.0 blocks per game looks very high; confirm this is intended and not a typo
    'Turnovers': 2.5,
    'PersonalFouls': 2.0
}

# Predict All-Star status
# The recorded run printed "Reserve", so the method appears to return a role label
# rather than the numeric class. TODO confirm in Model.
result = model.predict_all_star_status(exampleStats)
print(f"Predicted All-Star status: {result}")

Validation Accuracy: 0.9507

Classification Report (Validation Set):
              precision    recall  f1-score   support

           0       0.97      0.99      0.98      2688
           1       0.47      0.22      0.30       116
           2       0.59      0.45      0.51        58

    accuracy                           0.95      2862
   macro avg       0.68      0.55      0.59      2862
weighted avg       0.94      0.95      0.94      2862

Model trained, validated, and saved.
Predicted All-Star status: Reserve
