# AC Project

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the teams table and correlate every pair of numeric columns.
teams_data = pd.read_csv("Dataset/teams.csv")
teams_correlation_matrix = teams_data.corr(numeric_only=True)

# Mask weak correlations: cells with |r| < 0.5 are hidden in the heatmap.
mask = teams_correlation_matrix.abs() < 0.5

# Draw the annotated heatmap of the remaining (strong) correlations.
plt.figure(figsize=(40, 30))
sns.heatmap(
    teams_correlation_matrix,
    mask=mask,
    annot=True,
    cmap='coolwarm',
)

plt.title('Correlation Heatmap')
plt.show()

In [None]:
# Points per game (PPG) column: 'o_pts' divided by 'GP'.
teams_data['PPG'] = teams_data['o_pts'] / teams_data['GP']

# Mean PPG for every (team name, year) pair, flattened back to columns.
ppg_per_team = (
    teams_data
    .groupby(['name', 'year'])['PPG']
    .mean()
    .reset_index()
)

plt.figure(figsize=(12, 10))

# One tab20 colour per team, sampled at evenly spaced positions in [0, 1).
team_names = ppg_per_team['name'].unique()
team_count = len(team_names)
colors = plt.cm.tab20([position / team_count for position in range(team_count)])
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=colors)

# One line per team: PPG against year.
for team in team_names:
    team_data = ppg_per_team.loc[ppg_per_team['name'] == team]
    plt.plot(team_data['year'], team_data['PPG'], label=team)

plt.title('Points Per Game (PPG) Evolution per Year for Each Team')
plt.xlabel('Year')
plt.ylabel('PPG')
plt.legend(loc='upper left')
plt.grid(True)
plt.show()

In [None]:
# Read the players_teams table and the players information table.
players_teams_data = pd.read_csv("Dataset/players_teams.csv")
players_info_data = pd.read_csv("Dataset/players.csv")

# Left-join on playerID == bioID so every players_teams row is kept, even
# when there is no matching row in the players information table.
players_data = pd.merge(
    players_teams_data,
    players_info_data,
    left_on='playerID',
    right_on='bioID',
    how='left',
)

# Keep rows that have a position and are not below the weight / height
# thresholds. The comparisons are negated (instead of written as >=) so rows
# with a missing weight or height are still kept, as the drop-by-index did.
keep_rows = (
    players_data['pos'].notnull()
    & ~(players_data['weight'] < 60)
    & ~(players_data['height'] < 50)
)

# Filtering and dropping columns in one chained expression yields a fresh
# frame, which avoids in-place edits on a slice of the merged frame.
players_data = players_data[keep_rows].drop(
    columns=["firstseason", "lastseason", "deathDate"]
)

# reset_index() returns a new frame, so the result must be assigned back;
# drop=True discards the old row labels instead of adding an 'index' column.
players_data = players_data.reset_index(drop=True)

players_data.head()

In [None]:
# Read awards data
awards_players_data = pd.read_csv("Dataset/awards_players.csv")

# Collect the award years of each player once, so every players_data row needs
# a single dictionary lookup instead of a scan over the whole awards table.
award_years_by_player = (
    awards_players_data.groupby('playerID')['year'].apply(list).to_dict()
)

# 'awards' = number of awards the player received in years strictly before
# the year of the row; players without any earlier award get 0.
players_data['awards'] = [
    sum(award_year < season for award_year in award_years_by_player.get(player_id, ()))
    for player_id, season in zip(players_data['playerID'], players_data['year'])
]

players_data.head()

In [None]:
from datetime import datetime, date

# Convert birthDate to age
def age(born, year, today=None):
    """Return the age in whole years for season number ``year``.

    The reference year is computed as ``today.year - (25 - year)``, i.e.
    season number 25 corresponds to the calendar year of ``today``.
    NOTE(review): the offset 25 is hard-coded, so with the default ``today``
    the result shifts with the date the code is run - confirm the intended
    base year and pass ``today`` explicitly for reproducible results.

    Args:
        born: Birth date as a ``YYYY-MM-DD`` string.
        year: Season number.
        today: Reference date; defaults to ``date.today()``.

    Returns:
        The reference year minus the birth year, minus one when the birthday
        (month, day) comes after ``today``'s (month, day).

    Raises:
        ValueError: If ``born`` does not match ``%Y-%m-%d``.
    """
    born = datetime.strptime(born, "%Y-%m-%d").date()
    if today is None:
        today = date.today()
    reference_year = today.year - (25 - year)
    # True counts as 1: the birthday has not been reached yet in the year.
    birthday_pending = (today.month, today.day) < (born.month, born.day)
    return reference_year - born.year - birthday_pending

# Add roster-derived columns (avg_height, avg_weight, avg_age, awards) to
# every row of teams_data.
for idx, team in teams_data.iterrows():
    # Player rows that share this row's year and tmID.
    same_year = players_data["year"] == team["year"]
    same_team = players_data["tmID"] == team["tmID"]
    roster = players_data[same_year & same_team]

    # Mean height and weight of the roster.
    teams_data.loc[[idx], 'avg_height'] = roster["height"].mean()
    teams_data.loc[[idx], 'avg_weight'] = roster["weight"].mean()

    # Mean age, computed from each birthDate for this row's year.
    roster_ages = roster["birthDate"].apply(age, args=(team["year"],))
    teams_data.loc[[idx], 'avg_age'] = roster_ages.mean()

    # Sum of the per-player 'awards' counts over the roster.
    teams_data.loc[[idx], 'awards'] = roster["awards"].sum()

teams_data

## Algorithms

### Data split

In [None]:
def _encode_by_first_appearance(frame, column):
    """Replace each distinct value of ``frame[column]`` by an integer code.

    Codes follow the order in which values appear in ``unique()``. The
    column is overwritten on ``frame``; the original distinct values are
    returned.
    """
    labels = frame[column].unique()
    for code, label in enumerate(labels):
        frame[column] = frame[column].replace(label, code)
    return labels


# Remove these four columns before modelling.
teams_data.drop(columns=['rank', 'firstRound', 'semis', 'finals'], inplace=True)

# Target: map the playoff flag to 0/1; any value not in char_map becomes -1.
char_map = {'N': 0, 'Y': 1, 'L': 0, 'W': 1}
teams_data['playoff'] = teams_data['playoff'].map(char_map).fillna(-1)

# Integer-encode the categorical columns.
tmIds = _encode_by_first_appearance(teams_data, 'tmID')
confids = _encode_by_first_appearance(teams_data, 'confID')
arenas = _encode_by_first_appearance(teams_data, 'arena')
tmNames = _encode_by_first_appearance(teams_data, 'name')

# Split by year: year 9 is the test set, earlier years are the training set.
teams_test = teams_data[teams_data['year'] == 9]
teams_data = teams_data[teams_data['year'] < 9]


X_train = teams_data.drop(columns=['playoff'])
X_test = teams_test.drop(columns=['playoff'])
y_train = teams_data['playoff']
y_test = teams_test['playoff']

### Abstract Model

In [None]:
from abc import ABC, abstractmethod
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, f1_score

class AbstractModel(ABC):
    """Shared train / predict / evaluate scaffolding around one estimator.

    Subclasses supply the estimator and implement ``train``. The data splits
    are stored as the ``.values`` of the objects passed in.
    """

    def __init__(self, X_train, X_test, y_train, y_test, algorithm):
        """Store the four data splits and the estimator.

        Args:
            X_train: Training features; must expose ``.values``.
            X_test: Test features; must expose ``.values``.
            y_train: Training targets; must expose ``.values``.
            y_test: Test targets; must expose ``.values``.
            algorithm: Estimator object used by ``train``.
        """
        self.X_train = X_train.values
        self.X_test = X_test.values
        self.y_train = y_train.values
        self.y_test = y_test.values
        self.algorithm = algorithm

    @abstractmethod
    def train(self):
        """Fit ``self.algorithm`` and return the fitted classifier."""
        pass

    def predict(self, clf):
        """Return ``clf.predict`` applied to the stored test features."""
        return clf.predict(self.X_test)

    def evaluate(self, pred):
        """Return a dict with the accuracy and F1 score of ``pred`` against the test targets."""
        return {
            'accuracy': accuracy_score(self.y_test, pred),
            'f1_score': f1_score(self.y_test, pred)
        }

    def confusion_matrix(self, pred):
        """Plot the confusion matrix of ``pred`` against the test targets.

        The labels are pinned to ``[0, 1]`` so the matrix is always 2x2 and
        lines up with the 'No' / 'Yes' display labels, even when one class is
        missing from both the targets and the predictions. Samples with any
        other label are not counted in the matrix.
        """
        cm = confusion_matrix(self.y_test, pred, labels=[0, 1])
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['No', 'Yes'])
        disp.plot()

### Decision Tree

In [None]:
from sklearn import tree

class DecisionTreeModel(AbstractModel):
    """AbstractModel backed by ``tree.DecisionTreeClassifier()`` with default arguments."""

    def __init__(self, X_train, X_test, y_train, y_test):
        estimator = tree.DecisionTreeClassifier()
        super().__init__(X_train, X_test, y_train, y_test, estimator)

    def train(self):
        """Fit the estimator on the training split and return the result of ``fit``."""
        return self.algorithm.fit(self.X_train, self.y_train)

### Support Vector Machine

In [None]:
from sklearn import svm

class SVMModel(AbstractModel):
    """AbstractModel backed by ``svm.SVC()`` with default arguments."""

    def __init__(self, X_train, X_test, y_train, y_test):
        estimator = svm.SVC()
        super().__init__(X_train, X_test, y_train, y_test, estimator)

    def train(self):
        """Fit the estimator on the training split and return the result of ``fit``."""
        return self.algorithm.fit(self.X_train, self.y_train)

### Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

class NaiveBayesModel(AbstractModel):
    """AbstractModel backed by ``GaussianNB()`` with default arguments."""

    def __init__(self, X_train, X_test, y_train, y_test):
        estimator = GaussianNB()
        super().__init__(X_train, X_test, y_train, y_test, estimator)

    def train(self):
        """Fit the estimator on the training split and return the result of ``fit``."""
        return self.algorithm.fit(self.X_train, self.y_train)

### Neural Network

In [None]:
from sklearn.neural_network import MLPClassifier

class NeuralNetworkModel(AbstractModel):
    """AbstractModel backed by ``MLPClassifier()`` with default arguments."""

    def __init__(self, X_train, X_test, y_train, y_test):
        estimator = MLPClassifier()
        super().__init__(X_train, X_test, y_train, y_test, estimator)

    def train(self):
        """Fit the estimator on the training split and return the result of ``fit``."""
        return self.algorithm.fit(self.X_train, self.y_train)

### Apply models

In [None]:
# Decision tree: fit on the training split, predict the test split,
# print the metrics and plot the confusion matrix.
decisionTreeModel = DecisionTreeModel(X_train, X_test, y_train, y_test)
clf = decisionTreeModel.train()
pred = decisionTreeModel.predict(clf)
scores = decisionTreeModel.evaluate(pred)
print(scores)
decisionTreeModel.confusion_matrix(pred)

In [None]:
# SVM: fit on the training split, predict the test split,
# print the metrics and plot the confusion matrix.
svmModel = SVMModel(X_train, X_test, y_train, y_test)
clf = svmModel.train()
pred = svmModel.predict(clf)
scores = svmModel.evaluate(pred)
print(scores)
svmModel.confusion_matrix(pred)

In [None]:
# Naive Bayes: fit on the training split, predict the test split,
# print the metrics and plot the confusion matrix.
naiveBayesModel = NaiveBayesModel(X_train, X_test, y_train, y_test)
clf = naiveBayesModel.train()
pred = naiveBayesModel.predict(clf)
scores = naiveBayesModel.evaluate(pred)
print(scores)
naiveBayesModel.confusion_matrix(pred)

In [None]:
# Neural network: fit on the training split, predict the test split,
# print the metrics and plot the confusion matrix.
neuralNetworkModel = NeuralNetworkModel(X_train, X_test, y_train, y_test)
clf = neuralNetworkModel.train()
pred = neuralNetworkModel.predict(clf)
scores = neuralNetworkModel.evaluate(pred)
print(scores)
neuralNetworkModel.confusion_matrix(pred)