In [5]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

All imports are shown above

In [6]:
# CSV with the player statistics used throughout this notebook; the path is
# resolved relative to the kernel's working directory.
data_path = 'all_season.csv'
players_df = pd.read_csv(filepath_or_buffer=data_path)

Loading the dataset

In [8]:
# Seasons to keep, spelled the way they are matched against the 'season' column.
selected_years = ['2015-16', '2016-17', '2017-18', '2018-19', '2019-20']
# Boolean mask: True for rows whose season is one of the selected years.
in_selected_years = players_df['season'].isin(selected_years)
pool_df = players_df[in_selected_years]

Filtering the dataframe so that it only includes players who played
between 2015-2016 and 2019-2020

In [9]:
# Sample from the freshly filtered frame rather than from `pool_df` itself.
# `pool_df = pool_df.sample(...)` is self-referential: running the cell a second
# time would re-shuffle the already sampled rows and change their order, which
# in turn changes the train/test split and the heat-map layout further down.
pool_df = players_df[players_df['season'].isin(selected_years)].sample(n=100, random_state=42)

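This piece of code randomly selects 100 rows from the filtered dataset, using a fixed `random_state` so the selection is reproducible. Because the data carries a `season` column, a row appears to be one player in one season, so the same player may be drawn more than once.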

In [10]:
def calculate_role_scores(df):
    """Return a copy of ``df`` with one score column per team role added.

    The input frame is left untouched; the original implementation wrote the
    new columns straight into the caller's DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``pts``, ``ast``, ``reb``, ``net_rating``,
        ``usg_pct``, ``player_height`` and ``player_weight``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding every column of ``df`` plus, in this order:
        ``scorer_score``, ``playmaker_score``, ``rebounder_score``,
        ``defender_score``, ``utility_score``, ``impact_score`` and
        ``size_factor``.

    Raises
    ------
    KeyError
        If any of the required input columns is missing.
    """
    return df.assign(
        scorer_score=df['pts'],
        playmaker_score=df['ast'],
        rebounder_score=df['reb'],
        # Net rating is used as the stand-in for defensive value.
        defender_score=df['net_rating'],
        # Average (not sum) of the three box-score stats.
        utility_score=(df['pts'] + df['reb'] + df['ast']) / 3,
        impact_score=df['usg_pct'] * df['net_rating'],
        size_factor=df['player_height'] * df['player_weight'],
    )

This function takes the DataFrame and creates new columns, each reflecting one aspect of a basketball player's game. For instance, points (`pts`) drive the scorer score and assists (`ast`) drive the playmaker score. We also create more advanced composite statistics, such as the utility score, which is the average of all three box-score stats: points, rebounds, and assists.

In [11]:
# Attach the role-score columns to the sampled pool of rows.
scored_df = calculate_role_scores(df=pool_df)

This cell is straightforward: it prepares the data by applying the function that we created above.

In [12]:
# Columns fed to the network, in the order they appear in the feature matrix.
feature_columns = [
    'scorer_score',
    'playmaker_score',
    'rebounder_score',
    'defender_score',
    'utility_score',
    'impact_score',
    'size_factor',
]
features = scored_df[feature_columns].to_numpy()

# Standardise every column to zero mean and unit variance.
scaler = StandardScaler()
features_scaled = scaler.fit(features).transform(features)

# float32 is the dtype the nn.Linear layers work with by default.
X_tensor = torch.tensor(features_scaled, dtype=torch.float32)
class RoleSpecificMLP(nn.Module):
    """Feed-forward regressor that maps a feature vector to a single score.

    The network is a stack of ``Linear -> ReLU -> Dropout`` blocks, one per
    entry of ``hidden_sizes``, followed by a final ``Linear`` layer with one
    output unit.

    Previously only ``hidden_sizes[0]`` and ``hidden_sizes[1]`` were read, so a
    longer list was silently truncated and a shorter one raised ``IndexError``.
    Every entry is now honoured; for a two-element list the layer layout (and
    therefore the ``state_dict`` keys) is the same as before.

    Parameters
    ----------
    input_size : int
        Number of input features.
    hidden_sizes : sequence of int
        Width of each hidden layer, in order. May have any length; an empty
        sequence yields a single linear layer.
    dropout : float, default 0.3
        Dropout probability applied after every hidden activation.
    """

    def __init__(self, input_size, hidden_sizes, dropout=0.3):
        super().__init__()
        blocks = []
        in_features = input_size
        for out_features in hidden_sizes:
            blocks.extend([
                nn.Linear(in_features, out_features),
                nn.ReLU(),
                nn.Dropout(dropout),
            ])
            in_features = out_features
        blocks.append(nn.Linear(in_features, 1))  # Single output for ranking
        self.layers = nn.Sequential(*blocks)

    def forward(self, x):
        """Run ``x`` through the layer stack; the last dimension of the result is 1."""
        return self.layers(x)
# Network input width = number of feature columns in X_tensor.
input_size = X_tensor.size(1)
# Widths of the hidden layers handed to RoleSpecificMLP.
hidden_sizes = [128, 64]

Here we are still preparing the data for the neural network that we are going to use. `features` becomes an array of the derived score columns for the 100 rows selected within the timeframe. Next we create a `StandardScaler` object, which we then apply to `features` to scale our data to a mean of 0 and a standard deviation of 1. Finally we call `torch.tensor` to prepare the data for the PyTorch libraries. The same cell also defines the `RoleSpecificMLP` network class and the layer sizes (`input_size`, `hidden_sizes`) used to build it.

In [13]:
def train_role_model(X_tensor, target_score, role_name, seed=42, grid_cols=10):
    """Train one MLP for a role and score every row of ``X_tensor`` with it.

    Besides its arguments the function relies on module-level names:
    ``RoleSpecificMLP``, ``input_size`` and ``hidden_sizes`` to build the
    model, and ``scored_df``, which receives a new
    ``'<role_name>_predicted_score'`` column as a side effect.

    Changes from the earlier version:
    * the torch RNG is seeded, so weight initialisation and dropout (and thus
      the printed losses and the predictions) are repeatable between runs;
    * the held-out 20% split, previously created but never used, is evaluated
      once after training and its loss is printed;
    * the prediction grid is no longer hard-wired to exactly 100 rows.

    NOTE(review): at the call sites in this notebook the target is also one of
    the feature columns the model is trained on, so the model can read the
    answer from its input - confirm that this is intended.

    Parameters
    ----------
    X_tensor : torch.Tensor
        Feature matrix with one row per candidate.
    target_score : array-like
        Regression target, one value per row of ``X_tensor``.
    role_name : str
        Label used in log lines and in the name of the prediction column.
    seed : int or None, default 42
        Seed for ``torch.manual_seed`` and for the train/test split. ``None``
        leaves the torch RNG untouched and makes the split unseeded.
    grid_cols : int, default 10
        Number of columns in the returned grid.

    Returns
    -------
    grid_scores : numpy.ndarray
        Predictions laid out row-major with ``grid_cols`` values per row. The
        last row is padded with NaN when the number of predictions is not a
        multiple of ``grid_cols``; for 100 rows and the default this is 10x10.
    candidates : pandas.DataFrame
        The prediction column plus identifying and role-score columns, taken
        from ``scored_df`` in its existing row order.

    Raises
    ------
    ValueError
        If ``grid_cols`` is smaller than 1.
    """
    if grid_cols < 1:
        raise ValueError(f'grid_cols must be a positive integer, got {grid_cols}')
    if seed is not None:
        torch.manual_seed(seed)

    y_tensor = torch.tensor(target_score, dtype=torch.float32).view(-1, 1)
    X_train, X_test, y_train, y_test = train_test_split(
        X_tensor, y_tensor, test_size=0.2, random_state=seed
    )
    model = RoleSpecificMLP(input_size, hidden_sizes)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Full-batch training: every epoch is a single optimiser step on X_train.
    epochs = 100
    for epoch in range(epochs):
        model.train()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (epoch + 1) % 10 == 0:
            print(f'{role_name} - Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

    model.eval()  # switch dropout off for evaluation and prediction
    with torch.no_grad():
        test_loss = criterion(model(X_test), y_test).item()
        predictions = model(X_tensor).numpy().flatten()
    print(f'{role_name} - Held-out test loss: {test_loss:.4f}')

    scored_df[f'{role_name}_predicted_score'] = predictions

    # Row-major grid; pad the tail with NaN so any number of rows fits.
    n_rows = -(-len(predictions) // grid_cols)  # ceiling division
    padded = np.full(n_rows * grid_cols, np.nan, dtype=predictions.dtype)
    padded[:len(predictions)] = predictions
    grid_scores = padded.reshape(n_rows, grid_cols)

    return grid_scores, scored_df[[f'{role_name}_predicted_score', 'player_name', 'team_abbreviation', 'scorer_score',
                                   'playmaker_score', 'rebounder_score', 'defender_score', 'utility_score']]


This function begins by converting the target scores into a PyTorch tensor and reshaping it for compatibility with the model. The function then splits the input feature tensor into training and testing sets, using 80% of the data for training and holding out the remaining 20%. An instance of the RoleSpecificMLP model is created, and the Mean Squared Error (MSE) loss function is defined alongside the Adam (Adaptive Moment Estimation) optimizer for efficient parameter updates. The training process runs for 100 epochs, during which the model learns to minimize the loss between predicted and actual scores through forward and backward passes. Throughout the training, it logs the loss every ten epochs to monitor performance. After training, the model is switched to evaluation mode and applied to the entire feature dataset, and predictions are generated without gradient tracking. These predictions are stored in the original DataFrame, alongside relevant player information. Finally, the function reshapes the predictions into a 10x10 grid format for visualization and returns both the grid scores and a DataFrame of candidates, making it useful for analyzing player performance in various roles.

In [14]:
# (role label, target column) pairs, trained in this order.
role_targets = [
    ('Scorer', 'scorer_score'),
    ('Playmaker', 'playmaker_score'),
    ('Rebounder', 'rebounder_score'),
    ('Defender', 'defender_score'),
    ('Utility', 'utility_score'),
]

# One independent model per role; each call returns (grid, candidates).
role_results = {}
for role_label, target_column in role_targets:
    role_results[role_label] = train_role_model(
        X_tensor, scored_df[target_column].values, role_label
    )

scorer_grid, scorer_candidates = role_results['Scorer']
playmaker_grid, playmaker_candidates = role_results['Playmaker']
rebounder_grid, rebounder_candidates = role_results['Rebounder']
defender_grid, defender_candidates = role_results['Defender']
utility_grid, utility_candidates = role_results['Utility']


NameError: name 'RoleSpecificMLP' is not defined

Here we call the model to train for all 5 of the roles on our team

In [15]:
all_candidates = pd.concat([scorer_candidates, playmaker_candidates, rebounder_candidates, defender_candidates, utility_candidates])
assigned_players = set()
optimal_team = []
# Roles are filled greedily in this order; earlier roles get first pick.
role_order = [
    ('Scorer', scorer_candidates),
    ('Playmaker', playmaker_candidates),
    ('Rebounder', rebounder_candidates),
    ('Defender', defender_candidates),
    ('Utility', utility_candidates)
]
selected_positions = {}
GRID_COLS = 10  # train_role_model lays the predictions out row-major as a 10x10 grid
for role_name, candidates in role_order:
    ranked = candidates.sort_values(by=f'{role_name}_predicted_score', ascending=False)
    for row_label, player in ranked.iterrows():
        if player['player_name'] in assigned_players:
            continue  # this player already holds an earlier role
        player = player.copy()
        player['predicted_role'] = role_name
        optimal_team.append(player)
        assigned_players.add(player['player_name'])
        # The heat-map grid follows the UNSORTED candidate order, so the position
        # must be looked up there. Looking it up in the sorted frame (as before)
        # returned the player's rank instead of the grid cell.
        # Row labels are unique here: they come from read_csv's default index,
        # sampled without replacement.
        grid_index = candidates.index.get_loc(row_label)
        selected_positions[role_name] = (grid_index // GRID_COLS, grid_index % GRID_COLS, player['player_name'])
        break


NameError: name 'scorer_candidates' is not defined

The code snippet systematically selects players for specific basketball roles to form an optimal team based on predicted scores. It begins by defining an ordered list of roles and their corresponding candidate DataFrames. For each role, candidates are sorted by their predicted scores in descending order, ensuring the highest-rated players are prioritized. The inner loop iterates through the sorted candidates, checking if a player has already been assigned a role. If a player is available, they are assigned the current role, added to the optimal_team, and marked as assigned to prevent duplicates. The loop breaks after selecting one player for each role, ensuring a single assignment per position. This process continues for all defined roles, resulting in an optimal team composed of the best players for each specific role, effectively leveraging their predicted performance metrics.

In [16]:
# Columns shown in the team summary, left to right.
team_columns = [
    'player_name',
    'team_abbreviation',
    'scorer_score',
    'playmaker_score',
    'rebounder_score',
    'defender_score',
    'utility_score',
    'predicted_role',
]

# One row per selected player (each list entry is a row Series).
optimal_team_df = pd.DataFrame(optimal_team)
print("\nOptimal 5-Man Team Based on Separate Role-Specific Models (with Unique Selections):")
print(optimal_team_df[team_columns])



Optimal 5-Man Team Based on Separate Role-Specific Models:


KeyError: "None of [Index(['player_name', 'team_abbreviation', 'scorer_score', 'playmaker_score',\n       'rebounder_score', 'defender_score', 'utility_score', 'predicted_role'],\n      dtype='object')] are in the [columns]"

This code section creates a DataFrame, optimal_team_df, from the list of selected players for the optimal basketball team. It prints a message indicating the display of the optimal five-man team and then outputs specific columns, including player names, team abbreviations, various role-related scores, and the assigned predicted roles. This structured presentation helps visualize the team's composition and highlights each player's performance metrics in their respective roles.

In [17]:
def plot_heatmap_with_selection(grid, role_name, candidates_df, team_df=None):
    """Draw the prediction grid for one role and label the selected player's cell.

    The grid is expected to hold the predictions for ``candidates_df`` in row
    order, laid out row-major, so a candidate's positional index maps to a cell
    via ``divmod(position, number_of_grid_columns)``.

    The earlier version read ``.index[0]`` of the matching rows, which is the
    row *label* rather than the position, so the annotation landed far outside
    the grid whenever labels were not 0..N-1. It also assumed a 10x10 grid.

    Parameters
    ----------
    grid : 2-D array-like
        Predicted scores to display.
    role_name : str
        Role whose selected player(s) are annotated; also used in the title.
    candidates_df : pandas.DataFrame
        Candidate rows in the same order as the flattened grid. Must contain
        a ``player_name`` column.
    team_df : pandas.DataFrame, optional
        Selected team with ``player_name`` and ``predicted_role`` columns.
        Defaults to the module-level ``optimal_team_df``.
    """
    if team_df is None:
        team_df = optimal_team_df

    grid = np.asarray(grid)
    n_rows, n_cols = grid.shape

    plt.figure(figsize=(8, 6))
    plt.imshow(grid, cmap='RdBu_r', interpolation='nearest')
    plt.colorbar(label='Predicted Score')
    plt.title(f'{role_name} Heatmap of Predicted Scores')
    plt.xlabel('Columns')
    plt.ylabel('Rows')
    # 1-based tick labels sized to the actual grid.
    plt.xticks(ticks=np.arange(n_cols), labels=np.arange(1, n_cols + 1))
    plt.yticks(ticks=np.arange(n_rows), labels=np.arange(1, n_rows + 1))

    candidate_names = candidates_df['player_name'].to_numpy()
    selected_player = team_df[team_df['predicted_role'] == role_name]
    for row_label, player in selected_player.iterrows():
        grid_index = None
        # Prefer the exact row that was selected: the same name can occur on
        # several candidate rows, and a name match alone would pick the first.
        if row_label in candidates_df.index:
            location = candidates_df.index.get_loc(row_label)
            if isinstance(location, (int, np.integer)) and candidate_names[location] == player['player_name']:
                grid_index = int(location)
        if grid_index is None:
            # Fall back to the first positional match on the player's name.
            name_matches = np.flatnonzero(candidate_names == player['player_name'])
            if name_matches.size == 0:
                continue  # player is not among these candidates; nothing to label
            grid_index = int(name_matches[0])

        row, col = divmod(grid_index, n_cols)
        plt.text(col, row, player['player_name'], ha='center', va='center',
                 color='black', fontweight='bold', bbox=dict(facecolor='white', alpha=0.7, edgecolor='none'))

    plt.show()

# One heat map per role, drawn in the same order the roles were trained.
for role_grid, role_label, role_candidates in (
    (scorer_grid, 'Scorer', scorer_candidates),
    (playmaker_grid, 'Playmaker', playmaker_candidates),
    (rebounder_grid, 'Rebounder', rebounder_candidates),
    (defender_grid, 'Defender', defender_candidates),
    (utility_grid, 'Utility', utility_candidates),
):
    plot_heatmap_with_selection(role_grid, role_label, role_candidates)

NameError: name 'scorer_grid' is not defined