In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the skill table and reshape it into one feature vector per player
df = pd.read_csv('data.csv')

# Fixed column order so every row of X lists the skills in the same sequence
skill_order = ['Playmaking', 'Shooting', 'Dribbling', 'Rebounding', 'Defense', 'Athleticism']

# One row per player, one column per skill, restricted to skill_order
player_vectors = df.pivot(index='Player', columns='Skill', values='Value').loc[:, skill_order]
X = player_vectors.to_numpy()

print("Shape of feature vectors:", X.shape)
print("\nFirst few player vectors:\n", X[:3])

Shape of feature vectors: (15, 6)

First few player vectors:
 [[3.2 3.7 4.9 4.  5.7 5.8]
 [6.1 6.6 6.7 4.  5.5 6.5]
 [3.1 5.1 3.1 2.2 3.  3.8]]


# Algorithms

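1) K-means — cluster players by skill profile, then draw one player from each cluster for every team
2) Greedy — snake draft ordered by each player's total skill value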

In [13]:
# K-means clustering
#
# Group players with similar skill vectors into n_clusters clusters, force
# every cluster to hold exactly n_teams players, then build each team by
# taking one player from every cluster.

from sklearn.cluster import KMeans

SEED = 42        # fixes both the K-means initialisation and the random team draw
n_clusters = 5   # one cluster per roster slot (teams of 5)
n_teams = 3      # each cluster must end up with exactly n_teams players

if len(X) != n_clusters * n_teams:
    raise ValueError(
        f"Expected {n_clusters * n_teams} players "
        f"({n_clusters} clusters * {n_teams} teams), got {len(X)}"
    )

kmeans = KMeans(n_clusters=n_clusters, random_state=SEED)
kmeans.fit(X)

# Work on a copy: rebalancing must not overwrite the fitted model's labels_.
cluster_labels = kmeans.labels_.copy()

# Balance clusters to exactly n_teams players each
for cluster in range(n_clusters):
    member_idx = np.flatnonzero(cluster_labels == cluster)
    excess = len(member_idx) - n_teams
    if excess <= 0:
        continue

    # Rank members furthest-from-centroid first; only the `excess` furthest
    # players leave, so the ones that best fit the cluster stay in it.
    distances = np.linalg.norm(X[member_idx] - kmeans.cluster_centers_[cluster], axis=1)
    furthest_first = np.argsort(distances)[::-1]
    players_to_move = member_idx[furthest_first[:excess]]

    # Assign each moved player to the nearest cluster that still has room
    for player_idx in players_to_move:
        distances_to_centers = np.linalg.norm(X[player_idx] - kmeans.cluster_centers_, axis=1)
        for target_cluster in np.argsort(distances_to_centers):
            if np.sum(cluster_labels == target_cluster) < n_teams:
                cluster_labels[player_idx] = target_cluster
                break

assert (np.bincount(cluster_labels, minlength=n_clusters) == n_teams).all(), \
    "cluster balancing failed"

# Randomly deal one player from each cluster to every team.
# Shuffling a cluster's members and handing them out in order gives each team
# a distinct random player from that cluster without tracking earlier picks.
rng = np.random.default_rng(SEED)
teams = [[] for _ in range(n_teams)]

for cluster in range(n_clusters):
    shuffled_members = rng.permutation(np.flatnonzero(cluster_labels == cluster))
    for team, player_idx in zip(teams, shuffled_members):
        team.append(player_idx)

# Print the teams
for i, team in enumerate(teams):
    print(f"\nTeam {i+1}:")
    for player_idx in team:
        print(f"  {player_vectors.index[player_idx]}")


Team 1:
  Oliver
  Max C
  Swain
  Max R
  Ethan L

Team 2:
  Diego
  Rohan
  Aryan
  Izzy
  William

Team 3:
  Logan
  Gavin
  Kian
  Jackson
  Ethan H


In [29]:
# Greedy algorithm -- snake draft by summing player skill values
#
# Players are ranked by their total skill value (highest first) and dealt to
# the teams in snake order: 0,1,2 then 2,1,0, and so on, so that no team
# always gets the first pick of a round.

n_teams = 3

player_totals = df.groupby('Player')['Value'].sum().sort_values(ascending=False)

teams = [[] for _ in range(n_teams)]

# Iterate over every ranked player instead of a hard-coded count, so the
# draft neither crashes on a shorter roster nor drops players on a longer one.
for pick, player in enumerate(player_totals.index):
    round_idx, slot = divmod(pick, n_teams)
    if round_idx % 2 == 0:
        # Forward order: 0,1,2
        team_idx = slot
    else:
        # Reverse order: 2,1,0
        team_idx = n_teams - 1 - slot
    teams[team_idx].append(player)

for i, team in enumerate(teams):
    print(f"\nTeam {i+1}:")
    for player in team:
        print(f"  {player}")


Team 1:
  Diego
  Logan
  Max C
  Ethan L
  Ethan H

Team 2:
  Rohan
  Oliver
  Aryan
  Izzy
  William

Team 3:
  Jackson
  Max R
  Kian
  Swain
  Gavin
