In [None]:
import numpy as np
import json
import os
from sklearn.cluster import KMeans
import pandas as pd
from collections import defaultdict

In [None]:
# Mount Google Drive at /content/drive so later cells can read files stored there.
# NOTE(review): `google.colab` is presumably only importable inside a Colab
# runtime, so this cell will fail elsewhere — confirm before running locally.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Folder (on the mounted drive) that holds one "<date>_<team>_statistics.json" file per team.
file_path = "/content/drive/MyDrive/parlay code/player data/"
def get_file(team, date_prefix="3_4_2024"):
  """Return the path of the statistics JSON file for ``team``.

  Args:
    team: Team identifier exactly as it appears in the file name.
    date_prefix: Leading date tag of the file name. Defaults to "3_4_2024",
      the value that used to be hard-coded, so existing calls are unchanged.

  Returns:
    ``<file_path>/<date_prefix>_<team>_statistics.json`` as a string.
  """
  # os.path.join keeps the result correct whether or not file_path ends with
  # a separator; plain concatenation silently relied on the trailing "/".
  return os.path.join(file_path, f"{date_prefix}_{team}_statistics.json")

In [None]:
# CODE FOR PARSING THROUGH JSON FILES IN DIRECTORY #
#                                                  #

def load_files(player_data, data_point, min_minutes=20, min_per_game=None):
  """Load per-game averages of one statistic for every relevant player.

  Every ``*_statistics.json`` file directly inside ``file_path`` is read. Each
  file is expected to contain a ``'players'`` list whose entries have a
  ``'full_name'`` and a ``'total'`` mapping with ``'games_played'``,
  ``'minutes'`` and the requested ``data_point``.

  Args:
    player_data: Dictionary that is filled in place, mapping player name to
      the per-game average. A name that appears in more than one file is
      overwritten by the file read last.
    data_point: Key inside ``player['total']`` to average, e.g. "points".
    min_minutes: Minimum minutes per game a player needs to be kept.
    min_per_game: Minimum per-game average needed to be kept. When ``None``
      the built-in defaults are used: points 7, rebounds 4, assists 3.

  Raises:
    ValueError: If ``data_point`` has no built-in default and no
      ``min_per_game`` was given. Previously this case silently stored nothing.
  """
  default_minimums = {"points": 7, "rebounds": 4, "assists": 3}
  if min_per_game is None:
    if data_point not in default_minimums:
      raise ValueError(
          "no default threshold for data_point %r; pass min_per_game explicitly"
          % (data_point,))
    min_per_game = default_minimums[data_point]

  # Sorted so the insertion order of player_data (and everything derived from
  # it) does not depend on the arbitrary order returned by os.listdir.
  for file in sorted(os.listdir(file_path)):
    f = os.path.join(file_path, file)
    # Skip sub-directories and stray files that are not team statistics.
    if not (os.path.isfile(f) and file.endswith("_statistics.json")):
      continue

    # Open the file that was actually found instead of rebuilding its path
    # from a team name parsed out of the full path.
    with open(f, 'r') as json_data:
      data_dict = json.load(json_data)

    for player in data_dict['players']:
      totals = player['total']
      games_played = totals['games_played']
      # Players without any game get 0 for both averages (avoids dividing by zero).
      per_game = totals[data_point] / games_played if games_played else 0
      minutes_per_game = totals['minutes'] / games_played if games_played else 0

      ## FILTER OUT IRRELEVANT PLAYERS ##
      if minutes_per_game >= min_minutes and per_game >= min_per_game:
        player_data[player['full_name']] = per_game

In [None]:
## DEFINING OUR DATAPOINTS ##
# One dictionary per statistic, mapping player name -> per-game average.
# They are created empty here and filled in place by load_files.
player_points, player_rebounds, player_assists = {}, {}, {}

for _stat_name, _stat_table in (
    ("points", player_points),
    ("rebounds", player_rebounds),
    ("assists", player_assists),
):
  load_files(_stat_table, _stat_name)

In [None]:
## PERFORMING K-MEANS ##

# Number of clusters used for every statistic (was repeated as a literal 8).
N_CLUSTERS = 8


def _cluster_players(per_game_stats, random_state, n_clusters=N_CLUSTERS):
  """Run 1-D K-means on one statistic and group the player names by cluster.

  Args:
    per_game_stats: Dictionary mapping player name to per-game average.
    random_state: Seed passed to KMeans.
    n_clusters: Number of clusters to fit.

  Returns:
    A tuple ``(features, model, cluster_map, clusters)``:
      features: the averages as a column vector of shape (n_players, 1).
      model: the fitted KMeans estimator.
      cluster_map: DataFrame with columns player_name, data_index, cluster.
      clusters: defaultdict(list) mapping each cluster id in
        ``range(n_clusters)`` to the list of player names assigned to it.
  """
  names = list(per_game_stats.keys())
  values = list(per_game_stats.values())

  # KMeans expects a 2-D array, so the single feature becomes one column.
  features = np.array(values).reshape(-1, 1)
  model = KMeans(n_clusters=n_clusters, n_init=10, random_state=random_state)
  model.fit(features)

  cluster_map = pd.DataFrame({
      'player_name': names,
      'data_index': values,
      'cluster': model.labels_,
  })

  clusters = defaultdict(list)
  for cluster_id in range(n_clusters):
    in_cluster = cluster_map.cluster == cluster_id
    clusters[cluster_id].extend(cluster_map.loc[in_cluster, 'player_name'].tolist())

  return features, model, cluster_map, clusters


# Each statistic keeps its own seed (42 / 43 / 44), as before.
points, kmeans1, points_cluster_map, points_cluster = _cluster_players(player_points, random_state=42)
rebounds, kmeans2, rebounds_cluster_map, rebounds_cluster = _cluster_players(player_rebounds, random_state=43)
assists, kmeans3, assists_cluster_map, assists_cluster = _cluster_players(player_assists, random_state=44)

# Label arrays are kept under their original names for any later cell that uses them.
points_labels = kmeans1.labels_
rebounds_labels = kmeans2.labels_
assists_labels = kmeans3.labels_

In [None]:
# Show the names of the players that K-means assigned to assists cluster 0.
# NOTE(review): the cluster id is just the label KMeans produced; presumably it
# carries no ranking by itself — compare cluster centers before interpreting it.
print(assists_cluster[0])

['Dejounte Murray', 'Draymond Green', 'Tre Jones', 'Ben Simmons', 'Austin Reaves', "De'Aaron Fox", 'Malcolm Brogdon', 'Brandon Ingram', 'Vasilije Micic', 'Terry Rozier', 'Kevin Durant', 'Buddy Hield', 'Kyle Lowry', 'Joel Embiid']
