In [1]:
# All the imports
import pandas as pd
import statistics
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from torch import nn, optim
import torch.nn.functional as F
from sklearn.metrics import classification_report, confusion_matrix
from joblib import dump, load
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, mean_squared_error
import imblearn.over_sampling
from imblearn.over_sampling import SMOTE, RandomOverSampler
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from nltk.corpus import wordnet
import random
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.model_selection import LeaveOneOut
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors

In [3]:
path_detail_df = pd.read_csv("./Shooting Form Data/path_detail.csv")
players_ids = path_detail_df['pid'].unique()

In [5]:
map_pids_to_player = {}
for i in range(len(players_ids)):
  filtered_df = path_detail_df[path_detail_df['pid'] == players_ids[i]]
  other_column1_values = filtered_df['fnm'].tolist()
  other_column2_values = filtered_df['lnm'].tolist()
  s = ""
  s += other_column1_values[0]
  s += " "
  s += other_column2_values[0]
  map_pids_to_player[players_ids[i]] = s

In [6]:
pids_to_use = [201935, 203081, 2594, 2747, 201980, 200755, 201142, 201939, 202331, 2544, 1717, 101108, 201566, 202681, 202691, 202710, 2546, 202695, 202391, 201565, 977, 203110, 203935, 2738, 1938]

In [8]:
def take_every_third_and_limit(x):
    return x[::15][:20]  # Takes every third element and limits to the first 100 points

agg_path_detail_df = path_detail_df.groupby('pid').agg({
    'cy': lambda x: take_every_third_and_limit(list(x)),
    'cz': lambda x: take_every_third_and_limit(list(x)),
    'fnm': 'first',  # or 'last' or another appropriate aggregation function
    'lnm': 'first'   # or 'last' or another appropriate aggregation function
}).reset_index()

# Filter for specific 'pid' values
condition = agg_path_detail_df['pid'].isin(pids_to_use)
agg_path_detail_df = agg_path_detail_df.loc[condition]

In [9]:
scaled_rows = []

# Initialize MinMaxScaler
scaler = MinMaxScaler()

for i in range(len(agg_path_detail_df)):
    row = agg_path_detail_df.iloc[i]
    scaled_row = row.copy()

    for col in ['cy', 'cz']:
        try:
            if isinstance(row[col], list):
                data_array = np.array(row[col]).reshape(-1, 1)
                scaled_data = scaler.fit_transform(data_array)

                scaled_row[col] = scaled_data.flatten().tolist()

            else:
                print(f"Skipping row {i} for column {col} as it is not a list.")
        except Exception as e:
            raise
    scaled_rows.append(scaled_row)
scaled_path_detail_df = pd.DataFrame(scaled_rows, columns=agg_path_detail_df.columns)

In [10]:
final = []
for i in range(len(scaled_path_detail_df)):
    li = []
    print("Player Name:", map_pids_to_player[scaled_path_detail_df.iloc[i]['pid']])
    for x in range(len(scaled_path_detail_df.iloc[i]['cy'])):
      temp = []
      temp.append(scaled_path_detail_df.iloc[i]['cy'][x])
      temp.append(scaled_path_detail_df.iloc[i]['cz'][x])
      li.append(temp)
    final.append(li)

Player Name: Kobe Bryant
Player Name: Dirk Nowitzki
Player Name: Manu Ginobili
Player Name: LeBron James
Player Name: Carmelo Anthony
Player Name: Kyle Korver
Player Name: Andre Iguodala
Player Name: JR Smith
Player Name: Chris Paul
Player Name: JJ Redick
Player Name: Kevin Durant
Player Name: Derrick Rose
Player Name: Russell Westbrook
Player Name: James Harden
Player Name: Stephen Curry
Player Name: Danny Green
Player Name: Paul George
Player Name: Jeremy Lin
Player Name: Kyrie Irving
Player Name: Klay Thompson
Player Name: Kawhi Leonard
Player Name: Jimmy Butler
Player Name: Damian Lillard
Player Name: Draymond Green
Player Name: Marcus Smart


In [11]:
players_database = []
for x in range(len(scaled_path_detail_df)):
  cy = scaled_path_detail_df.iloc[x]['cy']
  cz = scaled_path_detail_df.iloc[x]['cz']
  formatted_array = [[cy[i], cz[i]] for i in range(len(cy))]
  players_database.append(formatted_array)
players_database = np.array(players_database)

In [12]:
labels = []
for i in range(len(scaled_path_detail_df)):
  labels.append(map_pids_to_player[scaled_path_detail_df.iloc[i]['pid']])
labeled_arrays = [(labels[i], pd.DataFrame(players_database[i])) for i in range(len(players_database))]

In [13]:
final_df = pd.concat([df.assign(Label=label) for label, df in labeled_arrays])
X = final_df.drop('Label', axis=1)
y = final_df['Label']

In [14]:
f = [1.0, 0.8284600389863547, 0.6695906432748535, 0.5477582846003898, 0.46003898635477586, 0.39766081871345027, 0.361598440545809, 0.3693957115009746, 0.39766081871345027, 0.466374269005848, 0.5599415204678363, 0.6442495126705653, 0.6842105263157895, 0.6764132553606238, 0.6028265107212476, 0.4956140350877193, 0.3698830409356725, 0.23732943469785572, 0.10916179337231968, 0.0]
s = [0.0, 0.07386888273314876, 0.14573714989227463, 0.21406586642043707, 0.2730070791012619, 0.3228685749461373, 0.3742690058479533, 0.42105263157894735, 0.4678362573099415, 0.5112342259156664, 0.559248999692213, 0.6180363188673438, 0.6869806094182827, 0.7559248999692213, 0.8248691905201601, 0.8845798707294552, 0.9315173899661434, 0.9653739612188365, 0.9876885195444752, 1.0]
test_aadhi = [[f[i], s[i]] for i in range(len(f))]

In [15]:
vik = [0.8051626591230552, 0.4579207920792074, 0.189179632248939, 0.0, 0.140735502121641, 0.13896746817538908, 0.12057991513437055, 0.17185289957567212, 0.30445544554455467, 0.4946958981612448, 0.6877652050919381, 0.8490099009900991, 0.9494342291371993, 1.0, 1.0, 0.9695898161244695, 0.9045261669024045, 0.8543140028288544, 0.8606789250353607, 0.7446958981612447]
viky =  [0.0, 0.15062712460438418, 0.24838823115695713, 0.31227288711757123, 0.3709998827804478, 0.3678349548704724, 0.37205485875043964, 0.40663462665572625, 0.46606493963193063, 0.4919704606728402, 0.5362794514124957, 0.596061423045364, 0.6682686672136913, 0.7456335716797562, 0.8215918415191654, 0.8903997186730747, 0.9449068104559841, 0.9859336537334428, 0.9803071152268198, 1.0]
test_vik = [[vik[i], viky[i]] for i in range(len(vik))]

In [16]:
chris = [[1.0, 0.0], [0.694989106753813, 0.08669454008853898], [0.44589687726942623, 0.17338908017707821], [0.2628903413217139, 0.249754058042302], [0.140885984023239, 0.3146827348745695], [0.05374001452432821, 0.38108706345302507], [0.007262164124909234, 0.44749139203148053], [0.0, 0.5126660108214461], [0.01960784313725487, 0.5742744712247909], [0.0653594771241829, 0.6309640924741761], [0.1960784313725489, 0.6844564682734874], [0.39288307915758897, 0.7283571077225773], [0.5933188090050835, 0.7741023118544024], [0.7785039941902687, 0.8202164289227742], [0.9317356572258533, 0.8622725036891293], [0.9426289034132173, 0.8991637973438269], [0.9448075526506899, 0.9331037875061485], [0.9389978213507626, 0.9613871126414166], [0.9135802469135803, 0.9833989178553861], [0.8482207697893972, 1.0]]

In [17]:
knn_chebyshev = KNeighborsClassifier(n_neighbors=1, metric='chebyshev')
knn_chebyshev.fit(X, y)

In [18]:
knn_euclidean = KNeighborsClassifier(n_neighbors=1, metric='euclidean')
knn_euclidean.fit(X, y)

In [19]:
knn_manhattan = KNeighborsClassifier(n_neighbors=1, metric='manhattan')
knn_manhattan.fit(X, y)

In [20]:
def give_prediction(test_array):
  options = []
  prediction1 = knn_chebyshev.predict(test_array)

  prediction2 = knn_euclidean.predict(test_array)

  prediction3 = knn_manhattan.predict(test_array)

  if(prediction1[0] == prediction2[0] == prediction3[0]):
    options.append(prediction1[0])
  else:
    options.append(prediction1[0])
    options.append(prediction2[0])
    options.append(prediction3[0])
  return options

In [21]:
give_prediction(chris)

['Kyle Korver']

In [22]:
aaron = [[0.7453773113443277, 0.0], [0.42778610694652647, 0.14691778843376294], [0.2588705647176412, 0.25114102490034085], [0.20314842578710646, 0.3336992316136115], [0.24012993503248378, 0.41619966491420657], [0.33508245877061477, 0.5147033335259114], [0.47351324337831086, 0.49240279623317346], [0.6434282858570713, 0.5433011728002772], [0.8073463268365818, 0.5987636489687447], [0.9375312343828086, 0.6592524120399793], [1.0, 0.725114102490034], [0.9590204897551223, 0.7880871223063147], [0.8370814592703648, 0.8472470968860131], [0.6841579210394803, 0.897567739326362], [0.5032483758120938, 0.9345427234386735], [0.3620689655172413, 0.9637182968397943], [0.2491254372813593, 0.9757351666762955], [0.06696651674162919, 0.9877520365127969], [0.006996501749125433, 0.9963025015887689], [0.0, 1.0]]
give_prediction(aaron)

['Chris Paul']