In [1]:
# All the imports
import pandas as pd
import statistics
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from torch import nn, optim
import torch.nn.functional as F
from sklearn.metrics import classification_report, confusion_matrix
from joblib import dump, load
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, mean_squared_error
import imblearn.over_sampling
from imblearn.over_sampling import SMOTE, RandomOverSampler
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from nltk.corpus import wordnet
import random
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.model_selection import LeaveOneOut
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors

In [2]:
# Read path_detail.csv (resolved relative to the working directory) into a
# DataFrame and print its column labels.
path_detail_df = pd.read_csv("path_detail.csv")
print(path_detail_df.columns)

Index(['pid', 'fnm', 'lnm', 'hght', 't', 'ddst', 'cx', 'cy', 'cz', 'cvx',
       'cvy', 'cvz', 'cv', 'cax', 'cay', 'caz', 'ca', 'rt', 'dx', 'dy', 'dz',
       'd'],
      dtype='object')


In [3]:
# Distinct values of the 'pid' column, in order of first appearance.
# NOTE(review): pid presumably identifies a player; confirm against the data source.
players_ids = path_detail_df['pid'].unique()
print(players_ids)

[201935 203081   2772 202083 200768   2594 201567   2747 201980 201587
 101123 202738 200755 201142 201939   2037 202331 203082 202688   2544
 101112 101139 202689 203484 203504   2440 201569 202330   1717   2207
 101150 201583 202718   1938 101108 201144 201566 200751   1718 203114
 202681 201960 203078   2749 201167 202349 201158 202691 201952 203141
 201988 201145 101181 203463 202340 202704 201156 101107 202710 201163
   1713 201588 203496 201229 201976 201166 203138 202694   2547   2399
   2546 202322 202695 201943   2590 101114 200794 203095 101145 201571
 201609 203506   2746 202325 201975 201596 202335 201627   2755 201977
 203501 201564 202703 202339 203477   1891 201150 202391 203079 101141
 201162 200826   2406 203109 203503 201584 201565 203468 203118   2210
 201568 201951 203521 201147 200752   2736 202713 202697 202699 203087
 202390 203096   2550   2734 200782    977 202362 203544 203482 201573
 202708 203918 201956   2581 201152 203487   2564 201961 202711 201936
 10111

In [4]:
# Map each player id to "<first name> <last name>", using the first row in
# which that id appears. One de-duplication pass replaces the previous
# per-player boolean-mask scan over the whole frame.
first_rows = path_detail_df.drop_duplicates(subset='pid')
map_pids_to_player = {
    pid: fnm + " " + lnm
    for pid, fnm, lnm in zip(
        first_rows['pid'].values,   # .values keeps the NumPy integer key type
        first_rows['fnm'].values,
        first_rows['lnm'].values,
    )
}

In [5]:
# Player ids kept for the analysis (rows for every other pid are filtered out later).
pids_to_use = [
    201935, 203081, 2594, 2747, 201980,
    200755, 201142, 201939, 202331, 2544,
    1717, 101108, 201566, 202681, 202691,
    202710, 2546, 202695, 202391, 201565,
    977, 203110, 203935, 2738, 1938,
]

In [6]:
def take_every_third_and_limit(x, step=15, limit=20):
    """Down-sample a sequence by stride, then cap its length.

    Despite the historical name, the default stride is 15 (not 3) and the
    default cap is 20 points (not 100); both are now explicit parameters.

    Args:
        x: Any sliceable sequence (e.g. a list).
        step: Keep every `step`-th element, starting with the first.
        limit: Maximum number of elements kept after striding.

    Returns:
        A slice of `x` of the same sequence type, with at most `limit` elements.
    """
    return x[::step][:limit]

def _sample_points(values):
    # Turn the group's values into a list, then down-sample and cap it.
    return take_every_third_and_limit(list(values))

# One row per player: down-sampled 'cy' and 'cz' value lists plus the first
# first/last name seen for that pid.
agg_path_detail_df = (
    path_detail_df
    .groupby('pid')
    .agg({
        'cy': _sample_points,
        'cz': _sample_points,
        'fnm': 'first',
        'lnm': 'first',
    })
    .reset_index()
)

# Keep only the players selected in pids_to_use.
condition = agg_path_detail_df['pid'].isin(pids_to_use)
agg_path_detail_df = agg_path_detail_df.loc[condition]
print(agg_path_detail_df)

        pid                                                 cy  \
1       977  [0.179732243711186, 0.2502554728731617, 0.3129...   
3      1717  [0.3390659980337656, 0.368825182238127, 0.3961...   
6      1938  [0.0387725708582697, 0.0217013990895975, 0.006...   
15     2544  [0.1174401031741558, 0.1666395624331171, 0.215...   
16     2546  [0.1263262525880592, 0.1795605238972276, 0.229...   
24     2594  [0.3299909793254686, 0.3180648962354864, 0.307...   
27     2738  [0.0201055861956078, 0.0353718864989727, 0.051...   
29     2747  [-0.2081174657570886, -0.1376115258502426, -0....   
35   101108  [0.5729932713564658, 0.6291368050257897, 0.678...   
51   200755  [0.9615006234606084, 0.9288970716525196, 0.896...   
57   201142  [0.1680160363903216, 0.2356226793842775, 0.301...   
73   201565  [0.0935275546425002, 0.1366763531451952, 0.177...   
74   201566  [0.1585478620004604, 0.191340807782789, 0.2206...   
88   201935  [0.1680846790289842, 0.119290607812078, 0.0731...   
91   20193

In [7]:
# Min-max scale each player's 'cy' and 'cz' lists independently. The scaler is
# re-fitted on every list, so each list is rescaled using only its own
# minimum and maximum.
scaled_rows = []
scaler = MinMaxScaler()

for i in range(len(agg_path_detail_df)):
    row = agg_path_detail_df.iloc[i]
    scaled_row = row.copy()

    for col in ['cy', 'cz']:
        values = row[col]
        if not isinstance(values, list):
            print(f"Skipping row {i} for column {col} as it is not a list.")
            continue
        # MinMaxScaler expects a 2-D (n_samples, n_features) array.
        column_vector = np.array(values).reshape(-1, 1)
        scaled_row[col] = scaler.fit_transform(column_vector).flatten().tolist()

    scaled_rows.append(scaled_row)

scaled_path_detail_df = pd.DataFrame(scaled_rows, columns=agg_path_detail_df.columns)

In [8]:
# For every player, pair the scaled 'cy' and 'cz' values into [cy, cz] points.
# final[i] is the point list for row i of scaled_path_detail_df.
final = []
for i in range(len(scaled_path_detail_df)):
    # Fetch the row once instead of re-running .iloc[i] for every point.
    player_row = scaled_path_detail_df.iloc[i]
    print("Player Name:", map_pids_to_player[player_row['pid']])
    cy_values = player_row['cy']
    cz_values = player_row['cz']
    li = [[cy_values[x], cz_values[x]] for x in range(len(cy_values))]
    final.append(li)
    print(li)

Player Name: Kobe Bryant
[[0.23785305359024242, 0.0], [0.38854901663977726, 0.0126274268166926], [0.5224647948096511, 0.032443034428068285], [0.626938364805661, 0.06096876411236141], [0.7154127004995212, 0.10273780603026994], [0.8032357175663825, 0.15825796443815332], [0.883006686028053, 0.22339246010002922], [0.9473248759063408, 0.29400451377991466], [0.9887895572230538, 0.36595734624182596], [1.0, 0.4351141782497814], [0.97312465593642, 0.49814066840650795], [0.901209854327246, 0.5619891078256007], [0.7940306464868202, 0.6277867762707239], [0.6645972813863766, 0.693615546762282], [0.5259200079971496, 0.7575572923206797], [0.3910090752903741, 0.8176938859663212], [0.2728747322372833, 0.8721072007196118], [0.1770560961741712, 0.9195130138772316], [0.08597797147297687, 0.9616566889861415], [0.0, 1.0]]
Player Name: Dirk Nowitzki
[[0.6893034678160399, 0.0011319605421153955], [0.7678147717214767, 0.0], [0.8399500510585771, 0.009541427021021454], [0.9010039269477099, 0.03243722515362135], [

In [11]:
def calculate_metrics(data):
  """Summarise a list of 2-element points with per-axis statistics.

  Args:
    data: Indexable collection of points, each with a value at index 0 and
      index 1. At least two points are needed for the sample statistics.

  Returns:
    A list [xmean, ymean, xstd, ystd, xvar, yvar], where the std/var values
    are the sample standard deviation and sample variance from `statistics`.
  """
  xs = [point[0] for point in data]
  ys = [point[1] for point in data]
  count = len(data)

  # Plain running totals, accumulated in input order.
  x_total = 0
  for value in xs:
    x_total += value
  y_total = 0
  for value in ys:
    y_total += value

  return [
    x_total / count,
    y_total / count,
    statistics.stdev(xs),
    statistics.stdev(ys),
    statistics.variance(xs),
    statistics.variance(ys),
  ]

In [10]:
# Empty feature table: the point array, six summary statistics, and the label.
columns = [
    'xy',
    'xmean', 'ymean',
    'xstd', 'ystd',
    'xvar', 'yvar',
    'Label',
]
df = pd.DataFrame(columns=columns)

In [12]:
# One feature row per player: the point array, its six summary statistics
# (computed once per player), and the player's name as the label.
for i in range(len(final)):
  metrics = calculate_metrics(final[i])
  label = map_pids_to_player[scaled_path_detail_df.iloc[i]['pid']]
  df.loc[i] = [np.array(final[i]), *metrics, label]

In [14]:
# Design matrix: each sample is its flattened point array followed by the six
# summary statistics as single-column blocks. Labels are the 'Label' column.
flattened_xy_features = np.array([points.flatten() for points in df['xy'].values])
(xmean_features, ymean_features,
 xstd_features, ystd_features,
 xvar_features, yvar_features) = (
    df[name].values.reshape(-1, 1)
    for name in ('xmean', 'ymean', 'xstd', 'ystd', 'xvar', 'yvar')
)
X = np.hstack((
    flattened_xy_features,
    xmean_features, ymean_features,
    xstd_features, ystd_features,
    xvar_features, yvar_features,
))
y = df['Label']

In [15]:
# Single-nearest-neighbour classifier using the 'manhattan' metric.
knn_manhattan = KNeighborsClassifier(metric='manhattan', n_neighbors=1)
knn_manhattan.fit(X, y)

In [16]:
# Single-nearest-neighbour classifier using the 'euclidean' metric.
knn_euclidean = KNeighborsClassifier(metric='euclidean', n_neighbors=1)
knn_euclidean.fit(X, y)

In [17]:
# Single-nearest-neighbour classifier using the 'chebyshev' metric.
knn_chebyshev = KNeighborsClassifier(metric='chebyshev', n_neighbors=1)
knn_chebyshev.fit(X, y)

In [18]:
def give_prediction(test_array):
  """Predict a label for the first sample with all three KNN classifiers.

  Args:
    test_array: Samples in the form accepted by the classifiers' `predict`;
      only the prediction for the first sample is used.

  Returns:
    A one-element list when the chebyshev, euclidean and manhattan models
    agree; otherwise a three-element list of their predictions in that order.
  """
  votes = [
    model.predict(test_array)[0]
    for model in (knn_chebyshev, knn_euclidean, knn_manhattan)
  ]
  if votes[0] == votes[1] == votes[2]:
    return [votes[0]]
  return votes

In [19]:
# Hand-entered test trajectory: 20 two-value points, all within [0, 1].
chris = [
    [1.0, 0.0],
    [0.694989106753813, 0.08669454008853898],
    [0.44589687726942623, 0.17338908017707821],
    [0.2628903413217139, 0.249754058042302],
    [0.140885984023239, 0.3146827348745695],
    [0.05374001452432821, 0.38108706345302507],
    [0.007262164124909234, 0.44749139203148053],
    [0.0, 0.5126660108214461],
    [0.01960784313725487, 0.5742744712247909],
    [0.0653594771241829, 0.6309640924741761],
    [0.1960784313725489, 0.6844564682734874],
    [0.39288307915758897, 0.7283571077225773],
    [0.5933188090050835, 0.7741023118544024],
    [0.7785039941902687, 0.8202164289227742],
    [0.9317356572258533, 0.8622725036891293],
    [0.9426289034132173, 0.8991637973438269],
    [0.9448075526506899, 0.9331037875061485],
    [0.9389978213507626, 0.9613871126414166],
    [0.9135802469135803, 0.9833989178553861],
    [0.8482207697893972, 1.0],
]

In [20]:
# Feature vector for `chris`: the flattened points followed by the six summary
# statistics. calculate_metrics is evaluated once; np.hstack appends its
# six-element list after the flattened points.
clo = np.array(chris).flatten()
test = np.hstack((clo, calculate_metrics(chris)))

In [21]:
# Wrap the single feature vector in a list so the classifiers receive one
# sample; the returned list of predicted labels is the cell's output.
give_prediction([test])

['Manu Ginobili', 'Manu Ginobili', 'James Harden']

In [24]:
# Two parallel 20-value lists, zipped below into [f, s] points.
f = [1.0, 0.8284600389863547, 0.6695906432748535, 0.5477582846003898, 0.46003898635477586, 0.39766081871345027, 0.361598440545809, 0.3693957115009746, 0.39766081871345027, 0.466374269005848, 0.5599415204678363, 0.6442495126705653, 0.6842105263157895, 0.6764132553606238, 0.6028265107212476, 0.4956140350877193, 0.3698830409356725, 0.23732943469785572, 0.10916179337231968, 0.0]
s = [0.0, 0.07386888273314876, 0.14573714989227463, 0.21406586642043707, 0.2730070791012619, 0.3228685749461373, 0.3742690058479533, 0.42105263157894735, 0.4678362573099415, 0.5112342259156664, 0.559248999692213, 0.6180363188673438, 0.6869806094182827, 0.7559248999692213, 0.8248691905201601, 0.8845798707294552, 0.9315173899661434, 0.9653739612188365, 0.9876885195444752, 1.0]
test_aadhi = [[first, second] for first, second in zip(f, s)]

In [25]:
# Display the paired points for inspection.
test_aadhi

[[1.0, 0.0],
 [0.8284600389863547, 0.07386888273314876],
 [0.6695906432748535, 0.14573714989227463],
 [0.5477582846003898, 0.21406586642043707],
 [0.46003898635477586, 0.2730070791012619],
 [0.39766081871345027, 0.3228685749461373],
 [0.361598440545809, 0.3742690058479533],
 [0.3693957115009746, 0.42105263157894735],
 [0.39766081871345027, 0.4678362573099415],
 [0.466374269005848, 0.5112342259156664],
 [0.5599415204678363, 0.559248999692213],
 [0.6442495126705653, 0.6180363188673438],
 [0.6842105263157895, 0.6869806094182827],
 [0.6764132553606238, 0.7559248999692213],
 [0.6028265107212476, 0.8248691905201601],
 [0.4956140350877193, 0.8845798707294552],
 [0.3698830409356725, 0.9315173899661434],
 [0.23732943469785572, 0.9653739612188365],
 [0.10916179337231968, 0.9876885195444752],
 [0.0, 1.0]]

In [30]:
# `aadhi` holds the same 20 points as `test_aadhi`. It was previously a
# copy-pasted literal of that cell's output; it is now derived from
# `test_aadhi` so the two cannot drift apart. Each point is copied so that
# `aadhi` stays an independent list of lists.
aadhi = [list(point) for point in test_aadhi]

In [31]:
# NOTE(review): leftover debugging check of the container type; it can be
# removed once it is no longer needed.
type(aadhi)

list

In [32]:
# Feature vector for `aadhi`: the flattened points followed by the six summary
# statistics. calculate_metrics is evaluated once; np.hstack appends its
# six-element list after the flattened points.
adi = np.array(aadhi).flatten()
test_2 = np.hstack((adi, calculate_metrics(aadhi)))

In [34]:
# Wrap the single feature vector in a list so the classifiers receive one
# sample; the returned list of predicted labels is the cell's output.
give_prediction([test_2])

['Kyle Korver']

In [35]:
# Hand-entered test trajectory: 20 two-value points, all within [0, 1].
aaron = [
    [0.7453773113443277, 0.0],
    [0.42778610694652647, 0.14691778843376294],
    [0.2588705647176412, 0.25114102490034085],
    [0.20314842578710646, 0.3336992316136115],
    [0.24012993503248378, 0.41619966491420657],
    [0.33508245877061477, 0.5147033335259114],
    [0.47351324337831086, 0.49240279623317346],
    [0.6434282858570713, 0.5433011728002772],
    [0.8073463268365818, 0.5987636489687447],
    [0.9375312343828086, 0.6592524120399793],
    [1.0, 0.725114102490034],
    [0.9590204897551223, 0.7880871223063147],
    [0.8370814592703648, 0.8472470968860131],
    [0.6841579210394803, 0.897567739326362],
    [0.5032483758120938, 0.9345427234386735],
    [0.3620689655172413, 0.9637182968397943],
    [0.2491254372813593, 0.9757351666762955],
    [0.06696651674162919, 0.9877520365127969],
    [0.006996501749125433, 0.9963025015887689],
    [0.0, 1.0],
]

In [36]:
# Feature vector for `aaron`: the flattened points followed by the six summary
# statistics. calculate_metrics is evaluated once; np.hstack appends its
# six-element list after the flattened points.
aar = np.array(aaron).flatten()
test_3 = np.hstack((aar, calculate_metrics(aaron)))


In [37]:
# Wrap the single feature vector in a list so the classifiers receive one
# sample; the returned list of predicted labels is the cell's output.
give_prediction([test_3])

['LeBron James', 'Danny Green', 'Kevin Durant']