
This program uses data from the 2014-2018 NBA seasons to classify players by 
position.


Previous year's game statistics are used to generate a likelihood function
for each position. Then a certain player can be tested against each of these 
functions to guess their position.


In [0]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import math
import pandas as pd
from enum import Enum





In [43]:

# Defines an enumeration for player positions

class Position(Enum):
  PG = 0
  SG = 1
  SF = 2
  PF = 3
  C = 4
  

# Defines an array of priors (each position's percentage of players)
# TODO: Update with actual priors
prior = {
    Position.PG : 0.2,
    Position.SG : 0.2,
    Position.SF : 0.2,
    Position.PF : 0.2,
    Position.C : 0.2 
}







0.2


In [0]:
# Load Data
pg_url = 'https://raw.githubusercontent.com/ssaltwick/ENEE324-Project/master/data/Data%20-%20PG-clean.csv'
sg_url = 'https://raw.githubusercontent.com/ssaltwick/ENEE324-Project/master/data/Data%20-%20SG-clean.csv'
sf_url = 'https://raw.githubusercontent.com/ssaltwick/ENEE324-Project/master/data/Data%20-%20SF-clean.csv'
pf_url = 'https://raw.githubusercontent.com/ssaltwick/ENEE324-Project/master/data/Data%20-%20PF-clean.csv'
c_url = 'https://raw.githubusercontent.com/ssaltwick/ENEE324-Project/master/data/Data%20-%20C-Clean.csv'

data = {
    Position.PG : pd.read_csv(pg_url).dropna(),
    Position.SG : pd.read_csv(sg_url).dropna(),
    Position.SF : pd.read_csv(sf_url).dropna(),
    Position.PF : pd.read_csv(pf_url).dropna(),
    Position.C : pd.read_csv(c_url)
}



In [97]:
print(data[Position.PG].cov().to_numpy())

[[ 1.75073626e-03  4.81033599e-04  1.66886546e-03  2.87231200e-04
   1.73308002e-02  2.54672282e-02  4.59583535e-03  1.43943733e-03
   9.99439018e-02]
 [ 4.81033599e-04  4.04041308e-03  3.13009904e-04  1.42918250e-03
   1.10962412e-03  1.55239110e-02  3.16118644e-03 -1.70261617e-03
   1.05981856e-01]
 [ 1.66886546e-03  3.13009904e-04  2.04514074e-03  5.32769019e-04
   1.77840171e-02  2.45859875e-02  5.30402168e-03  1.27251124e-03
   1.08489417e-01]
 [ 2.87231200e-04  1.42918250e-03  5.32769019e-04  7.24596799e-03
  -1.25327441e-02 -1.89325032e-03 -1.34275222e-03 -3.00231984e-03
   1.32637654e-01]
 [ 1.73308002e-02  1.10962412e-03  1.77840171e-02 -1.25327441e-02
   2.08680549e+00  2.29368800e+00  4.64986049e-01  1.53791306e-01
   6.33007068e+00]
 [ 2.54672282e-02  1.55239110e-02  2.45859875e-02 -1.89325032e-03
   2.29368800e+00  4.55876675e+00  6.99442062e-01  1.52234290e-01
   9.59772651e+00]
 [ 4.59583535e-03  3.16118644e-03  5.30402168e-03 -1.34275222e-03
   4.64986049e-01  6.9944206

In [0]:
# TODO: Generate actual MEAN and COV for each position

avgs = {
    Position.PG : data[Position.PG].mean(0).to_numpy(),
    Position.SG : data[Position.SG].mean(0).to_numpy(),
    Position.SF : data[Position.SF].mean(0).to_numpy(),
    Position.PF : data[Position.PF].mean(0).to_numpy(),
    Position.C : data[Position.C].mean(0).to_numpy()
}

covs = {
    Position.PG : data[Position.PG].cov().to_numpy(),
    Position.SG : data[Position.SG].cov().to_numpy(),
    Position.SF : data[Position.SF].cov().to_numpy(),
    Position.PF : data[Position.PF].cov().to_numpy(),
    Position.C : data[Position.C].cov().to_numpy()
}




In [0]:
"""
  Evaluates a player against a position's likelihood.
  params: positon = Position Enum
          player = numpy array of stats

"""
def evaluate_likelihood(position, player):
  mu = avgs[position]
  sig = covs[position]
  
  p = (math.sqrt(2*math.pi)) ** 3
  c = 1 / (p * np.linalg.det(sig))
  
  t = np.matmul(np.matmul(sig, (player - mu)), np.transpose(player - mu))
  
  return c * math.exp(-0.5 * t)

In [0]:
def guess_position(player):
  positions = {}
  positions[Position.PG] = evaluate_likelihood(Position.PG, player)
  positions[Position.SG] = evaluate_likelihood(Position.SG, player)
  positions[Position.SF] = evaluate_likelihood(Position.SF, player)
  positions[Position.PF] = evaluate_likelihood(Position.PF, player)
  positions[Position.C] = evaluate_likelihood(Position.C, player)
  
  v = list(positions.values())
  k = list(positions.keys())
  
  return k[v.index(max(v))]

In [103]:

test_player = np.array([0.535,0.36,0.604,0.821,6.7,4.2,0.9,1.3,13.6])
print(guess_position(test_player))

Position.PG


In [100]:
print(data[Position.C].columns.values)

['FG%' '3P%' '2P%' 'FT%' 'TRB' 'AST' 'STL' 'BLK' 'PTS']
