<a href="https://colab.research.google.com/github/ssaltwick/ENEE324-Project/blob/master/ENEE324_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


This program uses data from the 2014-2018 NBA seasons to classify players by 
position.


Previous year's game statistics are used to generate a likelihood function
for each position. Then a certain player can be tested against each of these 
functions to guess their position.


In [0]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stat
import math
import pandas as pd
from enum import Enum
from mpl_toolkits import mplot3d
from itertools import combinations

from sklearn.mixture import GaussianMixture



In [0]:

# Defines an enumeration for player positions

class Position(Enum):
  PG = 0
  SG = 1
  SF = 2
  PF = 3
  C = 4
  

In [0]:
# fig = plt.figure()
# ax = plt.axes(projection='3d')

# ax.scatter3D(data[Position.PG]['3PA'],data[Position.PG]['2P%'],data[Position.PG]['BLK'])
# # your ellispsoid and center in matrix form
# A = covs[Position.PG]
# center = avgs[Position.PG]

# # find the rotation matrix and radii of the axes
# U, s, rotation = np.linalg.svd(A)
# radii = 1.0/np.sqrt(s)

# # now carry on with EOL's answer
# u = np.linspace(0.0, 2.0 * np.pi, 100)
# v = np.linspace(0.0, np.pi, 100)
# x = radii[0] * np.outer(np.cos(u), np.sin(v))
# y = radii[1] * np.outer(np.sin(u), np.sin(v))
# z = radii[2] * np.outer(np.ones_like(u), np.cos(v))
# for i in range(len(x)):
#     for j in range(len(x)):
#         [x[i,j],y[i,j],z[i,j]] = np.dot([x[i,j],y[i,j],z[i,j]], rotation) + center



# ax.plot_wireframe(x, y, z,  rstride=4, cstride=4, color='b', alpha=0.2)
# plt.show()
# plt.close(fig)
# del fig



In [0]:
"""
  Evaluates a player against a position's likelihood.
  params: positon = Position Enum
          player = numpy array of stats

"""
def evaluate_likelihood(position, player, avgs, covs, prior):
  mu = avgs[position]
  sig = covs[position]
  
  p = (math.sqrt(2*math.pi)) ** 3
  c = 1 / (p * math.sqrt(np.linalg.det(sig)))
  
  # print(sig.shape, mu.shape, player.shape)
   
  t = np.dot(np.transpose(player-mu), np.dot(sig, (player-mu)))
  
  
  return c * math.exp(-0.5 * t)

In [0]:
def guess_position(player, avgs, covs, prior, pos):
  positions = {}
  
  for p in pos:
    positions[p] = evaluate_likelihood(p, player, avgs, covs, prior) * prior[p]
  # print(positions)
  v = list(positions.values())
  k = list(positions.keys())
  
  return k[v.index(max(v))]

In [0]:
def compare_position(actual, guessed):

  pos_names = {
      Position.PG : "PG",
      Position.SG : "SG",
      Position.SF : "SF",
      Position.PF : "PF",
      Position.C : "C",
  }

# Guard, Forward, Center

#   if guessed == Position.PG or guessed == Position.SG:
#     if actual == "PG" or actual == "SG":
#       return 1
#   if guessed == Position.SF or guessed == Position.PF:
#     if actual == "SF" or actual == "PF":
#       return 1
#   if guessed == Position.C:
#     if actual == "C":
#       return 1
#   return 0

# PG, SG, SF, PF, C
  if pos_names[guessed] == actual:
    # print("Position %s Guessed Correctly" % actual)
    return 1
  else:
    # print("Position Guessed Incorrectly- Actual: %s     Guessed: %s" %(actual, pos_names[guessed]))
    return 0
      


In [0]:
# Load Data
pg_url = 'https://raw.githubusercontent.com/ssaltwick/ENEE324-Project/master/data/Data%20-%20PG-clean.csv'
sg_url = 'https://raw.githubusercontent.com/ssaltwick/ENEE324-Project/master/data/Data%20-%20SG-clean.csv'
sf_url = 'https://raw.githubusercontent.com/ssaltwick/ENEE324-Project/master/data/Data%20-%20SF-clean.csv'
pf_url = 'https://raw.githubusercontent.com/ssaltwick/ENEE324-Project/master/data/Data%20-%20PF-clean.csv'
c_url = 'https://raw.githubusercontent.com/ssaltwick/ENEE324-Project/master/data/Data%20-%20C-Clean.csv'
test_url = 'https://raw.githubusercontent.com/ssaltwick/ENEE324-Project/master/data/Data%20-%20Test-Clean.csv'


urls = {
    Position.PG : pg_url,
    Position.SG : sg_url,
    Position.SF : sf_url,
    Position.PF : pf_url,
    Position.C : c_url
}
test_data = pd.read_csv(test_url).dropna()
#2p%, 3pa, ft%


# Selected Stats (Best so far)
#stats = ['3PA', '3P%', '2P%',  'BLK']

In [0]:
def find_best_stats(num_stats):
  # All stats
  all_stats = ['3PA', '3P%','2P%', 'PTS', 'AST', 'FT%', 'STL', 'BLK', 'ORB','TRB']
  combos = list(combinations(all_stats,num_stats))

  results = {}
  for stats in combos:
    stats = list(stats)
    data = {
        Position.PG : pd.read_csv(pg_url).dropna()[stats],
        Position.SG : pd.read_csv(sg_url).dropna()[stats],
        Position.SF : pd.read_csv(sf_url).dropna()[stats],
        Position.PF : pd.read_csv(pf_url).dropna()[stats],
        Position.C : pd.read_csv(c_url).dropna()[stats]
    }

    total_players = data[Position.PG].shape[0] + data[Position.SG].shape[0] + \
      data[Position.SF].shape[0] + data[Position.PF].shape[0] + data[Position.C].shape[0]


    # Defines an array of priors (each position's percentage of players)
    # TODO: Update with actual priors),
    prior = {
        Position.PG : data[Position.PG].shape[0] / total_players,
        Position.SG : data[Position.SG].shape[0] / total_players,
        Position.SF : data[Position.SF].shape[0] / total_players,
        Position.PF : data[Position.PF].shape[0] / total_players,
        Position.C : data[Position.C].shape[0] / total_players
    }




    # TODO: Generate actual MEAN and COV for each position

    avgs = {
        Position.PG : data[Position.PG].mean(0).to_numpy(),
        Position.SG : data[Position.SG].mean(0).to_numpy(),
        Position.SF : data[Position.SF].mean(0).to_numpy(),
        Position.PF : data[Position.PF].mean(0).to_numpy(),
        Position.C : data[Position.C].mean(0).to_numpy()
    }

    covs = {
        Position.PG : data[Position.PG].cov().to_numpy(),
        Position.SG : data[Position.SG].cov().to_numpy(),
        Position.SF : data[Position.SF].cov().to_numpy(),
        Position.PF : data[Position.PF].cov().to_numpy(),
        Position.C : data[Position.C].cov().to_numpy()
    }

    # Display stat with minimum variance 
    # print(np.argmin(np.diag(covs[Position.PG])))
    # print(np.argmin(np.diag(covs[Position.SG])))
    # print(np.argmin(np.diag(covs[Position.SF])))
    # print(np.argmin(np.diag(covs[Position.PF])))
    # print(np.argmin(np.diag(covs[Position.C])))






    test_frame = test_data[['Pos'] + stats]

    sample_size = test_frame.shape[0]
    num_correct = 0.0


    for i in range(0,sample_size):

      test_player = test_frame.to_numpy()[i,0:]

      guess = guess_position(test_player[1:], avgs, covs, prior)

      num_correct += compare_position(test_player[0], guess)

    percent_correct = (num_correct / sample_size)


    results[''.join(stats)] = percent_correct

    #print(''.join(stats))
    #print('Guessed {:.2%}  of players correct'.format(percent_correct))



  v = list(results.values())
  k = list(results.keys())

  best = k[v.index(max(v))]  
  print("Best result using %s with %s accuracy" % (best, max(v)))

In [0]:
def compare_PG_SG(num_stats):
  all_stats = ['3PA', '3P%','2P%', 'PTS', 'AST', 'FT%', 'STL', 'BLK', 'ORB','TRB']
  combos = list(combinations(all_stats,num_stats))
  compare = [Position.PG, Position.SG]
  
  results = {}
  for stats in combos:
    stats = list(stats)
    data = {}
    for p in compare:
      
      
      

    total_players = 0
    for p in compare:
      total_players += data[p].shape[0]
    


    # Defines an array of priors (each position's percentage of players)
    # TODO: Update with actual priors),
    prior = {
        Position.PG : data[Position.PG].shape[0] / total_players,
        Position.SG : data[Position.SG].shape[0] / total_players,
    }
    prior = {}
    
    




    # TODO: Generate actual MEAN and COV for each position

    avgs = {
        Position.PG : data[Position.PG].mean(0).to_numpy(),
        Position.SG : data[Position.SG].mean(0).to_numpy(),
    }

    covs = {
        Position.PG : data[Position.PG].cov().to_numpy(),
        Position.SG : data[Position.SG].cov().to_numpy(),
    }

    for p in compare:
      data[p] = pd.read_csv(urls[p]).dropna()[stats]
      total_players += data[p].shape[0]
    
    for p in compare:
      prior[p] = data[p].shape[0] / total_players
      avgs[p]
    # Display stat with minimum variance 
    # print(np.argmin(np.diag(covs[Position.PG])))
    # print(np.argmin(np.diag(covs[Position.SG])))
    # print(np.argmin(np.diag(covs[Position.SF])))
    # print(np.argmin(np.diag(covs[Position.PF])))
    # print(np.argmin(np.diag(covs[Position.C])))






    test_frame = test_data[['Pos'] + stats]

    test_frame = test_frame.loc[(test_frame['Pos'] == 'PG') | (test_frame['Pos'] == 'SG')]
    sample_size = test_frame.shape[0]
    num_correct = 0.0


    for i in range(0,sample_size):

      test_player = test_frame.to_numpy()[i,0:]

      guess = guess_position(test_player[1:], avgs, covs, prior, compare)

      num_correct += compare_position(test_player[0], guess)

    percent_correct = (num_correct / sample_size)


    results[''.join(stats)] = percent_correct

    #print(''.join(stats))
    #print('Guessed {:.2%}  of players correct'.format(percent_correct))



  v = list(results.values())
  k = list(results.keys())

  best = k[v.index(max(v))]  
  print("Best result using %s with %s accuracy" % (best, max(v)))

In [146]:
compare_PG_SG(10)

Best result using 3PA3P%2P%PTSASTFT%STLBLKORBTRB with 0.5176470588235295 accuracy


In [147]:
def compare_SG_SF(num_stats):
  all_stats = ['3PA', '3P%','2P%', 'PTS', 'AST', 'FT%', 'STL', 'BLK', 'ORB','TRB']
  combos = list(combinations(all_stats,num_stats))

  results = {}
    for stats in combos:
    stats = list(stats)
    data = {
        Position.SG : pd.read_csv(sg_url).dropna()[stats],
        Position.SF : pd.read_csv(sf_url).dropna()[stats]
    }

    total_players = data[Position.SG].shape[0] + data[Position.SF].shape[0]


    # Defines an array of priors (each position's percentage of players)
    # TODO: Update with actual priors),
    prior = {
        Position.SG : data[Position.SG].shape[0] / total_players,
        Position.SF : data[Position.SF].shape[0] / total_players,
    }




    # TODO: Generate actual MEAN and COV for each position

    avgs = {
        Position.SG : data[Position.SG].mean(0).to_numpy(),
        Position.SF : data[Position.SF].mean(0).to_numpy(),
    }

    covs = {
        Position.SG : data[Position.SG].cov().to_numpy(),
        Position.SF : data[Position.Sf].cov().to_numpy(),
    }

    # Display stat with minimum variance 
    # print(np.argmin(np.diag(covs[Position.PG])))
    # print(np.argmin(np.diag(covs[Position.SG])))
    # print(np.argmin(np.diag(covs[Position.SF])))
    # print(np.argmin(np.diag(covs[Position.PF])))
    # print(np.argmin(np.diag(covs[Position.C])))






    test_frame = test_data[['Pos'] + stats]

    test_frame
    sample_size = test_frame.shape[0]
    num_correct = 0.0


    for i in range(0,sample_size):

      test_player = test_frame.to_numpy()[i,0:]

      guess = guess_position(test_player[1:], avgs, covs, prior)

      num_correct += compare_position(test_player[0], guess)

    percent_correct = (num_correct / sample_size)


    results[''.join(stats)] = percent_correct

    #print(''.join(stats))
    #print('Guessed {:.2%}  of players correct'.format(percent_correct))



  v = list(results.values())
  k = list(results.keys())

  best = k[v.index(max(v))]  
  print("Best result using %s with %s accuracy" % (best, max(v)))

IndentationError: ignored

In [0]:
def compare_SF_PF(num_stats):
  all_stats = ['3PA', '3P%','2P%', 'PTS', 'AST', 'FT%', 'STL', 'BLK', 'ORB','TRB']
  combos = list(combinations(all_stats,num_stats))

  results = {}
    for stats in combos:
    stats = list(stats)
    data = {
        Position.SF : pd.read_csv(sf_url).dropna()[stats],
        Position.PF : pd.read_csv(pf_url).dropna()[stats]
    }

    total_players = data[Position.SF].shape[0] + data[Position.PF].shape[0]


    # Defines an array of priors (each position's percentage of players)
    # TODO: Update with actual priors),
    prior = {
        Position.SF : data[Position.SF].shape[0] / total_players,
        Position.PF : data[Position.PF].shape[0] / total_players,
    }




    # TODO: Generate actual MEAN and COV for each position

    avgs = {
        Position.SF : data[Position.SF].mean(0).to_numpy(),
        Position.PF : data[Position.PF].mean(0).to_numpy(),
    }

    covs = {
        Position.SF : data[Position.SF].cov().to_numpy(),
        Position.PF : data[Position.PF].cov().to_numpy(),
    }

    # Display stat with minimum variance 
    # print(np.argmin(np.diag(covs[Position.PG])))
    # print(np.argmin(np.diag(covs[Position.SG])))
    # print(np.argmin(np.diag(covs[Position.SF])))
    # print(np.argmin(np.diag(covs[Position.PF])))
    # print(np.argmin(np.diag(covs[Position.C])))






    test_frame = test_data[['Pos'] + stats]

    test_frame
    sample_size = test_frame.shape[0]
    num_correct = 0.0


    for i in range(0,sample_size):

      test_player = test_frame.to_numpy()[i,0:]

      guess = guess_position(test_player[1:], avgs, covs, prior)

      num_correct += compare_position(test_player[0], guess)

    percent_correct = (num_correct / sample_size)


    results[''.join(stats)] = percent_correct

    #print(''.join(stats))
    #print('Guessed {:.2%}  of players correct'.format(percent_correct))



  v = list(results.values())
  k = list(results.keys())

  best = k[v.index(max(v))]  
  print("Best result using %s with %s accuracy" % (best, max(v)))

In [0]:
def compare_PF_C(num_stats):
  all_stats = ['3PA', '3P%','2P%', 'PTS', 'AST', 'FT%', 'STL', 'BLK', 'ORB','TRB']
  combos = list(combinations(all_stats,num_stats))

  results = {}
    for stats in combos:
    stats = list(stats)
    data = {
        Position.PF : pd.read_csv(pf_url).dropna()[stats],
        Position.C : pd.read_csv(c_url).dropna()[stats]
    }

    total_players = data[Position.PF].shape[0] + data[Position.C].shape[0]


    # Defines an array of priors (each position's percentage of players)
    # TODO: Update with actual priors),
    prior = {
        Position.PF : data[Position.PF].shape[0] / total_players,
        Position.C : data[Position.C].shape[0] / total_players,
    }




    # TODO: Generate actual MEAN and COV for each position

    avgs = {
        Position.PF : data[Position.PF].mean(0).to_numpy(),
        Position.C : data[Position.C].mean(0).to_numpy(),
    }

    covs = {
        Position.PF : data[Position.PF].cov().to_numpy(),
        Position.C : data[Position.C].cov().to_numpy(),
    }

    # Display stat with minimum variance 
    # print(np.argmin(np.diag(covs[Position.PG])))
    # print(np.argmin(np.diag(covs[Position.SG])))
    # print(np.argmin(np.diag(covs[Position.SF])))
    # print(np.argmin(np.diag(covs[Position.PF])))
    # print(np.argmin(np.diag(covs[Position.C])))






    test_frame = test_data[['Pos'] + stats]

    test_frame
    sample_size = test_frame.shape[0]
    num_correct = 0.0


    for i in range(0,sample_size):

      test_player = test_frame.to_numpy()[i,0:]

      guess = guess_position(test_player[1:], avgs, covs, prior)

      num_correct += compare_position(test_player[0], guess)

    percent_correct = (num_correct / sample_size)


    results[''.join(stats)] = percent_correct

    #print(''.join(stats))
    #print('Guessed {:.2%}  of players correct'.format(percent_correct))



  v = list(results.values())
  k = list(results.keys())

  best = k[v.index(max(v))]  
  print("Best result using %s with %s accuracy" % (best, max(v)))