# Statistical Analysis

In [2]:
# import tools to analyze dataset with the datascience module
from datascience import *
import numpy as np
import statistics

In [3]:
# access the csv file
picks_data = Table.read_table("second_rounders_2005-14.csv")

In [5]:
# General analysis
print("There are " + str(picks_data.num_rows) + " players drafted in the second round from 2005 to 2014")

avg_yrs = round(np.mean(picks_data.column('Yrs')))
avg_pts = round(np.mean(picks_data.column('PTS_per_G')), 1)
print("The average second rounder will have a " + str(avg_yrs) + " year career.")
print("The average second rounder score " + str(avg_pts) + " points per game in their career.")

# players who played 2 years or less (initial contract is for two years)
two_yrs_or_less = picks_data.where('Yrs', are.below_or_equal_to(2)).column('Player')
print(str(len(two_yrs_or_less)) + " second round picks from 2005-2014 played two years or less in their careers.")

There are 300 players drafted in the second round from 2005 to 2014
The average second rounder will have a 3.0 year career.
The average second rounder score 3.6 points per game in their career.
176 second round picks from 2005-2014 played two years or less in their careers.


In [6]:
# Statistics for Games Played

# General Stats/Fun Facts
print("General Statistics/Fun Facts:")

zero_games = picks_data.where('G', are.equal_to(0)).column('Player')
print(str(len(zero_games)) + " second round picks from 2005-2014 played zero NBA games in their careers.")

avg_games = np.mean(picks_data.column('G'))
std_games = np.std(picks_data.column('G'))
avg_games_per_yr = np.mean(picks_data.column('G')) / np.mean(picks_data.column('Yrs'))
print("Second round picks from 2015-2014 played an average of " + 
      str(round(avg_games)) + " games in their overall career, with standard deviation of " + str(round(std_games)) + ".")
print("The second rounders played an average of " + str(round(avg_games_per_yr)) + " games per year played.")
print("")

# statistics per draft year
print("Statistics per draft year:")
for i in range(2005, 2015):
    year_data = picks_data.where("Draft Year", are.equal_to(i))
    games = year_data.column('G')
    avg_g = np.mean(games)
    per_yr_g = avg_g / np.mean(year_data.column('Yrs'))
    variance = statistics.pvariance(games)
    st_dev = np.std(games)
    print(str(i) + " second rounders played an average of " + 
          str(round(avg_g)) + " games in their career, with " + str(round(per_yr_g)) + " games per year.")
    print("Variance: " + str(variance) + ", Standard Deviation: " + str(round(st_dev,2)))
    print("")

General Statistics/Fun Facts:
75 second round picks from 2005-2014 played zero NBA games in their careers.
Second round picks from 2015-2014 played an average of 152.0 games in their overall career, with standard deviation of 216.0.
The second rounders played an average of 49.0 games per year played.

Statistics per draft year:
2005 second rounders played an average of 285.0 games in their career, with 56.0 games per year.
Variance: 110384, Standard Deviation: 332.24

2006 second rounders played an average of 150.0 games in their career, with 48.0 games per year.
Variance: 51409, Standard Deviation: 226.74

2007 second rounders played an average of 135.0 games in their career, with 48.0 games per year.
Variance: 50843, Standard Deviation: 225.49

2008 second rounders played an average of 163.0 games in their career, with 53.0 games per year.
Variance: 60185, Standard Deviation: 245.33

2009 second rounders played an average of 211.0 games in their career, with 55.0 games per year.
Vari

In [7]:
# Statistics for VORP

vorp_clm = picks_data.column('VORP')
avg_vorp = np.mean(vorp_clm)
std_vorp = np.std(vorp_clm)
print("Second round picks from 2015-2014 have an average VORP of " + 
      str(round((avg_vorp), 1)) + ", with standard deviation of " + str(round(std_vorp, 2)) + ".")
print("")

# statistics per draft year
print("Statistics per draft year:")
for i in range(2005, 2015):
    year_data = picks_data.where("Draft Year", are.equal_to(i))
    vorp_peryr = year_data.column('VORP')
    avg = np.mean(vorp_peryr)
    variance = statistics.pvariance(vorp_peryr)
    st_dev = np.std(vorp_peryr)
    print(str(i) + " second rounders have an average VORP of " + str(round(avg, 1)) + ".")
    print("Variance: " + str(round(variance, 2)) + ", Standard Deviation: " + str(round(st_dev,2)))
    print("")

Second round picks from 2015-2014 have an average VORP of 1.3, with standard deviation of 4.73.

Statistics per draft year:
2005 second rounders have an average VORP of 2.8.
Variance: 29.7, Standard Deviation: 5.45

2006 second rounders have an average VORP of 1.6.
Variance: 46.95, Standard Deviation: 6.85

2007 second rounders have an average VORP of 1.4.
Variance: 43.42, Standard Deviation: 6.59

2008 second rounders have an average VORP of 1.8.
Variance: 30.14, Standard Deviation: 5.49

2009 second rounders have an average VORP of 1.9.
Variance: 17.67, Standard Deviation: 4.2

2010 second rounders have an average VORP of 0.5.
Variance: 2.04, Standard Deviation: 1.43

2011 second rounders have an average VORP of 0.9.
Variance: 11.15, Standard Deviation: 3.34

2012 second rounders have an average VORP of 1.5.
Variance: 19.94, Standard Deviation: 4.47

2013 second rounders have an average VORP of 0.1.
Variance: 0.66, Standard Deviation: 0.81

2014 second rounders have an average VORP o

In [8]:
# Statistics per pick position

for i in range(31,61):
    # filter data by pick position
    data = picks_data.where('Pk', are.equal_to(i))
    
    # Calculate VORP statistics
    vorp_clm = data.column('VORP')
    avg_vorp = np.mean(vorp_clm)
    var_vorp = statistics.pvariance(vorp_clm)
    std_vorp = np.std(vorp_clm)
    
    # Calculate BPM statistics
    bpm_clm = data.column('BPM')
    avg_bpm = np.mean(bpm_clm)
    var_bpm = statistics.pvariance(bpm_clm)
    std_bpm = np.std(bpm_clm)
    
    print("Statistics for 2005-14 draftees picked at slot " + str(i) + ":")
    print("Average VORP: " + str(round(avg_vorp, 2)) + ", Variance: " + str(round(var_vorp, 2)) 
          + ", Standard Deviation: " + str(round(std_vorp, 2)))
    print("Average BPM: " + str(round(avg_bpm, 2)) + ", Variance: " + str(round(var_bpm, 2)) 
          + ", Standard Deviation: " + str(round(std_bpm, 2)))
    print("")

Statistics for 2005-14 draftees picked at slot 31:
Average VORP: 0.42, Variance: 2.01, Standard Deviation: 1.42
Average BPM: -3.29, Variance: 5.88, Standard Deviation: 2.43

Statistics for 2005-14 draftees picked at slot 32:
Average VORP: 0.04, Variance: 0.78, Standard Deviation: 0.88
Average BPM: -4.94, Variance: 19.78, Standard Deviation: 4.45

Statistics for 2005-14 draftees picked at slot 33:
Average VORP: 1.55, Variance: 3.96, Standard Deviation: 1.99
Average BPM: -2.13, Variance: 4.95, Standard Deviation: 2.22

Statistics for 2005-14 draftees picked at slot 34:
Average VORP: 1.85, Variance: 16.27, Standard Deviation: 4.03
Average BPM: -2.25, Variance: 6.01, Standard Deviation: 2.45

Statistics for 2005-14 draftees picked at slot 35:
Average VORP: 5.98, Variance: 86.33, Standard Deviation: 9.29
Average BPM: -1.4, Variance: 14.16, Standard Deviation: 3.76

Statistics for 2005-14 draftees picked at slot 36:
Average VORP: 0.56, Variance: 7.32, Standard Deviation: 2.71
Average BPM: -2