In [10]:
import pandas as pd
from numpy.random import default_rng, randn
from sklearn.linear_model import LinearRegression

In [2]:
# Load the player statistics from CSV into a DataFrame.
player_df_final = pd.read_csv(filepath_or_buffer='player_data_final.csv')

In [3]:
# Keep only the Tune Squad players: every row from position 26 onward
# (all columns are retained).
ts_df = player_df_final.iloc[26:]
ts_df

Unnamed: 0,ID,player,points,possessions,team_pace,GP,MPG,TS%,AST,TO,USG,ORR,DRR,REBR,PER
26,31,tune_squad1,2049.0,1434.0,110.0,64.0,38.8,0.619,31.5,14.9,35.5,8.3,17.6,12.8,28.44
27,32,tune_squad2,1795.0,1481.8,112.1,62.0,35.4,0.608,31.9,14.5,32.0,6.5,22.5,12.9,23.34
28,33,tune_squad3,1805.0,1509.9,108.6,64.0,35.4,0.622,27.9,13.9,36.0,5.9,27.7,12.2,22.41
29,34,tune_squad4,1743.0,1422.4,112.9,64.0,36.3,0.619,30.9,15.6,34.5,5.9,18.9,14.8,29.853138
30,35,tune_squad5,1963.0,1539.1,117.4,59.771429,35.208333,0.633,32.3,16.2,34.0,5.9,19.8,13.1,27.16
31,36,tune_squad6,2062.0,1505.7,111.5,59.771429,37.0,0.62,29.8,15.6,36.2,4.9,23.9,14.7,27.86
32,37,tune_squad7,1845.0,1435.7,113.1,69.0,36.9,0.634,33.2,14.0,36.5,4.1,21.5,16.4,34.26
33,38,tune_squad8,1778.0,1526.4,109.3,66.0,34.9,0.612,30.6,15.9,35.9,5.5,18.8,13.7,28.65
34,39,tune_squad9,1901.0,1444.1,109.7,67.0,36.5,0.609,27.2,14.8,35.5,5.0,21.8,8.9,20.12
35,41,tune_squad11,2030.0,1431.0,112.3,68.0,37.0,0.618,32.5,15.3,34.5,5.7,15.7,13.2,30.07


In [5]:
# Load the Tune Squad player names. The file is tab-delimited despite
# its .csv extension, so the delimiter is given explicitly.
ts_name_df = pd.read_csv('tune_squad.csv', delimiter='\t')
ts_name_df

Unnamed: 0,ID,player
0,31,Sylvester
1,32,Marvin the Martian
2,33,Road Runner
3,34,Foghorn Leghorn
4,35,Bugs Bunny
5,36,Elmer Fudd
6,37,Lola Bunny
7,38,Porky Pig
8,39,Tasmanian Devil
9,40,Yosemite Sam


In [6]:
# Attach each player's name to their stats by joining on ID.
# A left join keeps every stats row even when no name matches; the clashing
# 'player' columns become 'player_type' (stats) and 'player_name' (names).
ts_df = ts_df.merge(
    ts_name_df,
    how='left',
    on='ID',
    suffixes=('_type', '_name'),
)
ts_df.head()

Unnamed: 0,ID,player_type,points,possessions,team_pace,GP,MPG,TS%,AST,TO,USG,ORR,DRR,REBR,PER,player_name
0,31,tune_squad1,2049.0,1434.0,110.0,64.0,38.8,0.619,31.5,14.9,35.5,8.3,17.6,12.8,28.44,Sylvester
1,32,tune_squad2,1795.0,1481.8,112.1,62.0,35.4,0.608,31.9,14.5,32.0,6.5,22.5,12.9,23.34,Marvin the Martian
2,33,tune_squad3,1805.0,1509.9,108.6,64.0,35.4,0.622,27.9,13.9,36.0,5.9,27.7,12.2,22.41,Road Runner
3,34,tune_squad4,1743.0,1422.4,112.9,64.0,36.3,0.619,30.9,15.6,34.5,5.9,18.9,14.8,29.853138,Foghorn Leghorn
4,35,tune_squad5,1963.0,1539.1,117.4,59.771429,35.208333,0.633,32.3,16.2,34.0,5.9,19.8,13.1,27.16,Bugs Bunny


In [7]:
# Put player_name directly after ID.
# The last column (player_name) is split off and then written into slot 1,
# which overwrites whatever label was there (player_type after the merge),
# so that column is dropped from the selection below.
*column_list, player_name = ts_df.columns
column_list[1] = player_name

ts_df = ts_df[column_list]
ts_df.head()

Unnamed: 0,ID,player_name,points,possessions,team_pace,GP,MPG,TS%,AST,TO,USG,ORR,DRR,REBR,PER
0,31,Sylvester,2049.0,1434.0,110.0,64.0,38.8,0.619,31.5,14.9,35.5,8.3,17.6,12.8,28.44
1,32,Marvin the Martian,1795.0,1481.8,112.1,62.0,35.4,0.608,31.9,14.5,32.0,6.5,22.5,12.9,23.34
2,33,Road Runner,1805.0,1509.9,108.6,64.0,35.4,0.622,27.9,13.9,36.0,5.9,27.7,12.2,22.41
3,34,Foghorn Leghorn,1743.0,1422.4,112.9,64.0,36.3,0.619,30.9,15.6,34.5,5.9,18.9,14.8,29.853138
4,35,Bugs Bunny,1963.0,1539.1,117.4,59.771429,35.208333,0.633,32.3,16.2,34.0,5.9,19.8,13.1,27.16


In [8]:
# Names of the stat columns of interest: positions 7 up to (but excluding)
# the last column.
game_stat_cols = ts_df.columns[7:-1].tolist()

# Standard deviation of each stat across the Tune Squad players.
game_stat_stdevs = [ts_df[stat].std() for stat in game_stat_cols]

# Wrap the standard deviations in a Series indexed by stat name.
stdev_s = pd.Series(game_stat_stdevs, index=game_stat_cols)
stdev_s

TS%     0.008262
AST     2.140494
TO      0.797197
USG     1.892718
ORR     1.139465
DRR     3.017962
REBR    1.802564
dtype: float64

In [9]:
# Get the dependent and independent variables for modeling the PER.
# The features are selected by name (the same stat columns that are randomized
# in the simulation) and kept as a DataFrame rather than a bare array, so the
# fitted model records the feature names and can check them against the
# DataFrames passed to predict() later on.
X = player_df_final[game_stat_cols]
y = player_df_final['PER']

In [11]:
# Create the linear regression model and fit it to the stats (X) and PER (y).
# fit() returns the estimator itself, so the fitted model can be bound directly.
lin_reg = LinearRegression().fit(X, y)
lin_reg

In [12]:
# Print the players with the highest and lowest PER for each iteration.
print('Iteration # \thigh PER \tlow PER')

# Use a dedicated, seeded generator so that re-running the notebook
# reproduces the same simulated games.
sim_rng = default_rng(42)
player_names = list(ts_df['player_name'])

# Run the simulation 10 times.
for i in range(10):

    # Build a temporary DataFrame for this iteration: one row per player
    # (indexed by name) and one column per stat. Each value is the player's
    # recorded stat plus Gaussian noise scaled by that stat's standard deviation.
    game_df = pd.DataFrame(
        {
            stat: (ts_df[stat] + sim_rng.standard_normal(len(ts_df)) * stdev_s[stat]).to_numpy()
            for stat in game_stat_cols
        },
        index=player_names,
    )

    # Use the fitted model to predict players' PERs based on the randomized data.
    game_df['PER'] = lin_reg.predict(game_df)

    # Report the players with the highest and lowest predicted PER.
    # format() is used instead of string concatenation so that a non-string
    # index label (e.g. a missing name left by the left merge) cannot raise.
    print('Iteration {} \t{} \t{}'.format(i + 1, game_df['PER'].idxmax(), game_df['PER'].idxmin()))

Iteration # 	high PER 	low PER
Iteration 1 	Lola Bunny 	Road Runner
Iteration 2 	Foghorn Leghorn 	Penelope
Iteration 3 	Gossamer 	Tasmanian Devil
Iteration 4 	Lola Bunny 	Penelope
Iteration 5 	Lola Bunny 	Tasmanian Devil
Iteration 6 	Bugs Bunny 	Tasmanian Devil
Iteration 7 	Gossamer 	Penelope
Iteration 8 	Porky Pig 	Tasmanian Devil
Iteration 9 	Gossamer 	Tweety
Iteration 10 	Porky Pig 	Tasmanian Devil




In [13]:
# Simulate one game as four 12-minute periods, one DataFrame per period.
number_of_iterations = 4
player_names = list(ts_df['player_name'])

# Use a dedicated, seeded generator so that re-running the notebook
# reproduces the same simulated game.
period_rng = default_rng(43)

# For each period, generate randomized player data (recorded stat plus Gaussian
# noise scaled by the stat's standard deviation) and predict the PER with the
# model fitted earlier.
df_list = []
for _ in range(number_of_iterations):
    period_df = pd.DataFrame(
        {
            stat: (ts_df[stat] + period_rng.standard_normal(len(ts_df)) * stdev_s[stat]).to_numpy()
            for stat in game_stat_cols
        },
        index=player_names,
    )
    period_df['PER'] = lin_reg.predict(period_df)
    df_list.append(period_df)

# Stack the periods into one DataFrame indexed by (minutes, player_name), where
# 'minutes' is the minute at which each period starts: 0, 12, 24, 36.
period_starts = [period * 12 for period in range(number_of_iterations)]
game_df = pd.concat(df_list, keys=period_starts, names=['minutes', 'player_name'])

game_df



Unnamed: 0_level_0,Unnamed: 1_level_0,TS%,AST,TO,USG,ORR,DRR,REBR,PER
minutes,player_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,Sylvester,0.599094,29.694666,14.122536,35.677452,9.602194,17.77966,12.434687,26.613168
0,Marvin the Martian,0.613249,31.587346,15.380515,32.911035,5.96559,22.242832,13.688149,25.136978
0,Road Runner,0.634867,24.051221,12.889259,35.109305,5.458751,23.576376,12.024377,25.320952
0,Foghorn Leghorn,0.622704,30.199921,15.677825,31.724547,3.656702,17.456592,13.848966,28.430687
0,Bugs Bunny,0.630576,32.385243,15.985186,36.794694,7.016986,26.708253,11.895875,22.674406
0,Elmer Fudd,0.622691,27.561834,17.438593,33.725853,5.388716,25.930372,17.270003,27.454844
0,Lola Bunny,0.646987,32.652201,15.00834,37.300706,5.35704,21.77559,13.797096,30.593059
0,Porky Pig,0.60623,32.227383,15.758277,36.508568,5.669478,20.961291,14.379246,28.572473
0,Tasmanian Devil,0.611258,28.090243,14.394112,33.638119,6.792698,20.039455,9.098895,20.450679
0,Gossamer,0.616566,30.371703,15.597773,32.826015,6.379478,16.854152,11.262217,24.901267


In [14]:
# Write the simulated game stats, including the index, out to CSV.
game_df.to_csv(path_or_buf='game_stats.csv')