In [1]:
import pandas as pd
import numpy as np
import itertools

In [2]:
#statsDF - initial stats dataframe
#gamesDF - initial games dataframe
#newGamesDF - formatted games dataframe with game date as index and rest of record in one row
#finalStats - formatted stats dataframe with player's name as the index
#mainDF: The dataframe where the main analysis will take place.

In [3]:
#STATS CELL: This prepares the stats section into a dataframe
#Passing in csv file and getting rid of the unused rows
statsDF = pd.read_csv('stats.csv', delim_whitespace=True)
statsDF.columns = [col.replace(',', '') for col in statsDF.columns]
# if you want to operate on multiple columns, put them in a list like so:
cols = ['PLAYER', 'MIN', 'FGM','FGA', 'FG%', '3PM', '3PA','3P%','FTM','FTA','FT%','OREB','DREB','REB','AST','TOV','STL','BLK','PF','PTS','+/-']
# pass them to df.replace(), specifying each char and it's replacement:
statsDF[cols] = statsDF[cols].replace({'\$': '', ',': ''}, regex=True)
#Remove columns for team name, totals, and the word 'player'
statsDF = statsDF[statsDF.PLAYER != 'PLAYER']
statsDF = statsDF[statsDF.PLAYER != 'Totals:']
statsDF = statsDF[statsDF.PLAYER != 'Cleveland']
statsDF = statsDF[statsDF.PLAYER != 'BOS:']
statsDF = statsDF[statsDF.PLAYER != 'CLE:']
#Reset Index
statsDF = statsDF.reset_index(drop=True)

In [4]:
#GAMES CELL: This prepares the games section into a dataframe
#Passing in csv file and getting rid of the unused rows
gamesDF = pd.read_csv('games.csv', header= None, names=['a','b','c','d','e','f','g','h','i'], index_col=False, error_bad_lines=False, delim_whitespace=True)
gamesDF.columns = [col.replace(',', '') for col in gamesDF.columns]
#if you want to operate on multiple columns, put them in a list like so:
games_cols = ['a','b','c','d','e','f','g','h','i']
# pass them to df.replace(), specifying each char and it's replacement:
gamesDF[games_cols] = gamesDF[games_cols].replace({'\$': '', ',': ''}, regex=True)
# This will add the winner to row 1, colmumn b
if int(gamesDF['a'].iloc[4]) > int(gamesDF['a'].iloc[8]):
    gamesDF['b'].iloc[0] = gamesDF['a'].iloc[1]
else:
    gamesDF['b'].iloc[0] = gamesDF['a'].iloc[5]

In [5]:
#This cell will arrange the gamesDF into the actual formatted games dataframe, newGamesDF
#Setting up the index on the main stats dataframe
dateIndex = [gamesDF.iloc[9,0] + " " + gamesDF.iloc[9,1][1] + ", " + gamesDF.iloc[9,2]]
#Create an empty dataframe, based on the games date as the index 
newGamesDF = pd.DataFrame(index = dateIndex, columns=['Home','Away','W/L','1st Qtr H','2nd Qtr H','3rd Qtr H','4th Qtr H','1st Qtr A','2nd Qtr A','3rd Qtr A','4th Qtr A','Total H','Total A', 'Ref1','Ref2','Ref3'])
#The next lines will pull the data from the cells in the gamesDF and into the newGamesDF dataframe
newGamesDF['Away'] = gamesDF['a'].iloc[1]
newGamesDF['Home'] = gamesDF['a'].iloc[5]
newGamesDF['W/L'] = gamesDF['b'].iloc[0]
newGamesDF['1st Qtr H'] = gamesDF['b'].iloc[12]
newGamesDF['2nd Qtr H'] = gamesDF['c'].iloc[12]
newGamesDF['3rd Qtr H'] = gamesDF['d'].iloc[12]
newGamesDF['4th Qtr H'] = gamesDF['e'].iloc[12]
newGamesDF['1st Qtr A'] = gamesDF['b'].iloc[11]
newGamesDF['2nd Qtr A'] = gamesDF['c'].iloc[11]
newGamesDF['3rd Qtr A'] = gamesDF['d'].iloc[11]
newGamesDF['4th Qtr A'] = gamesDF['e'].iloc[11]
newGamesDF['Total H'] = gamesDF['f'].iloc[12]
newGamesDF['Total A'] = gamesDF['f'].iloc[11]
newGamesDF['Ref1'] = gamesDF['c'].iloc[22][:-2]
newGamesDF['Ref2'] = gamesDF['e'].iloc[22][:-2]
newGamesDF['Ref3'] = gamesDF['g'].iloc[22]
newGamesDF

Unnamed: 0,Home,Away,W/L,1st Qtr H,2nd Qtr H,3rd Qtr H,4th Qtr H,1st Qtr A,2nd Qtr A,3rd Qtr A,4th Qtr A,Total H,Total A,Ref1,Ref2,Ref3
"OCT 7, 2017",CLEVELAND,BOSTON,CLEVELAND,29,25,18,30,19,19,33,28,102,99,McCutchen,Smith,Forte


In [6]:
#Setting up the index on the main stats dataframe
mainDFIndex = []
for index, row in statsDF.iterrows():
    if index % 2 == 0 and index != 50:
        mainDFIndex.append(str(row['PLAYER']) + " " + str(row['MIN']))
print(mainDFIndex)

['Gordon Hayward', 'Jayson Tatum', 'Al Horford', 'Jaylen Brown', 'Kyrie Irving', 'Marcus Smart', 'Aron Baynes', 'Semi Ojeleye', 'Terry Rozier', 'Shane Larkin', 'Abdel Nader', 'Daniel Theis', 'LeBron James', 'Jae Crowder', 'Kevin Love', 'Dwyane Wade', 'Derrick Rose', 'Tristan Thompson', 'JR Smith', 'Iman Shumpert', 'Jeff Green', 'Kyle Korver', 'Jose Calderon', 'Channing Frye', 'Cedi Osman']


In [7]:
#Setup dicts with the sub categories
newDict = dict()
something = statsDF.T.to_dict().values()
for index, i in enumerate(something):
    if index % 2 != 0:
        newDict[index] = i

In [18]:
#put them in dataframe, index it with the main index, drop nan columns, rename the columns to their proper categories
finalStats = pd.DataFrame.from_dict(newDict,orient='index')
finalStats.index = mainDFIndex
finalStats.drop(['+/-'], axis=1, inplace=True)
finalStats = finalStats.rename(columns={'PLAYER':'MIN','MIN':'FGM', 'FGM':'FGA', 'FGA':'FG%', 'FG%':'3PM', '3PM':'3PA', '3PA':'3P%', '3P%':'FTM', 'FTM':'FTA', 'FTA':'FT%', 'FT%':'OREB', 'OREB':'DREB', 'DREB':'REB', 'REB':'AST', 'AST':'TOV', 'TOV':'BLK', 'BLK':'PF', 'PF':'PTS', 'PTS':'+/-'})

In [19]:
#This cell will create the mainDF, a multi-indexed dataframe which indexes by date, then team, then player
#multiDFIndex will create a tuple for the multi-index dataframe
#teams variable will bring the teams into a list of lists, which the next line will flatten into one list.
teams = [newGamesDF['Away'].iloc[0] for i  in range(12)],[newGamesDF['Home'].iloc[0] for i  in range(13)]
teams = sum(teams,[])
multiDFIndex = [newGamesDF.index.tolist()*25,teams,mainDFIndex]

In [20]:
#mainDF: The dataframe where the main analysis will take place.
mainDF = finalStats.set_index(multiDFIndex)

In [21]:
#This will take logic about the game itself and add it to the main DataFrame. There should be field for team points,home/away, win/loss

total_points = []
win_loss = []
home_away = []

for index, row in mainDF.iterrows():
    if index[1] == newGamesDF['Away'].iloc[0]:
        total_points.append(newGamesDF['Total A'].iloc[0])
        home_away.append('Away')
    else:
        total_points.append(newGamesDF['Total H'].iloc[0])
        home_away.append('Home')
        
for index, row in mainDF.iterrows():
    if index[1] == newGamesDF['W/L'].iloc[0]:
        win_loss.append('Win')
    else:
        win_loss.append('Loss')
mainDF['Total Points'] = total_points
mainDF['Home/Away'] = home_away
mainDF['W/L'] = win_loss
mainDF.apply(pd.to_numeric, errors=0)

Unnamed: 0,Unnamed: 1,Unnamed: 2,MIN,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,...,AST,TOV,BLK,STL,PF,PTS,+/-,Total Points,Home/Away,W/L
"OCT 7, 2017",BOSTON,Gordon Hayward,,1.0,2.0,50.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,2.0,3.0,99,,
"OCT 7, 2017",BOSTON,Jayson Tatum,,5.0,12.0,41.7,1.0,2.0,50.0,3.0,3.0,100.0,...,3.0,1.0,0.0,0.0,4.0,14.0,6.0,99,,
"OCT 7, 2017",BOSTON,Al Horford,,2.0,7.0,28.6,0.0,2.0,0.0,5.0,7.0,71.4,...,5.0,0.0,0.0,1.0,2.0,9.0,8.0,99,,
"OCT 7, 2017",BOSTON,Jaylen Brown,,11.0,23.0,47.8,2.0,9.0,22.2,1.0,2.0,50.0,...,0.0,3.0,2.0,0.0,5.0,25.0,-5.0,99,,
"OCT 7, 2017",BOSTON,Kyrie Irving,,8.0,17.0,47.1,4.0,9.0,44.4,2.0,2.0,100.0,...,10.0,2.0,3.0,0.0,4.0,22.0,-1.0,99,,
"OCT 7, 2017",BOSTON,Marcus Smart,,5.0,16.0,31.3,0.0,4.0,0.0,2.0,3.0,66.7,...,3.0,2.0,2.0,2.0,2.0,12.0,-8.0,99,,
"OCT 7, 2017",BOSTON,Aron Baynes,,2.0,2.0,100.0,0.0,0.0,0.0,2.0,4.0,50.0,...,1.0,2.0,0.0,1.0,5.0,6.0,-14.0,99,,
"OCT 7, 2017",BOSTON,Semi Ojeleye,,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,-10.0,99,,
"OCT 7, 2017",BOSTON,Terry Rozier,,2.0,6.0,33.3,1.0,3.0,33.3,4.0,4.0,100.0,...,2.0,0.0,4.0,0.0,0.0,9.0,4.0,99,,
"OCT 7, 2017",BOSTON,Shane Larkin,,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2.0,99,,


In [41]:
#This function will calculate the projected fantasy points per game
def fantasy_points(row):
    try:
        score = (int(row['3PM']) * 3) + (int(row['FGM']) * 2) + (int(row['REB']) * 1) + (int(row['AST']) * 1.5) + (int(row['BLK']) * 3) + (int(row['STL']) * 3) + (int(row['TOV']) * -1)
        return score
    except:
        pass

mainDF['Fantasy Score'] = mainDF.apply(fantasy_points, axis=1)

In [44]:
mainDF[['Fantasy Score','AST']]

Unnamed: 0,Unnamed: 1,Unnamed: 2,Fantasy Score,AST
"OCT 7, 2017",BOSTON,Gordon Hayward,3.0,0.0
"OCT 7, 2017",BOSTON,Jayson Tatum,26.5,3.0
"OCT 7, 2017",BOSTON,Al Horford,21.5,5.0
"OCT 7, 2017",BOSTON,Jaylen Brown,37.0,0.0
"OCT 7, 2017",BOSTON,Kyrie Irving,54.0,10.0
"OCT 7, 2017",BOSTON,Marcus Smart,33.5,3.0
"OCT 7, 2017",BOSTON,Aron Baynes,11.5,1.0
"OCT 7, 2017",BOSTON,Semi Ojeleye,0.0,0.0
"OCT 7, 2017",BOSTON,Terry Rozier,25.0,2.0
"OCT 7, 2017",BOSTON,Shane Larkin,1.0,0.0


In [27]:
mainDF['AST']

OCT 7, 2017  BOSTON     Gordon Hayward        0
                        Jayson Tatum          3
                        Al Horford            5
                        Jaylen Brown          0
                        Kyrie Irving         10
                        Marcus Smart          3
                        Aron Baynes           1
                        Semi Ojeleye          0
                        Terry Rozier          2
                        Shane Larkin          0
                        Abdel Nader         NaN
                        Daniel Theis        NaN
             CLEVELAND  LeBron James          9
                        Jae Crowder           2
                        Kevin Love            0
                        Dwyane Wade           3
                        Derrick Rose          2
                        Tristan Thompson      2
                        JR Smith              1
                        Iman Shumpert         0
                        Jeff Green      