In [1]:
import pandas as pd
import numpy as np
import itertools

In [2]:
#statsDF - initial stats dataframe
#gamesDF - initial games dataframe
#newGamesDF - formatted games dataframe with game date as index and rest of record in one row
#finalStats - formatted stats dataframe with player's name as the index
#mainDF: The dataframe where the main analysis will take place.

In [3]:
#STATS CELL: This prepares the stats section into a dataframe
#Passing in csv file and getting rid of the unused rows
statsDF = pd.read_csv('stats.csv', delim_whitespace=True)
statsDF.columns = [col.replace(',', '') for col in statsDF.columns]
# if you want to operate on multiple columns, put them in a list like so:
cols = ['PLAYER', 'MIN', 'FGM','FGA', 'FG%', '3PM', '3PA','3P%','FTM','FTA','FT%','OREB','DREB','REB','AST','TOV','STL','BLK','PF','PTS','+/-']
# pass them to df.replace(), specifying each char and it's replacement:
statsDF[cols] = statsDF[cols].replace({'\$': '', ',': ''}, regex=True)
#Remove columns for team name, totals, and the word 'player'
statsDF = statsDF[statsDF.PLAYER != 'PLAYER']
statsDF = statsDF[statsDF.PLAYER != 'Totals:']
statsDF = statsDF[statsDF.PLAYER != 'Cleveland']
statsDF = statsDF[statsDF.PLAYER != 'BOS:']
statsDF = statsDF[statsDF.PLAYER != 'CLE:']
#Reset Index
statsDF = statsDF.reset_index(drop=True)

In [4]:
#GAMES CELL: This prepares the games section into a dataframe
#Passing in csv file and getting rid of the unused rows
gamesDF = pd.read_csv('games.csv', header= None, names=['a','b','c','d','e','f','g','h','i'], index_col=False, error_bad_lines=False, delim_whitespace=True)
gamesDF.columns = [col.replace(',', '') for col in gamesDF.columns]
#if you want to operate on multiple columns, put them in a list like so:
games_cols = ['a','b','c','d','e','f','g','h','i']
# pass them to df.replace(), specifying each char and it's replacement:
gamesDF[games_cols] = gamesDF[games_cols].replace({'\$': '', ',': ''}, regex=True)
# This will add the winner to row 1, colmumn b
if int(gamesDF['a'].iloc[4]) > int(gamesDF['a'].iloc[8]):
    gamesDF['b'].iloc[0] = gamesDF['a'].iloc[1]
else:
    gamesDF['b'].iloc[0] = gamesDF['a'].iloc[5]

In [5]:
#This cell will arrange the gamesDF into the actual formatted games dataframe, newGamesDF
#Setting up the index on the main stats dataframe
dateIndex = [gamesDF.iloc[9,0] + " " + gamesDF.iloc[9,1][1] + ", " + gamesDF.iloc[9,2]]
#Create an empty dataframe, based on the games date as the index 
newGamesDF = pd.DataFrame(index = dateIndex, columns=['Home','Away','W/L','1st Qtr H','2nd Qtr H','3rd Qtr H','4th Qtr H','1st Qtr A','2nd Qtr A','3rd Qtr A','4th Qtr A','Total H','Total A', 'Ref1','Ref2','Ref3'])
#The next lines will pull the data from the cells in the gamesDF and into the newGamesDF dataframe
newGamesDF['Away'] = gamesDF['a'].iloc[1]
newGamesDF['Home'] = gamesDF['a'].iloc[5]
newGamesDF['W/L'] = gamesDF['b'].iloc[0]
newGamesDF['1st Qtr H'] = gamesDF['b'].iloc[12]
newGamesDF['2nd Qtr H'] = gamesDF['c'].iloc[12]
newGamesDF['3rd Qtr H'] = gamesDF['d'].iloc[12]
newGamesDF['4th Qtr H'] = gamesDF['e'].iloc[12]
newGamesDF['1st Qtr A'] = gamesDF['b'].iloc[11]
newGamesDF['2nd Qtr A'] = gamesDF['c'].iloc[11]
newGamesDF['3rd Qtr A'] = gamesDF['d'].iloc[11]
newGamesDF['4th Qtr A'] = gamesDF['e'].iloc[11]
newGamesDF['Total H'] = gamesDF['f'].iloc[12]
newGamesDF['Total A'] = gamesDF['f'].iloc[11]
newGamesDF['Ref1'] = gamesDF['c'].iloc[22][:-2]
newGamesDF['Ref2'] = gamesDF['e'].iloc[22][:-2]
newGamesDF['Ref3'] = gamesDF['g'].iloc[22]
newGamesDF

Unnamed: 0,Home,Away,W/L,1st Qtr H,2nd Qtr H,3rd Qtr H,4th Qtr H,1st Qtr A,2nd Qtr A,3rd Qtr A,4th Qtr A,Total H,Total A,Ref1,Ref2,Ref3
"OCT 7, 2017",CLEVELAND,BOSTON,CLEVELAND,29,25,18,30,19,19,33,28,102,99,McCutchen,Smith,Forte


In [6]:
#Setting up the index on the main stats dataframe
mainDFIndex = []
for index, row in statsDF.iterrows():
    if index % 2 == 0 and index != 50:
        mainDFIndex.append(str(row['PLAYER']) + " " + str(row['MIN']))
print(mainDFIndex)

['Gordon Hayward', 'Jayson Tatum', 'Al Horford', 'Jaylen Brown', 'Kyrie Irving', 'Marcus Smart', 'Aron Baynes', 'Semi Ojeleye', 'Terry Rozier', 'Shane Larkin', 'Abdel Nader', 'Daniel Theis', 'LeBron James', 'Jae Crowder', 'Kevin Love', 'Dwyane Wade', 'Derrick Rose', 'Tristan Thompson', 'JR Smith', 'Iman Shumpert', 'Jeff Green', 'Kyle Korver', 'Jose Calderon', 'Channing Frye', 'Cedi Osman']


In [7]:
#Setup dicts with the sub categories
newDict = dict()
something = statsDF.T.to_dict().values()
for index, i in enumerate(something):
    if index % 2 != 0:
        newDict[index] = i

In [8]:
#put them in dataframe, index it with the main index, drop nan columns, rename the columns to their proper categories
finalStats = pd.DataFrame.from_dict(newDict,orient='index')
finalStats.index = mainDFIndex
finalStats.drop(['+/-'], axis=1, inplace=True)
finalStats = finalStats.rename(columns={'PLAYER':'MIN','MIN':'FGM', 'FGM':'FGA', 'FGA':'FG%', 'FG%':'3PM', '3PM':'3PA', '3PA':'3P%', '3P%':'FTM', 'FTM':'FTA', 'FTA':'FT%', 'FT%':'REB', 'REB':'DREB', 'DREB':'REB', 'REB':'AST', 'AST':'TOV', 'TOV':'BLK', 'BLK':'PF', 'PF':'PTS', 'PTS':'+/-'})

In [9]:
#This cell will create the mainDF, a multi-indexed dataframe which indexes by date, then team, then player
#multiDFIndex will create a tuple for the multi-index dataframe
#teams variable will bring the teams into a list of lists, which the next line will flatten into one list.
teams = [newGamesDF['Away'].iloc[0] for i  in range(12)],[newGamesDF['Home'].iloc[0] for i  in range(13)]
teams = sum(teams,[])
multiDFIndex = [newGamesDF.index.tolist()*25,teams,mainDFIndex]

In [10]:
#mainDF: The dataframe where the main analysis will take place.
mainDF = finalStats.set_index(multiDFIndex)

In [90]:
#This will take logic about the game itself and add it to the main DataFrame. There should be field for team points,home/away, win/loss
#This is still all fucked up
for index, row in mainDF.iterrows():
    if index[1] == newGamesDF['Away'].iloc[0]:
        mainDF['Total Points'] = newGamesDF['Total A'].iloc[0]
    else:
        mainDF['Total Points'] = newGamesDF['Total H'].iloc[0]


In [78]:
print(mainDF.index[][1])
print(newGamesDF['Home'].iloc[0])

SyntaxError: invalid syntax (<ipython-input-78-1b30987fa7fd>, line 1)

In [86]:
mainDF['Total Points']

OCT 7, 2017  BOSTON     Gordon Hayward      102
                        Jayson Tatum        102
                        Al Horford          102
                        Jaylen Brown        102
                        Kyrie Irving        102
                        Marcus Smart        102
                        Aron Baynes         102
                        Semi Ojeleye        102
                        Terry Rozier        102
                        Shane Larkin        102
                        Abdel Nader         102
                        Daniel Theis        102
             CLEVELAND  LeBron James        102
                        Jae Crowder         102
                        Kevin Love          102
                        Dwyane Wade         102
                        Derrick Rose        102
                        Tristan Thompson    102
                        JR Smith            102
                        Iman Shumpert       102
                        Jeff Green      