In [1]:
import numpy as np
import pandas as pd
import csv

### Which event codes matter here:
*  made field goal attempts
*  made final free throw attempt
*  missed final free throw attempt that results in a defensive rebound
*  missed field goal attempt that results in a defensive rebound
* (5) turnover
* (13) end of time period.
* (8) substitutions

## Notes
* Deadball rebounds are always offensive, so they do not matter.

Keep in mind we have to keep track of each player's:
* Number of offensive possessions
* Number of defensive possessions
* Team points allowed while on court
* Team points scored while on court

## Assumptions

* In the Game_Lineup.txt file, the 0th period lists all players for each team, and then each period lists the starters for that period.

#### Clean the Object data

In [2]:
# Load the three whitespace-delimited input tables.
# The separator is a regular expression, so it is written as a raw string:
# a plain '\s+' contains an invalid escape sequence that newer Python
# versions warn about.
game_lineup = pd.read_csv('Game_Lineup.txt', sep=r'\s+')
play_by_play = pd.read_csv('Play_by_Play.txt', sep=r'\s+')
event_codes = pd.read_csv('Event_Codes.txt', sep=r'\s+')

In [3]:
# Trim leading/trailing whitespace from every object-dtype (text) column of
# each input table. The frames are updated in place, column by column, so the
# names game_lineup / play_by_play / event_codes keep pointing at the same
# (now cleaned) objects.
for frame in (game_lineup, play_by_play, event_codes):
    for col in frame.columns.values:
        if frame[col].dtype == 'object':
            frame[col] = frame[col].str.strip()

#### Order the events by Game_id and Event_Num, and relabel two hashed IDs as 'NBA'.

In [4]:
# Sort events by game and then by event number, and swap the two 32-character
# hex identifiers below for the readable label 'NBA' wherever they occur.
play_by_play = (
    play_by_play
    .sort_values(["Game_id", "Event_Num"])
    .replace('0370a0d090da0d0edc6319f120187e0e', 'NBA')
    .replace('1473d70e5646a26de3c52aa1abd85b1f', 'NBA')
)

### Let's try to get this working for one single game.

In [5]:
# Work on a single game while the logic is being developed.
game_id = "006728e4c10e957011e1f24878e6054a"
game1 = play_by_play.loc[play_by_play["Game_id"] == game_id]

In [6]:
# Column names in positional order; used to find a field's position inside
# the plain list built from each play-by-play row.
cols = list(game1.columns)

In [58]:
# Roster for this game: the Period==0 rows of the lineup table (per the
# Assumptions section, period 0 lists every player on both teams).
roster = game_lineup[(game_lineup.Game_id==game_id) & (game_lineup.Period==0)]

# One row per player, indexed by Person_id, with all four counters created
# directly as zeros (rather than as NaN placeholders filled afterwards).
players = pd.DataFrame(
    0,
    index=roster['Person_id'].tolist(),
    columns=['PossessionsFor','PointsFor','PossessionsAgainst','PointsAgainst'],
)
players['Person_id'] = players.index
players['Team_id'] = roster['Team_id'].tolist()

# Nobody is on the floor until the first event of type 12 sets the lineup.
# Initialising here means a scoring event seen before that point credits no
# one, instead of raising NameError or silently reusing an `on_court` left
# over from an earlier run of this cell.
on_court = []

for row in game1.iterrows():
    
    instance = row[1].tolist()
    
    event_msg_type = instance[cols.index("Event_Msg_Type")]
    
    
    # Points are credited on a made shot (type 1) or on the last free throw
    # of a trip (a type 3 event that is not followed by another type 3).
    is_scoring_event = event_msg_type == 1

    if event_msg_type == 3:
        # row[0] is the row's index *label*, but .iloc needs a *position*.
        # game1 is a filtered slice whose index was never reset, so the label
        # is translated explicitly (assumes unique index labels, which holds
        # for a frame read with the default index). A free throw on the very
        # last row of the game counts as the final one.
        next_pos = game1.index.get_loc(row[0]) + 1
        is_scoring_event = (
            next_pos >= len(game1)
            or game1.iloc[next_pos]["Event_Msg_Type"] != 3
        )

    if is_scoring_event:
        # NOTE(review): the original author states that Option1 on the final
        # free throw holds the total points from the whole trip; confirm
        # against the data dictionary.
        points = instance[cols.index("Option1")]
        team_for = instance[cols.index("Team_id")]

        is_on_court = players.Person_id.isin(on_court)
        is_scoring_team = players.Team_id == team_for

        # Players on the floor for the scoring team get the points "for";
        # players on the floor for any other team get them "against".
        players.loc[is_on_court & is_scoring_team, "PointsFor"] += points
        players.loc[is_on_court & ~is_scoring_team, "PointsAgainst"] += points
        
    if event_msg_type==12:

        period = instance[cols.index("Period")]
        on_court = game_lineup[(game_lineup.Game_id==game_id) & (game_lineup.Period==period)]['Person_id'].tolist()