In [4]:
import numpy as np
import pandas as pd
import csv

### Which event codes matter here:
*  made field goal attempts
*  made final free throw attempt
*  missed final free throw attempt that results in a defensive rebound
*  missed field goal attempt that results in a defensive rebound
* (5) turnover
* (13) end of time period.
* (8) substitutions

## Notes
* Deadball rebounds are always offensive, so they do not matter.

Keep in mind we have to keep track of each player's:
* Number of offensive possessions
* Number of defensive possessions
* Team points allowed while on court
* Team points scored while on court

## Assumptions

* In the Game_Lineup.txt file, the 0th period lists all players for each team, and then each period lists the starters for that period.

#### Clean the Object data

In [5]:
# Load the three whitespace-delimited source tables.
# The separator is a regex, so it is written as a raw string: '\s' inside a
# plain string literal is an invalid escape sequence and warns on recent Pythons.
game_lineup = pd.read_csv('Game_Lineup.txt', sep=r'\s+')
play_by_play = pd.read_csv('Play_by_Play.txt', sep=r'\s+')
event_codes = pd.read_csv('Event_Codes.txt', sep=r'\s+')

In [6]:
# Trim leading/trailing whitespace from every object-dtype (text) column of the
# three loaded tables. The frames are updated column by column, in place.
for table in (game_lineup, play_by_play, event_codes):
    for col in table.columns.values:
        is_text = table[col].dtype == 'object'
        if is_text:
            table[col] = table[col].str.strip()

#### Sort the play-by-play by Game_id and Event_Num, and relabel the two placeholder team ids as 'NBA' to clean up the data.

In [7]:
# Put events in order within each game, then collapse two hashed ids to the
# label 'NBA'. NOTE(review): these presumably mark league-level (non-team)
# entries - confirm against the data dictionary.
play_by_play = (
    play_by_play
    .sort_values(["Game_id", "Event_Num"])
    .replace(
        ['0370a0d090da0d0edc6319f120187e0e', '1473d70e5646a26de3c52aa1abd85b1f'],
        'NBA',
    )
)

### Let's try to get this working for one single game.

In [36]:
# Work on a single game first: keep only the play-by-play rows whose Game_id
# matches the chosen id.
game_id = "006728e4c10e957011e1f24878e6054a"
is_selected_game = play_by_play["Game_id"] == game_id
game1 = play_by_play[is_selected_game]

In [48]:
# Column labels of the single-game frame, in positional order; the event loop
# uses this list to find a field's position within a flattened row.
cols = list(game1.columns)
cols

['Game_id',
 'Event_Num',
 'Event_Msg_Type',
 'Period',
 'WC_Time',
 'PC_Time',
 'Action_Type',
 'Option1',
 'Option2',
 'Option3',
 'Team_id',
 'Person1',
 'Person2',
 'Person3',
 'Team_id_type',
 'Person1_type',
 'Person2_type',
 'Person3_type']

In [64]:
# Per-player tally for this game. Period 0 of the lineup file lists every
# player on both rosters (see Assumptions), so those ids form the index, and
# every counter starts at an integer 0.
roster = game_lineup[(game_lineup.Game_id==game_id) & (game_lineup.Period==0)]['Person_id'].tolist()
players = pd.DataFrame(0, index=roster, columns=['Possessions','PointsFor','PointsAgainst'])

# The set of teams does not change while iterating, so compute it once.
# Rows labelled 'NBA' are skipped when looking up lineups.
teams = [team for team in game1.Team_id.unique() if team != 'NBA']

for row in game1.iterrows():

    # iterrows() yields (index, Series); flatten the Series so that fields can
    # be looked up by their position in `cols`.
    instance = row[1].tolist()

    event_msg_type = instance[cols.index("Event_Msg_Type")]

    if event_msg_type==1:
        # NOTE(review): presumably a made field goal whose value is in Option1 -
        # confirm. TODO: `points` is read but not yet credited to any player.
        points = instance[cols.index("Option1")]

    if event_msg_type==12:
        # NOTE(review): presumably the start-of-period event - confirm.
        period = instance[cols.index("Period")]
        for team in teams:
            # Lineup rows for this team in the *current* period. The filter must
            # compare Period to `period`; using the bare Period column as a mask
            # would AND raw integers into the condition instead.
            # TODO: `on_court` is overwritten for each team and not yet used.
            on_court = game_lineup[(game_lineup.Game_id==game_id) & (game_lineup.Team_id==team) & (game_lineup.Period==period)]['Person_id'].tolist()

In [65]:
players

Unnamed: 0,Possessions,PointsFor,PointsAgainst
ae53f8ba6761b64a174051da817785bc,0,0,0
44230324724c84f122ac62a5f0918314,0,0,0
8d2127290c94bd41b82a2938734bc750,0,0,0
48ec4e6c52f418d5ca4ef510ba473ea0,0,0,0
e816ff284dc3f965b8f3d605a3b91bae,0,0,0
ed95dff5440fadf3042b5acacea81eed,0,0,0
616281dee946056b071699476fdee9ec,0,0,0
e814950408915f43de2b079dce7c21c5,0,0,0
2ad626904c8b28cceb8e12c624a84240,0,0,0
bd45fe7dba52aa2cd00ba80ff107d05b,0,0,0
