In [144]:
import pandas as pd
import numpy as np
import nflgame
from datetime import datetime
from pprint import pprint
import copy
from collections import Counter
import re

In [3]:
def play_type_parsing(item):
    """Classify a play record into a coarse play-type label.

    Parameters
    ----------
    item : dict
        Play record with at least the keys ``'note'`` and ``'desc'``.

    Returns
    -------
    str
        One of ``'two-point'``, ``'kneel'``, ``'fg'``, ``'sacked'``,
        ``'pass'``, ``'punt'``, ``'misc'``, ``'kickoff'``, ``'penalty'``,
        ``'extrapoint'``, ``'extrapointmissed'``, ``'fumble'`` or ``'run'``.

    The description text is consulted first and the ``note`` code second;
    within each group the first matching rule wins, and anything that
    matches no rule is labelled ``'run'``.
    """
    note = item['note']
    desc = item['desc']

    # Ordered (substring, label) rules applied to the description.
    # Order matters: e.g. a two-point try that is also a pass must be
    # reported as 'two-point', so that rule comes first.
    desc_rules = (
        ('TWO-POINT CONVERSION ATTEMPT', 'two-point'),
        (' kneel ', 'kneel'),
        (' field goal is ', 'fg'),
        (' sacked at ', 'sacked'),
        (' pass ', 'pass'),
        (' punts ', 'punt'),
        (' punt ', 'punt'),
        ('Two-Minute Warning', 'misc'),
        ('Timeout #', 'misc'),
    )
    for needle, label in desc_rules:
        if needle in desc:
            return label

    # Descriptions whose first space-separated token is exactly 'END'
    # are bookkeeping entries rather than plays.
    if desc.split(' ', 1)[0] == 'END':
        return 'misc'

    # Fall back to the note code when the description was inconclusive.
    note_rules = (
        ('KICKOFF', 'kickoff'),
        ('PENALTY', 'penalty'),
        ('XP', 'extrapoint'),
        ('XPM', 'extrapointmissed'),
        ('FUMBLE', 'fumble'),
    )
    for code, label in note_rules:
        if note == code:
            return label

    return 'run'
    

In [135]:
def gen_data(season, gameindex=False):
    """Build one flat record per play for every game of a season.

    Regular-season and post-season games are fetched with
    ``nflgame.games``; for each game the game-level fields are merged with
    the fields of every play returned by ``nflgame.combine_plays``.

    Parameters
    ----------
    season
        Season passed straight through to ``nflgame.games``.
    gameindex
        ``GameID`` (the schedule ``eid``) of the single game whose score
        mismatch should be reported, or ``False``/``None`` to report
        mismatches for every game.  It only controls the diagnostic
        printout; all games are always returned.

    Returns
    -------
    list of dict
        One dict per play holding the game fields (``Date``, ``GameID``,
        ``Season_Type``, ``Week``, ``Home``, ``Away``, ``Game_Score_Home``,
        ``Game_Score_Away``, ``Game_Scores``) plus the play fields
        (``desc``, ``down``, ``touchdown``, ``ydstogo``, ``note``,
        ``posteam``, ``clock``, ``drive``, ``drive_num``, ``qtr``,
        ``timeunder``, ``yrdline100``, ``play_type``, ``scorediff``,
        ``home_score``, ``away_score``).

    Side effects
    ------------
    For each reported game whose reconstructed score differs from the
    official one, prints a line with: GameID, home, away, reconstructed
    home/away score, official home/away score, that game's absolute error
    and the running total of absolute errors over all games so far.
    """
    # Pull regular season and post season game data and combine them.
    games = nflgame.games(season, kind='REG')
    games.extend(nflgame.games(season, kind='POST'))

    report_all = gameindex in (None, False)

    all_plays = []
    total_loss = 0
    for game in games:
        # The running score is rebuilt from scratch for every game.
        home_score = 0
        away_score = 0

        # Generate a datetime value for the game.
        sched = game.schedule
        # The schedule time carries no AM/PM marker of its own.  Some
        # nflgame schedules appear to expose it under 'meridiem' --
        # TODO confirm; when it is absent or not AM/PM we keep the
        # historical assumption that every kickoff is PM.
        meridiem = 'PM'
        try:
            if sched['meridiem'] in ('AM', 'PM'):
                meridiem = sched['meridiem']
        except KeyError:
            pass
        str_time = '%s/%s/%s %s%s' % (
            sched['month'], sched['day'], sched['year'], sched['time'], meridiem)
        game_dt = datetime.strptime(str_time, '%m/%d/%Y %I:%M%p')

        # Pull out the game data.
        game_data = {
            'Date': game_dt,
            'GameID': sched['eid'],
            'Season_Type': sched['season_type'],
            'Week': sched['week'],
            'Home': game.home,
            'Away': game.away,
            'Game_Score_Home': game.score_home,
            'Game_Score_Away': game.score_away,
            'Game_Scores': game.scores,
        }

        # For each play in the game, combine the game data and play data
        # into a dictionary that gets stored in the list of all play data.
        for play in nflgame.combine_plays([game]):
            # Deep copy so no two play records share the nested game values.
            play_data = copy.deepcopy(game_data)

            # Info stored directly in the Play class.
            play_data['desc'] = play.desc
            play_data['down'] = play.down
            play_data['touchdown'] = play.touchdown
            play_data['ydstogo'] = play.yards_togo
            play_data['note'] = play.data['note']
            play_data['posteam'] = play.data['posteam']
            play_data['clock'] = play.data['time']

            # Info stored directly in the Drive class for this Play.
            play_data['drive'] = play.drive
            play_data['drive_num'] = play.drive.drive_num
            play_data['qtr'] = play.data['qtr']

            # 'timeunder' is the clock rounded up to whole minutes, i.e.
            # "under X minutes remaining".  A missing clock is passed
            # through unchanged.
            clock = play.data['time']
            if clock:
                parsed_clock = datetime.strptime(clock, '%M:%S')
                if parsed_clock.second > 0:
                    play_data['timeunder'] = parsed_clock.minute + 1
                else:
                    play_data['timeunder'] = parsed_clock.minute
            else:
                play_data['timeunder'] = clock

            # Remove offset from FieldPos.offset variable used to
            # store the play's field position.
            if play.yardline:
                play_data['yrdline100'] = 50 + play.yardline.offset
            else:
                play_data['yrdline100'] = None

            # Play_Type Evaluation
            play_data['play_type'] = play_type_parsing(play_data)

            # Score Diff
            (play_data['scorediff'], home_score, away_score) = calc_play_score(
                play_data, home_score, away_score)
            play_data['home_score'] = home_score
            play_data['away_score'] = away_score
            all_plays.append(play_data)

        # Compare the reconstructed score with the official one.  This uses
        # game_data rather than the last play record, so a game that yields
        # no plays cannot pick up the previous game's values.
        final_home = game_data['Game_Score_Home']
        final_away = game_data['Game_Score_Away']
        game_loss = abs(home_score - final_home) + abs(away_score - final_away)
        total_loss += game_loss

        if report_all or game_data['GameID'] == gameindex:
            if home_score != final_home or away_score != final_away:
                # Single-argument print keeps the output identical on
                # Python 2 and Python 3.
                print('%s %s %s %s %s %s %s %s %s' % (
                    game_data['GameID'], game_data['Home'], game_data['Away'],
                    home_score, away_score, final_home, final_away,
                    game_loss, total_loss))

    return all_plays

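### TODO
*calc_play_score(play_data, home_score, away_score)* — the score reconstructed play by play still disagrees with the official final score for some games (see the mismatch lines printed by `gen_data` below), so the point-attribution rules in this function need more work.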

In [179]:
def calc_play_score(item, home_score, away_score):
    """Apply one play's points to the running score of a game.

    Parameters
    ----------
    item : dict
        Play record; reads ``'Home'``, ``'Away'``, ``'posteam'``,
        ``'note'`` and, for scoring plays other than safeties, ``'desc'``.
    home_score, away_score
        Running score before this play.

    Returns
    -------
    tuple
        ``(home_score - away_score, home_score, away_score)`` after the
        play has been applied.

    Only plays whose ``note`` is one of TD, XP, 2PR, 2PS, FG or SAF change
    the score.  Which side is credited is a text heuristic, checked in
    this order:

    1. safety -> the team without possession;
    2. description contains " recovered by <home>" (case-insensitive)
       -> home, one extra point added;
    3. description contains " recovered by <away>" (case-insensitive)
       -> away, one extra point added;
    4. description mentions a punt, an interception or a kickoff
       -> the team without possession, one extra point added;
    5. otherwise -> the team in possession.
    """
    home_team = item['Home']
    # The defence is whichever side does not have possession.
    defense = home_team if home_team != item['posteam'] else item['Away']

    points_by_note = {'TD': 6, 'XP': 1, '2PR': 2, '2PS': 2, 'FG': 3, 'SAF': 2}
    note = item['note']

    if note in points_by_note:
        points = points_by_note[note]

        # Step 1: decide who is credited and whether the extra point applies.
        if note == 'SAF':
            credit_home = defense == home_team
        elif ' recovered by ' + home_team.lower() in item['desc'].lower():
            credit_home = True
            points += 1
        elif ' recovered by ' + item['Away'].lower() in item['desc'].lower():
            credit_home = False
            points += 1
        else:
            desc = item['desc']
            possession_changed = (
                ' punts ' in desc
                or ' INTERCEPTED by ' in desc
                or re.search('(kicks )[0-9]+( yards from )[A-Z]+( )[0-9]+( to)', desc)
            )
            if possession_changed:
                credit_home = defense == home_team
                points += 1
            else:
                credit_home = item['posteam'] == home_team

        # Step 2: book the points.
        if credit_home:
            home_score += points
        else:
            away_score += points

    return home_score - away_score, home_score, away_score

In [189]:
 # Build the play records for every 2015 game; gameindex=False makes
 # gen_data print a mismatch line for each game whose rebuilt score is off.
 d = gen_data(2015, gameindex=False)

2015092006 CHI ARI 23 49 23 48 1 1
2015092400 NYG WAS 32 22 32 21 1 2
2015092701 CAR NO 27 23 27 22 1 3
2015092711 SEA CHI 27 0 26 0 1 4
2015100402 ATL HOU 49 21 48 21 1 5
2015100406 TB CAR 23 38 23 37 1 6
2015100412 NO DAL 33 14 26 20 13 19
2015101103 ATL WAS 27 19 25 19 2 21
2015101104 TB JAC 37 31 38 31 1 22
2015101800 DET CHI 31 41 37 34 13 35
2015101805 CLE DEN 24 26 23 26 1 36
2015102510 NYG DAL 28 20 27 20 1 37
2015110801 CAR GB 37 27 37 29 2 39
2015110807 TB NYG 18 33 18 32 1 40
2015110900 SD CHI 20 22 19 22 1 41
2015111200 NYJ BUF 17 23 17 22 1 42
2015111508 OAK MIN 14 31 14 30 1 43
2015111511 SEA ARI 33 39 32 39 1 44
2015112206 CAR WAS 44 17 44 16 1 45
2015113000 CLE BAL 27 34 27 33 1 46
2015120300 DET GB 23 28 23 27 1 47
2015120602 MIA BAL 14 13 15 13 1 48
2015120605 CHI SF 20 27 20 26 1 49
2015120606 MIN SEA 8 38 7 38 1 50
2015120607 NO CAR 36 41 38 41 2 52
2015120610 OAK KC 20 33 20 34 1 53
2015120613 PIT IND 43 10 45 10 2 55
2015121301 NYJ TEN 30 6 30 8 2 57
2015121304 JA

In [190]:
# Build the play records for the 2010 season, reporting a score mismatch
# only for the game whose GameID is '2015100412'.
# NOTE(review): that id looks like a 2015 game, so presumably no 2010 game
# matches it and nothing is printed -- confirm the intended season.
e = gen_data(2010, gameindex='2015100412')

In [191]:

# Replay the running score of game 2015100412 from the records in `d`
# and print one line for every play on which the score changed.
hscr = 0
ascr = 0
for play in d:
    if play['GameID'] != '2015100412':
        continue
    if play['home_score'] == hscr and play['away_score'] == ascr:
        continue  # score unchanged on this play
    hscr = play['home_score']
    ascr = play['away_score']
    # Columns: quarter, (clock), home team, home score, away team,
    # away score, note code of the play.
    print('%s %s %s %s %s %s %s' % (
        play['qtr'], '(' + play['clock'] + ')', play['Home'], hscr,
        str(play['Away']), ascr, play['note']))

1 (09:46) NO 0 DAL 3 FG
1 (03:52) NO 6 DAL 3 TD
1 (03:48) NO 7 DAL 3 XP
2 (13:37) NO 14 DAL 3 TD
2 (13:35) NO 14 DAL 4 XP
3 (11:24) NO 17 DAL 4 FG
3 (08:25) NO 17 DAL 7 FG
3 (01:30) NO 20 DAL 7 FG
4 (08:02) NO 26 DAL 7 TD
4 (07:58) NO 27 DAL 7 XP
4 (01:56) NO 27 DAL 13 TD
4 (01:51) NO 27 DAL 14 XP
5 (14:57) NO 33 DAL 14 TD
