In [69]:
import pandas as pd
import json
from itertools import groupby

In [101]:
# Load the exported games and flatten every action that references an athlete
# into a single list.
with open('../data/games.json', 'r') as games_file:
    games = json.load(games_file)

actions = []

# yes / no: number of actions with / without an 'athlete' entry.
yes = 0
no = 0
for game in games:
    for quarter in game['quarters']:
        for play in quarter['plays']:
            for action in play['actions']:
                if 'athlete' not in action:
                    no += 1
                    continue
                # Replace the {'$oid': ...} wrapper with the bare id value
                # (this mutates the dict that was loaded from JSON).
                action['athlete'] = action['athlete']['$oid']
                yes += 1
                actions.append(action)

print(yes)
print(no)

1776
316


In [141]:
def group_by_athlete(actions):
    """Group actions by their 'athlete' value.

    Args:
        actions: iterable of dicts, each with an 'athlete' key.

    Returns:
        A list of {'athlete': key, 'actions': [...]} dicts, one per distinct
        athlete, ordered by athlete key.
    """
    def athlete_of(action):
        return action['athlete']

    # groupby only merges adjacent items, so order by athlete first.
    ordered = sorted(actions, key=athlete_of)
    grouped = []
    for athlete, members in groupby(ordered, athlete_of):
        grouped.append({'athlete': athlete, 'actions': list(members)})
    return grouped

In [None]:
def parse_dropback(text):
    """Return 1 when `text` contains 'SG' or 'UC', otherwise 0."""
    return 1 if any(marker in text for marker in ('SG', 'UC')) else 0
    

In [189]:
def safe_parse_int(text):
    """Parse `text` as an integer, returning 0 when it cannot be parsed.

    Unlike a `str.isdigit()` gate, this keeps signed values such as '-3'
    instead of turning them into 0, and it never raises for strings that
    `isdigit()` accepts but `int()` rejects (e.g. superscript digits).

    Args:
        text: value to parse, normally a string; '' and None yield 0.

    Returns:
        The parsed int, or 0 if `text` is not a valid integer literal.
    """
    try:
        return int(text)
    except (TypeError, ValueError):
        return 0

In [233]:
def _count_flagged(actions, key):
    """Count the actions whose observation `key` equals the string '1'."""
    return sum(1 for action in actions if action['observations'][key] == '1')


def _sum_observation(actions, key):
    """Sum observation `key` over `actions`, parsing each value with safe_parse_int."""
    return sum(safe_parse_int(action['observations'][key]) for action in actions)


def _rate(numerator, denominator):
    """Return numerator / denominator rounded to 3 places, or 0.0 if the denominator is 0."""
    if not denominator:
        return 0.0
    return round(numerator / denominator, 3)


def compute_qb_stats(qb_action_group):
    """Aggregate one quarterback's actions into summary statistics.

    Args:
        qb_action_group: iterable of action dicts. Each must have an
            'observations' dict containing the keys 'SG/UC', 'Att', 'Comp',
            'Tyds', 'AirYds', 'Press', 'TD', 'Int', 'TA', 'Sk', 'Ht' and
            'Hur'; flag columns are counted when their value is the string '1'.

    Returns:
        A dict of counts and totals plus two derived rates,
        'completion_rate' and 'air_yards_per_attempt', each rounded to three
        decimals. Both rates are 0.0 when there are no attempts, rather than
        raising ZeroDivisionError.

    Raises:
        KeyError: if an action lacks 'observations' or one of the keys above.
    """
    # Materialise once: the input is scanned several times below, which would
    # exhaust a one-shot iterator after the first pass.
    actions = list(qb_action_group)

    drop_back_count = sum(
        1 for action in actions
        if action['observations']['SG/UC'] in ('SG', 'UC')
    )
    attempts = _count_flagged(actions, 'Att')
    completions = _count_flagged(actions, 'Comp')
    air_yards = _sum_observation(actions, 'AirYds')

    return {
        'dropback_count': drop_back_count,
        'attempts': attempts,
        'completions': completions,
        'completion_rate': _rate(completions, attempts),
        'total_yards': _sum_observation(actions, 'Tyds'),
        'air_yards': air_yards,
        'air_yards_per_attempt': _rate(air_yards, attempts),
        'pressured_count': _count_flagged(actions, 'Press'),
        'touchdown_count': _count_flagged(actions, 'TD'),
        'interception_count': _count_flagged(actions, 'Int'),
        'throwaway_count': _count_flagged(actions, 'TA'),
        'sack_count': _count_flagged(actions, 'Sk'),
        'hit_count': _count_flagged(actions, 'Ht'),
        'hurried_count': _count_flagged(actions, 'Hur')
    }
    

In [160]:
# Build a list rather than using filter(): in Python 3 filter() returns a
# one-shot iterator, so re-running a cell that consumes qb_actions would
# silently see no actions the second time.
qb_actions = [a for a in actions if a['action_type'] == 'quarterback']

In [161]:
# One {'athlete': ..., 'actions': [...]} record per athlete with quarterback actions.
qb_action_groups = group_by_athlete(qb_actions)

In [166]:
# Inspect the first group to see the shape of a grouped record.
qb_action_groups[0]

{'actions': [{'action_type': 'quarterback',
   'athlete': '56a6d949634b6675bab230c0',
   'observations': {'Acc': '',
    'AirYds': '6',
    'Att': '1',
    'BP': '',
    'Comp': '1',
    'DPInt': '',
    'Dir': 'RIGHT',
    'Fum': '',
    'Ht': '',
    'Hur': '',
    'Int': '',
    'Note': '',
    'Pock': '1',
    'Press': '',
    'SG/UC': 'SG',
    'Scram?': '',
    'Sk': '',
    'TA': '',
    'TD': '',
    'Tyds': '6',
    'type': 'quarterback'}},
  {'action_type': 'quarterback',
   'athlete': '56a6d949634b6675bab230c0',
   'observations': {'Acc': '',
    'AirYds': '',
    'Att': '',
    'BP': '',
    'Comp': '',
    'DPInt': '',
    'Dir': '',
    'Fum': '',
    'Ht': '',
    'Hur': '',
    'Int': '',
    'Note': '',
    'Pock': '',
    'Press': '1',
    'SG/UC': 'SG',
    'Scram?': '1',
    'Sk': '',
    'TA': '',
    'TD': '',
    'Tyds': '',
    'type': 'quarterback'}},
  {'action_type': 'quarterback',
   'athlete': '56a6d949634b6675bab230c0',
   'observations': {'Acc': '',
    '

In [169]:
# Use the first quarterback's actions for the step-by-step cells that follow.
actions = qb_action_groups[0]['actions']
# Count actions whose 'SG/UC' observation is filled in with either value.
drop_back_count = sum(
    1 for qb_action in actions
    if qb_action['observations']['SG/UC'] in ('SG', 'UC')
)
drop_back_count

76

In [170]:
# Count actions flagged '1' in the 'Att' observation.
attempts = sum(
    1 for qb_action in actions
    if qb_action['observations']['Att'] == '1'
)
attempts

68

In [173]:
# Count actions flagged '1' in the 'Comp' observation.
completions = sum(
    1 for qb_action in actions
    if qb_action['observations']['Comp'] == '1'
)
completions

40

In [192]:
# Add up the 'Tyds' observation; values safe_parse_int cannot parse count as 0.
total_yards = sum(
    safe_parse_int(qb_action['observations']['Tyds'])
    for qb_action in actions
)
total_yards

390

In [193]:
# Add up the 'AirYds' observation; values safe_parse_int cannot parse count as 0.
air_yards = sum(
    safe_parse_int(qb_action['observations']['AirYds'])
    for qb_action in actions
)
air_yards

892

In [196]:
# Count actions flagged '1' in the 'Press' observation.
pressured_count = sum(
    1 for qb_action in actions
    if qb_action['observations']['Press'] == '1'
)
pressured_count

14

In [201]:
# Count actions flagged '1' in the 'TD' observation.
touchdown_count = sum(
    1 for qb_action in actions
    if qb_action['observations']['TD'] == '1'
)
touchdown_count

2

In [202]:
# Count actions flagged '1' in the 'Int' observation.
interception_count = sum(
    1 for qb_action in actions
    if qb_action['observations']['Int'] == '1'
)
interception_count

2

In [203]:
# Count actions flagged '1' in the 'TA' observation.
throwaway_count = sum(
    1 for qb_action in actions
    if qb_action['observations']['TA'] == '1'
)
throwaway_count

0

In [205]:
# Count actions flagged '1' in the 'Sk' observation.
sack_count = sum(
    1 for qb_action in actions
    if qb_action['observations']['Sk'] == '1'
)
sack_count

2

In [206]:
# Count actions flagged '1' in the 'Ht' observation.
hit_count = sum(
    1 for qb_action in actions
    if qb_action['observations']['Ht'] == '1'
)
hit_count

4

In [208]:
# Count actions flagged '1' in the 'Hur' observation.
hurried_count = sum(
    1 for qb_action in actions
    if qb_action['observations']['Hur'] == '1'
)
hurried_count

2

In [217]:
# Total number of actions in the group currently bound to `actions`.
action_count = len(actions)
action_count

76

In [224]:
# Share of attempts that were completed, rounded to three decimals.
completion_rate = round(completions / attempts, 3)
completion_rate

0.588

In [226]:
# Summed 'AirYds' divided by the number of attempts, rounded to three decimals.
air_yards_per_attempt = round(air_yards / attempts, ndigits=3)
air_yards_per_attempt

13.118

In [234]:
# Cross-check: the function should reproduce the values computed cell by cell above.
compute_qb_stats(actions)

{'air_yards': 892,
 'air_yards_per_attempt': 13.118,
 'attempts': 68,
 'completion_rate': 0.588,
 'completions': 40,
 'dropback_count': 76,
 'hit_count': 4,
 'hurried_count': 2,
 'interception_count': 2,
 'pressured_count': 14,
 'sack_count': 2,
 'throwaway_count': 0,
 'total_yards': 390,
 'touchdown_count': 2}

In [229]:
# One stats record per quarterback. Iterate qb_action_groups, the module-level
# grouping result; `action_groups` exists only as a local inside
# group_by_athlete, so referring to it here raises NameError on a fresh kernel.
qb_stats = [
    {
        'athlete': g['athlete'],
        'stats': compute_qb_stats(g['actions'])
    }
    for g in qb_action_groups
]