In [436]:
import pandas as pd
import json
from itertools import groupby
from collections import defaultdict

In [437]:
# Placeholder so `games` exists before the data file is loaded further down.
games = []
# Missing keys default to 0. `int` is the idiomatic zero factory and, unlike a
# lambda, keeps the mapping picklable.
# NOTE(review): presumably a per-action-type tally; not used in the visible cells.
action_types = defaultdict(int)

In [438]:
def preprocess(games):
    """Normalise the nested game records in place.

    Two things happen to every action found under
    ``game['quarters'][*]['plays'][*]['actions']``:

    * an ``'athlete'`` value of the form ``{'$oid': ...}`` is replaced by the
      id stored under ``'$oid'``;
    * every ``'quarterback'`` action gets a ``'Drop'`` observation: the
      ``'Drop'`` value of the play's receiver action when the play has exactly
      one receiver action, otherwise ``0``.

    The function is safe to call more than once on the same data (for example
    when the loading cell is re-run): an athlete reference that has already
    been flattened is left untouched instead of raising ``TypeError``.

    Args:
        games: list of game dicts; mutated in place.

    Returns:
        None.
    """
    for game in games:
        for quarter in game['quarters']:
            for play in quarter['plays']:
                play_actions = play['actions']
                # The receiver list is identical for every quarterback action
                # of a play, so build it once per play.
                receiver_actions = [a for a in play_actions
                                    if a['action_type'] == 'receiver']
                for action in play_actions:
                    athlete = action.get('athlete')
                    # Only unwrap references that are still dicts; after a
                    # previous run the value is already the plain id.
                    if isinstance(athlete, dict):
                        action['athlete'] = athlete['$oid']

                    if action['action_type'] == 'quarterback':
                        if len(receiver_actions) == 1:
                            action['observations']['Drop'] = receiver_actions[0]['observations']['Drop']
                        else:
                            action['observations']['Drop'] = 0
                    
                            

In [439]:
def get_actions(games, game_filter):
    """Collect all actions of one type from every play of every game.

    Args:
        games: list of game dicts shaped
            ``game['quarters'][*]['plays'][*]['actions']``.
        game_filter: dict whose ``'action_type'`` entry names the action type
            to keep.

    Returns:
        A flat list, in traversal order, of the action dicts that carry an
        ``'athlete'`` key and whose ``'action_type'`` equals the requested one.
    """
    wanted_type = game_filter['action_type']

    return [
        action
        for game in games
        for quarter in game['quarters']
        for play in quarter['plays']
        for action in play['actions']
        # actions without an athlete are skipped before their type is read
        if 'athlete' in action and action['action_type'] == wanted_type
    ]

In [409]:
# Load the raw game records, then normalise them in place.
# JSON is decoded as UTF-8 explicitly; without `encoding` Python falls back to
# the platform locale, which can fail or mis-decode on some systems.
with open('../data/games.json', 'r', encoding='utf-8') as f:
    games = json.load(f)
# The file is no longer needed once parsed, so preprocessing runs after it is closed.
preprocess(games)

In [410]:
# Every quarterback action across all loaded games.
quarterback_filter = {'action_type': 'quarterback'}
actions = get_actions(games, quarterback_filter)

In [411]:
def group_by_athlete(actions):
    """Bucket actions by their ``'athlete'`` value.

    Args:
        actions: iterable of action dicts, each with an ``'athlete'`` key.

    Returns:
        A list of ``{'athlete': <key>, 'actions': [<action>, ...]}`` dicts,
        ordered by athlete key; actions keep their relative input order
        within each bucket.
    """
    def athlete_of(action):
        return action['athlete']

    # groupby only merges neighbouring items, so order by the key first
    ordered = sorted(actions, key=athlete_of)

    buckets = []
    for athlete, members in groupby(ordered, athlete_of):
        buckets.append({'athlete': athlete, 'actions': list(members)})
    return buckets

In [412]:
def parse_dropback(text):
    """Return 1 when ``text`` contains 'SG' or 'UC', otherwise 0."""
    has_marker = any(marker in text for marker in ('SG', 'UC'))
    return 1 if has_marker else 0
    

In [413]:
def safe_parse_int(text):
    """Convert ``text`` to an int, falling back to 0 when that is impossible.

    ``int()`` does the parsing, so signed values such as ``'-7'`` are
    converted rather than silently counted as 0 (``str.isdigit`` rejects the
    sign). Anything ``int()`` cannot handle yields 0 instead of an exception:
    empty or non-numeric strings, ``None``, and characters such as ``'²'``
    that ``str.isdigit`` accepts but ``int()`` refuses.

    Args:
        text: the value to convert, normally a string.

    Returns:
        The parsed integer, or 0 when ``text`` is not a valid integer.
    """
    try:
        return int(text)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / unsupported types; ValueError: non-numeric text;
        # OverflowError: float infinity.
        return 0

In [448]:
def compute_qb_stats(qb_action_group):
    """Aggregate the observations of one quarterback's actions.

    Each action is a dict with an ``'observations'`` dict. Flag-style
    observations count when their value is the string ``'1'``; ``'Tyds'`` and
    ``'AirYds'`` are summed through ``safe_parse_int``; a drop-back is an
    action whose ``'SG/UC'`` observation is ``'SG'`` or ``'UC'``.

    Args:
        qb_action_group: iterable of action dicts. It is copied into a list
            first, so generators and other one-shot iterables are accepted.

    Returns:
        dict with the keys ``dropback_count``, ``attempts``, ``completions``,
        ``completion_rate``, ``total_yards``, ``air_yards``,
        ``air_yards_per_attempt``, ``pressured_count``, ``touchdown_count``,
        ``interception_count``, ``throwaway_count``, ``drop_count``,
        ``sack_count``, ``hit_count`` and ``hurried_count``. The two ratios
        are rounded to 3 decimals and are 0 when there are no attempts.

    Raises:
        KeyError: if an action lacks one of the observations read here.
    """
    # The data is scanned once per statistic; a one-shot iterator would be
    # empty after the first scan, so materialise it up front.
    actions = list(qb_action_group)

    def count_flag(key):
        """Count actions whose observation ``key`` is the string '1'."""
        return sum(1 for a in actions if a['observations'][key] == '1')

    def sum_int(key):
        """Sum observation ``key`` over all actions, parsed leniently."""
        return sum(safe_parse_int(a['observations'][key]) for a in actions)

    drop_back_count = sum(1 for a in actions
                          if a['observations']['SG/UC'] in ['SG', 'UC'])

    attempts = count_flag('Att')
    completions = count_flag('Comp')

    total_yards = sum_int('Tyds')
    air_yards = sum_int('AirYds')

    pressured_count = count_flag('Press')
    touchdown_count = count_flag('TD')
    interception_count = count_flag('Int')
    throwaway_count = count_flag('TA')
    sack_count = count_flag('Sk')
    hit_count = count_flag('Ht')
    hurried_count = count_flag('Hur')
    drop_count = count_flag('Drop')

    # Ratios are defined as 0 when nothing was attempted (avoids division by zero).
    completion_rate = round(completions / attempts, 3) if attempts > 0 else 0
    air_yards_per_attempt = round(air_yards / attempts, 3) if attempts > 0 else 0

    return {
        'dropback_count': drop_back_count,
        'attempts': attempts,
        'completions': completions,
        'completion_rate': completion_rate,
        'total_yards': total_yards,
        'air_yards': air_yards,
        'air_yards_per_attempt': air_yards_per_attempt,
        'pressured_count': pressured_count,
        'touchdown_count': touchdown_count,
        'interception_count': interception_count,
        'throwaway_count': throwaway_count,
        'drop_count': drop_count,
        'sack_count': sack_count,
        'hit_count': hit_count,
        'hurried_count': hurried_count
    }
    

In [415]:
# Built as a list rather than a lazy `filter` object: in Python 3 a filter
# iterator is empty after its first traversal, so any later cell iterating
# `qb_actions` again would silently see no data.
qb_actions = [a for a in actions if a['action_type'] == 'quarterback']

In [416]:
# One {'athlete': ..., 'actions': [...]} entry per quarterback.
qb_action_groups = group_by_athlete(qb_actions)

In [418]:
# Manual spot check on the first quarterback group.
# Note: this rebinds `actions` from "all quarterback actions" to one
# athlete's actions; the cells below rely on the new binding.
actions = qb_action_groups[0]['actions']
drop_back_count = sum(1 for action in actions
                      if action['observations']['SG/UC'] in ('SG', 'UC'))
drop_back_count

76

In [419]:
# Actions whose 'Att' observation is the string '1'.
attempts = sum(1 for action in actions
               if action['observations']['Att'] == '1')
attempts

68

In [420]:
# Actions whose 'Comp' observation is the string '1'.
completions = sum(1 for action in actions
                  if action['observations']['Comp'] == '1')
completions

40

In [421]:
# Sum of the 'Tyds' observations, parsed with safe_parse_int.
total_yards = sum(safe_parse_int(action['observations']['Tyds'])
                  for action in actions)
total_yards

390

In [422]:
# Sum of the 'AirYds' observations, parsed with safe_parse_int.
air_yards = sum(safe_parse_int(action['observations']['AirYds'])
                for action in actions)
air_yards

892

In [423]:
# Actions whose 'Press' observation is the string '1'.
pressured_count = sum(1 for action in actions
                      if action['observations']['Press'] == '1')
pressured_count

14

In [424]:
# Actions whose 'TD' observation is the string '1'.
touchdown_count = sum(1 for action in actions
                      if action['observations']['TD'] == '1')
touchdown_count

2

In [425]:
# Actions whose 'Int' observation is the string '1'.
interception_count = sum(1 for action in actions
                         if action['observations']['Int'] == '1')
interception_count

2

In [426]:
# Actions whose 'TA' observation is the string '1'.
throwaway_count = sum(1 for action in actions
                      if action['observations']['TA'] == '1')
throwaway_count

0

In [427]:
# Actions whose 'Sk' observation is the string '1'.
sack_count = sum(1 for action in actions
                 if action['observations']['Sk'] == '1')
sack_count

2

In [428]:
# Actions whose 'Ht' observation is the string '1'.
hit_count = sum(1 for action in actions
                if action['observations']['Ht'] == '1')
hit_count

4

In [429]:
# Actions whose 'Hur' observation is the string '1'.
hurried_count = sum(1 for action in actions
                    if action['observations']['Hur'] == '1')
hurried_count

2

In [447]:
# Actions whose 'Drop' observation is the string '1'.
drop_count = sum(1 for action in actions
                 if action['observations']['Drop'] == '1')
drop_count

0

In [431]:
# Total number of actions recorded for this quarterback.
action_count = len(actions)
action_count

76

In [432]:
# Completions per attempt, rounded to 3 decimals. Defined as 0 when there are
# no attempts, matching compute_qb_stats and avoiding ZeroDivisionError.
completion_rate = round(completions / attempts, 3) if attempts > 0 else 0
completion_rate

0.588

In [433]:
# Air yards per attempt, rounded to 3 decimals. Defined as 0 when there are no
# attempts, matching compute_qb_stats and avoiding ZeroDivisionError.
air_yards_per_attempt = round(air_yards / attempts, 3) if attempts > 0 else 0
air_yards_per_attempt

13.118

In [434]:
# Cross-check: the function should reproduce the hand-computed values above.
# NOTE(review): the saved output below has no 'drop_count' key although the
# function returns one, so it appears to predate the current definition;
# re-run this cell to refresh it.
compute_qb_stats(actions)

{'air_yards': 892,
 'air_yards_per_attempt': 13.118,
 'attempts': 68,
 'completion_rate': 0.588,
 'completions': 40,
 'dropback_count': 76,
 'hit_count': 4,
 'hurried_count': 2,
 'interception_count': 2,
 'pressured_count': 14,
 'sack_count': 2,
 'throwaway_count': 0,
 'total_yards': 390,
 'touchdown_count': 2}

In [449]:
# Per-quarterback statistics. Iterate the grouped records: each one carries
# the 'athlete' id and that athlete's 'actions' list. The flat `qb_actions`
# sequence holds individual actions, which have no 'actions' key.
qb_stats = [{
        'athlete': g['athlete'],
        'stats': compute_qb_stats(g['actions'])
    } for g in qb_action_groups]