In [1]:
import pandas as pd
import yaml
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Parse one match file into nested dicts/lists.
# safe_load is used instead of yaml.load: it only constructs plain Python
# objects (so a data file cannot trigger arbitrary object construction), and
# yaml.load without an explicit Loader is rejected by recent PyYAML releases.
with open('odis/656423.yaml', 'r') as file:
    game = yaml.safe_load(file)

In [3]:
# List the top-level sections of the parsed match file.
game.keys()

['info', 'innings', 'meta']

In [4]:
# Show the match-level metadata section.
game['info']

{'dates': [datetime.date(2015, 2, 22)],
 'match_type': 'ODI',
 'neutral_venue': 1,
 'outcome': {'by': {'runs': 130}, 'winner': 'India'},
 'overs': 50,
 'player_of_match': ['S Dhawan'],
 'teams': ['India', 'South Africa'],
 'toss': {'decision': 'bat', 'winner': 'India'},
 'umpires': ['Aleem Dar', 'RA Kettleborough'],
 'venue': 'Melbourne Cricket Ground'}

In [5]:
# True only when 'player_of_match' is present in the info section and non-empty.
bool(game['info'].get('player_of_match'))

True

In [6]:
def get_game_info(info_dict):
    """Flatten the ``info`` section of a parsed match file into one tuple.

    Parameters
    ----------
    info_dict : dict
        Match metadata. ``dates``, ``venue``, ``match_type``, ``teams``,
        ``toss`` and ``overs`` are required. ``city``, ``neutral_venue``,
        ``umpires``, ``player_of_match`` and ``outcome`` are optional.

    Returns
    -------
    tuple
        ``(start_date, city, venue, neutral, match_type, overs, team_inn1,
        team_inn2, toss_winner, toss_decision, match_winner, win_type,
        win_margin, umpires, man_of_match)``.

        * ``city`` is ``None`` when the file does not record one.
        * ``neutral`` defaults to ``0``.
        * ``umpires`` / ``man_of_match`` are ``'|'``-joined names, or ``''``
          when the entry is missing.
        * When the outcome has no ``winner``, ``match_winner`` carries the
          recorded ``result`` text (or ``'NA'``) and ``win_type`` /
          ``win_margin`` are ``'NA'``.

    Raises
    ------
    KeyError
        If one of the required keys is missing.
    """
    start_date = info_dict['dates'][0]
    venue = info_dict['venue']
    # Not every match records a city (the sample match in this notebook does not).
    city = info_dict.get('city')
    match_type = info_dict['match_type']
    neutral = info_dict.get('neutral_venue', 0)
    overs = info_dict['overs']

    toss_winner = info_dict['toss']['winner']
    toss_decision = info_dict['toss']['decision']
    toss_loser = next(team for team in info_dict['teams'] if team != toss_winner)
    # The toss winner bats first only if they chose to bat.
    if toss_decision == 'bat':
        team_inn1, team_inn2 = toss_winner, toss_loser
    else:
        team_inn1, team_inn2 = toss_loser, toss_winner

    umpires = '|'.join(info_dict.get('umpires') or [])
    man_of_match = '|'.join(info_dict.get('player_of_match') or [])

    outcome = info_dict.get('outcome') or {}
    if 'winner' in outcome:
        match_winner = outcome['winner']
        # 'by' is a one-entry mapping such as {'runs': 130}: its key is the
        # kind of win and its value is the margin.
        by = outcome.get('by') or {}
        win_type = next(iter(by), 'NA')
        win_margin = by.get(win_type, 'NA')
    else:
        # No winner recorded: report the result text instead. It is looked up
        # inside 'outcome' first and then at the top level of the info dict.
        match_winner = outcome.get('result', info_dict.get('result', 'NA'))
        win_type = 'NA'
        win_margin = 'NA'

    return (start_date, city, venue, neutral, match_type, overs, team_inn1, team_inn2, toss_winner, toss_decision,
            match_winner, win_type, win_margin, umpires, man_of_match)

In [7]:
# Parallel accumulators: position i in every list describes the same delivery.
ball, bowler, batsman, non_striker = [], [], [], []
batted_runs, extras, total_runs, wickets = [], [], [], []

In [8]:
# Walk the first innings delivery by delivery and append one value per
# delivery to each of the parallel accumulator lists.
# NOTE: the lists are only appended to, so re-running this cell without first
# re-running the cell that resets them duplicates every delivery.
for b in game['innings'][0]['1st innings']['deliveries']:
    # Each delivery is a one-entry mapping {ball_id: details}; next(iter(...))
    # fetches that single key on both Python 2 and Python 3.
    ball_id = next(iter(b))
    ball_details = b[ball_id]
    ball.append(str(ball_id))
    bowler.append(ball_details['bowler'])
    batsman.append(ball_details['batsman'])
    non_striker.append(ball_details['non_striker'])
    batted_runs.append(ball_details['runs']['batsman'])
    extras.append(ball_details['runs']['extras'])
    total_runs.append(ball_details['runs']['total'])
    # A delivery took a wicket exactly when it carries a 'wicket' entry.
    wickets.append('wicket' in ball_details)

In [9]:
# Assemble the per-delivery lists into one play-by-play frame, one column per list.
pbyp = pd.DataFrame(dict(
    ball=ball,
    bowler=bowler,
    batsman=batsman,
    non_striker=non_striker,
    batted_runs=batted_runs,
    extras=extras,
    total_runs=total_runs,
    wicket=wickets,
))

In [10]:
# Running score and running wicket count after each delivery.
pbyp['innings_total'] = pbyp['total_runs'].cumsum()
pbyp['wickets_fallen'] = pbyp['wicket'].cumsum()

In [11]:
# Preview the first rows of the play-by-play frame.
pbyp.head()

Unnamed: 0,ball,batsman,batted_runs,bowler,extras,non_striker,total_runs,wicket,innings_total,wickets_fallen
0,0.1,RG Sharma,0,DW Steyn,0,S Dhawan,0,False,0,0
1,0.2,RG Sharma,0,DW Steyn,0,S Dhawan,0,False,0,0
2,0.3,RG Sharma,0,DW Steyn,0,S Dhawan,0,False,0,0
3,0.4,RG Sharma,0,DW Steyn,0,S Dhawan,0,False,0,0
4,0.5,RG Sharma,0,DW Steyn,0,S Dhawan,0,False,0,0


In [12]:
# Raw parsed form of the first five deliveries of the first innings.
game['innings'][0]['1st innings']['deliveries'][:5]

[{0.1: {'batsman': 'RG Sharma',
   'bowler': 'DW Steyn',
   'non_striker': 'S Dhawan',
   'runs': {'batsman': 0, 'extras': 0, 'total': 0}}},
 {0.2: {'batsman': 'RG Sharma',
   'bowler': 'DW Steyn',
   'non_striker': 'S Dhawan',
   'runs': {'batsman': 0, 'extras': 0, 'total': 0}}},
 {0.3: {'batsman': 'RG Sharma',
   'bowler': 'DW Steyn',
   'non_striker': 'S Dhawan',
   'runs': {'batsman': 0, 'extras': 0, 'total': 0}}},
 {0.4: {'batsman': 'RG Sharma',
   'bowler': 'DW Steyn',
   'non_striker': 'S Dhawan',
   'runs': {'batsman': 0, 'extras': 0, 'total': 0}}},
 {0.5: {'batsman': 'RG Sharma',
   'bowler': 'DW Steyn',
   'non_striker': 'S Dhawan',
   'runs': {'batsman': 0, 'extras': 0, 'total': 0}}}]

In [13]:
# Merge the per-innings one-entry mappings into a single {innings_name: data}
# dict. Iterating over the list handles any number of innings and avoids
# concatenating dict views, which fails on Python 3.
all_balls = {name: data for entry in game['innings'] for name, data in entry.items()}

In [14]:
all_balls.keys()

['1st innings', '2nd innings']

In [15]:
# Split the first five delivery ids into [over, ball-in-over] strings.
# Slicing the deliveries first avoids building the list for the whole innings;
# next(iter(b)) fetches the single key of each delivery on Python 2 and 3.
[str(next(iter(b))).split('.') for b in all_balls['1st innings']['deliveries'][:5]]

[['0', '1'], ['0', '2'], ['0', '3'], ['0', '4'], ['0', '5']]

In [16]:
def get_ball_by_ball(innings_obj):
    """Flatten the innings of one match into a list of per-delivery tuples.

    Parameters
    ----------
    innings_obj : list of dict
        One one-entry mapping per innings, ``{innings_name: innings_data}``,
        where ``innings_data`` has a ``'team'`` and a ``'deliveries'`` list.
        Each delivery is itself a one-entry mapping ``{ball_id: details}``.
        Any number of innings is accepted and they are processed in list order.

    Returns
    -------
    list of tuple
        One 20-field tuple per delivery, in this order: ``innings,
        batting_side, over_number, ball_in_over, bowler, batsman, non_striker,
        batted_runs, xb, xlb, xnb, xw, xpen, total_extras, total_runs,
        non_boundary, wicket, kind, player_out, fielders``.

        * ``over_number`` / ``ball_in_over`` are the strings on either side of
          the ``'.'`` in ``str(ball_id)``.
        * ``xb, xlb, xnb, xw, xpen`` are the byes / legbyes / noballs / wides /
          penalty amounts, ``None`` where not recorded.
        * ``non_boundary`` defaults to ``0``.
        * ``wicket`` is ``1`` when the delivery has a wicket entry, else
          ``None``; ``kind``, ``player_out`` and ``fielders`` (``'|'``-joined)
          are ``None`` when there is no wicket, and ``fielders`` is also
          ``None`` when the wicket lists no fielders.
    """
    extra_kinds = ('byes', 'legbyes', 'noballs', 'wides', 'penalty')
    all_info = []
    for innings_entry in innings_obj:
        for innings, innings_data in innings_entry.items():
            batting_side = innings_data['team']
            for b in innings_data['deliveries']:
                # Single key of the delivery mapping; works on Python 2 and 3.
                ball_key = next(iter(b))
                # NOTE(review): the ids shown in this notebook are parsed as
                # floats, so an id written as x.10 would read the same as x.1
                # — TODO confirm whether the data ever contains such ids.
                ball_id = str(ball_key).split('.')
                over_number = ball_id[0]
                ball_in_over = ball_id[1]
                ball_details = b[ball_key]

                runs = ball_details['runs']
                # A missing 'extras' entry yields None for every kind.
                extras_detail = ball_details.get('extras') or {}
                xb, xlb, xnb, xw, xpen = [extras_detail.get(k) for k in extra_kinds]

                wicket_detail = ball_details.get('wicket')
                if wicket_detail is not None:
                    wicket = 1
                    kind = wicket_detail['kind']
                    player_out = wicket_detail['player_out']
                    named_fielders = wicket_detail.get('fielders')
                    fielders = '|'.join(named_fielders) if named_fielders else None
                else:
                    wicket, kind, player_out, fielders = (None,) * 4

                all_info.append((
                    innings, batting_side, over_number, ball_in_over,
                    ball_details['bowler'], ball_details['batsman'], ball_details['non_striker'],
                    runs['batsman'], xb, xlb, xnb, xw, xpen,
                    runs['extras'], runs['total'], runs.get('non_boundary', 0),
                    wicket, kind, player_out, fielders,
                ))
    return all_info

In [17]:
# Sanity check: total number of deliveries extracted from this match.
len(get_ball_by_ball(game['innings']))

556

In [18]:
# Column layout of the ball-by-ball table. Everything after 'game_id' follows
# the field order of the tuples built by get_ball_by_ball.
COLUMN_NAMES = [
    'game_id',
    'innings', 'batting_side', 'over_number', 'ball_in_over',
    'bowler', 'batsman', 'non_striker',
    'batted_runs', 'xb', 'xlb', 'xnb', 'xw', 'xpen',
    'total_extras', 'total_runs', 'non_boundary',
    'wicket', 'kind', 'player_out', 'fielders',
]

In [19]:
# Ball-by-ball frame for this one match. COLUMN_NAMES[1:] skips 'game_id',
# which the tuples returned by get_ball_by_ball do not contain.
indvsa = pd.DataFrame(get_ball_by_ball(game['innings']), columns=COLUMN_NAMES[1:])

In [20]:
# Empty frame with the ball-by-ball columns (without 'game_id').
# Presumably the starting point for accumulating several matches — confirm.
pbp_ALL = pd.DataFrame(columns=COLUMN_NAMES[1:])

In [22]:
# Stack the single-match rows under the empty frame and preview the result;
# the concatenated frame is only displayed, not stored.
pd.concat([pbp_ALL, indvsa], axis = 0).head()

Unnamed: 0,innings,batting_side,over_number,ball_in_over,bowler,batsman,non_striker,batted_runs,xb,xlb,xnb,xw,xpen,total_extras,total_runs,non_boundary,wicket,kind,player_out,fielders
0,1st innings,India,0,1,DW Steyn,RG Sharma,S Dhawan,0,,,,,,0,0,0,,,,
1,1st innings,India,0,2,DW Steyn,RG Sharma,S Dhawan,0,,,,,,0,0,0,,,,
2,1st innings,India,0,3,DW Steyn,RG Sharma,S Dhawan,0,,,,,,0,0,0,,,,
3,1st innings,India,0,4,DW Steyn,RG Sharma,S Dhawan,0,,,,,,0,0,0,,,,
4,1st innings,India,0,5,DW Steyn,RG Sharma,S Dhawan,0,,,,,,0,0,0,,,,


In [25]:
# Empty frame carrying the full ball-by-ball layout, including 'game_id'.
# The shared COLUMN_NAMES constant is reused so the layout is spelled out in
# one place only.
pd.DataFrame(None, columns=COLUMN_NAMES)

Unnamed: 0,game_id,innings,batting_side,over_number,ball_in_over,bowler,batsman,non_striker,batted_runs,xb,...,xnb,xw,xpen,total_extras,total_runs,non_boundary,wicket,kind,player_out,fielders


In [23]:
# NOTE(review): the recorded run of this cell failed with
# "IOError: File odi_ballbyball.csv does not exist". No visible cell writes
# that file, so it has to be produced elsewhere before this cell and the
# cells that use all_games can run — TODO confirm where it is generated.
all_games = pd.read_csv('odi_ballbyball.csv')

IOError: File odi_ballbyball.csv does not exist

In [None]:
# Preview only the first rows; the combined table holds one row per delivery
# per match, so displaying it whole would flood the notebook output.
all_games.head()

In [None]:
# Mean total_runs per row within each (innings, over_number) group, times 6.
# Presumably an approximate runs-per-over figure, assuming one row per
# delivery and six deliveries per over — confirm.
all_games.groupby(['innings','over_number']).total_runs.mean()*6

In [None]:
# Mean of the 'inn_runs' column for each over number.
# NOTE(review): 'inn_runs' is not one of the columns listed in COLUMN_NAMES;
# presumably the CSV carries it as an extra column — confirm.
all_games.groupby('over_number').inn_runs.mean()

In [None]:
# Plot the mean of 'inn_runs' for each (innings, over_number) group.
# NOTE(review): 'inn_runs' is not one of the columns listed in COLUMN_NAMES;
# presumably the CSV carries it as an extra column — confirm.
all_games.groupby(['innings','over_number']).inn_runs.mean().plot()

In [None]:
# Plot the per-(innings, over_number) mean of total_runs, scaled by 6.
(all_games.groupby(['innings','over_number']).total_runs.mean()*6).plot()