# Data Extraction  

- The official scorecard for the match analysed below can be viewed on the [Cricinfo webpage](https://www.espncricinfo.com/series/8048/scorecard/335982/royal-challengers-bangalore-vs-kolkata-knight-riders-1st-match-indian-premier-league-2007-08).

- The ball-by-ball data can be downloaded from [Cricsheet](https://cricsheet.org/downloads/).



In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os, glob, yaml

# Directory (relative to this notebook) that holds the per-match YAML files
# read by the cells below, e.g. '335982.yaml'.
data_dir='../datasets/ipl/yaml/'

In [2]:
# Load one match file into a plain dict.
# - `yaml.safe_load` replaces `yaml.load(...)` without a Loader, which warns on
#   PyYAML 5.1+ and raises TypeError on PyYAML 6; it still parses dates/floats.
# - The `with` block closes the file handle instead of leaking it.
f = '335982.yaml'
fil = os.path.join(data_dir, f)
with open(fil, 'r', encoding='utf-8') as stream:
    data = yaml.safe_load(stream)

In [3]:
# Show the top-level sections of the parsed match file, then its metadata,
# each followed by a blank line; the match 'info' section is echoed as the
# cell's result.
print(data.keys(), end='\n\n')
print(data['meta'], end='\n\n')
data['info']

dict_keys(['meta', 'info', 'innings'])

{'data_version': 0.9, 'created': datetime.date(2011, 5, 6), 'revision': 2}



{'city': 'Bangalore',
 'competition': 'IPL',
 'dates': [datetime.date(2008, 4, 18)],
 'gender': 'male',
 'match_type': 'T20',
 'outcome': {'by': {'runs': 140}, 'winner': 'Kolkata Knight Riders'},
 'overs': 20,
 'player_of_match': ['BB McCullum'],
 'teams': ['Royal Challengers Bangalore', 'Kolkata Knight Riders'],
 'toss': {'decision': 'field', 'winner': 'Royal Challengers Bangalore'},
 'umpires': ['Asad Rauf', 'RE Koertzen'],
 'venue': 'M Chinnaswamy Stadium'}

In [4]:
# The winning margin is stored as a one-entry mapping (unit -> amount).
# Only the last expression of a cell is echoed, so the unit lookup on the
# first line is evaluated but not shown; the amount on the second line is.
[*data['info']['outcome']['by']][0]
[*data['info']['outcome']['by'].values()][0]

140

In [5]:
# Echo the raw record at index 1 of the innings list: a single-entry mapping
# whose value carries the batting 'team' and the list of 'deliveries'.
# NOTE(review): this dumps every delivery; consider slicing the list when
# only the structure needs to be shown.
data['innings'][1]

{'2nd innings': {'team': 'Royal Challengers Bangalore',
  'deliveries': [{0.1: {'batsman': 'R Dravid',
     'bowler': 'AB Dinda',
     'non_striker': 'W Jaffer',
     'runs': {'batsman': 1, 'extras': 0, 'total': 1}}},
   {0.2: {'batsman': 'W Jaffer',
     'bowler': 'AB Dinda',
     'extras': {'wides': 1},
     'non_striker': 'R Dravid',
     'runs': {'batsman': 0, 'extras': 1, 'total': 1}}},
   {0.3: {'batsman': 'W Jaffer',
     'bowler': 'AB Dinda',
     'non_striker': 'R Dravid',
     'runs': {'batsman': 0, 'extras': 0, 'total': 0}}},
   {0.4: {'batsman': 'W Jaffer',
     'bowler': 'AB Dinda',
     'non_striker': 'R Dravid',
     'runs': {'batsman': 1, 'extras': 0, 'total': 1}}},
   {0.5: {'batsman': 'R Dravid',
     'bowler': 'AB Dinda',
     'non_striker': 'W Jaffer',
     'runs': {'batsman': 1, 'extras': 0, 'total': 1}}},
   {0.6: {'batsman': 'W Jaffer',
     'bowler': 'AB Dinda',
     'non_striker': 'R Dravid',
     'runs': {'batsman': 0, 'extras': 0, 'total': 0}}},
   {0.7: {'ba

In [38]:
_CARD_COLUMNS = ('0', '1', '2', '3', '4', '5', '6', 'R', 'B', 'NOT-OUT')
_RUN_BUCKETS = _CARD_COLUMNS[:7]  # per-ball run values that have their own column


def _label_row(runs=' '):
    """Build a non-player filler row: every cell blank except an optional 'R' value."""
    row = dict.fromkeys(_CARD_COLUMNS, ' ')
    row['R'] = runs
    return row


def _new_batsman_row():
    """Build a zeroed tally row for a batsman who has not been dismissed."""
    row = dict.fromkeys(_CARD_COLUMNS, 0)
    row['NOT-OUT'] = True
    return row


def _tally_innings(deliveries):
    """Aggregate one innings' deliveries into (per-batsman rows, total runs, wickets)."""
    card = {}
    runs_total = 0
    wkts = 0

    for delivery in deliveries:
        # Each delivery is a single-entry mapping {ball_number: details}.
        details = list(delivery.values())[0]
        batsman = details['batsman'].strip()
        runs = details['runs']
        runs_bat = runs.get('batsman', 0)

        if batsman not in card:
            card[batsman] = _new_batsman_row()
        row = card[batsman]

        # Run values without a dedicated column (e.g. 7 via overthrows) still
        # count towards 'R' instead of raising KeyError.
        bucket = str(runs_bat)
        if bucket in _RUN_BUCKETS:
            row[bucket] += 1
        row['R'] += runs_bat

        # A wide is not a ball faced by the batsman.
        if 'wides' not in (details.get('extras') or {}):
            row['B'] += 1

        runs_total += runs.get('total', 0)

        dismissals = details.get('wicket')
        if dismissals:
            # NOTE(review): some feed versions may record several wickets on one
            # delivery as a list; a single mapping is normalised to a list here.
            if isinstance(dismissals, dict):
                dismissals = [dismissals]
            for dismissal in dismissals:
                wkts += 1
                # Stripped like `batsman`, so both names address the same row.
                player_out = dismissal['player_out'].strip()
                # Run out without having faced a ball: no row exists yet.
                if player_out not in card:
                    card[player_out] = _new_batsman_row()
                card[player_out]['NOT-OUT'] = False

    return card, runs_total, wkts


def print_scorecard(f, data_dir='./'):
    """Print a match summary and display the batting scorecard of a match file.

    Parameters
    ----------
    f : str
        File name (or relative path) of the match YAML file.
    data_dir : str, optional
        Directory that `f` is joined onto. Defaults to the current directory.

    Returns
    -------
    None
        The summary is printed and the per-batsman table (columns
        '4', '5', '6', 'R', 'B', 'NOT-OUT') is shown with `display`, which is
        expected to be provided by the IPython/Jupyter session.

    Notes
    -----
    * A YAML parse error is reported and the function returns early; previously
      it continued and failed on the unbound `data`.
    * Only the first two innings are tallied (as before), but matches with
      fewer than two innings no longer crash.
    * Matches without a winner, a margin or a player of the match are
      summarised without raising KeyError.
    """
    fil = os.path.join(data_dir, f)
    with open(fil, 'r', encoding='utf-8') as stream:
        try:
            data = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            print(exc)
            return

    info = data['info']

    # Dates are normally parsed into date objects; keep whatever else is there as-is.
    first_date = info['dates'][0]
    if hasattr(first_date, 'strftime'):
        match_date = first_date.strftime('%Y-%m-%d')
    else:
        match_date = first_date

    batting_card = {}
    non_player_rows = []  # header / total / spacer labels to strip from the final table
    for inn, innings in enumerate((data.get('innings') or [])[:2]):
        inn_name, inn_body = list(innings.items())[0]
        inn_name = inn_name.strip()
        total_label = 'Total_inn' + str(inn + 1)

        players, runs_total, wkts = _tally_innings(inn_body.get('deliveries') or [])

        batting_card[inn_name] = _label_row()
        batting_card.update(players)
        batting_card[total_label] = _label_row(str(runs_total) + '-' + str(wkts))
        batting_card['  '] = _label_row()
        non_player_rows.extend([inn_name, total_label, '  '])

    # An explicit index fixes the column order and keeps an empty card well-formed.
    df = pd.DataFrame(batting_card, index=list(_CARD_COLUMNS)).transpose()

    print (info['teams'][0] ,' vs ', info['teams'][1], ', ', match_date)
    print ( 'Toss \t\t\t', info['toss']['winner'], 'Decided to', info['toss']['decision'])

    outcome = info.get('outcome') or {}
    margin = outcome.get('by')
    if 'winner' in outcome and margin:
        margin_unit, margin_value = list(margin.items())[0]
        print ('Result \t\t\t', outcome['winner'], 'won by ', margin_value, margin_unit)
    elif 'winner' in outcome:
        print ('Result \t\t\t', outcome['winner'], 'won')
    else:
        # NOTE(review): assumes non-decisive matches describe themselves under
        # a 'result' key (e.g. tie / no result) — confirm against the data feed.
        print ('Result \t\t\t', outcome.get('result', 'unknown'))

    player_of_match = info.get('player_of_match')
    if player_of_match:
        print ('Player of the Match\t', player_of_match[0])

    # Keep only player rows and the boundary/total columns for the displayed card.
    labels_to_drop = list(dict.fromkeys(non_player_rows))
    df_to_return = df.drop(columns=['0', '1', '2', '3']).drop(index=labels_to_drop)
    display(df_to_return)
    
#f = '335982.yaml'

#df = print_scorecard(f, data_dir=data_dir)



f2='../datasets/ipl/yaml/336002.yaml'
print (f2)
# Load the file named by f2. The previous line re-read the stale `fil` path
# from an earlier cell (a different match), called yaml.load without a Loader
# and never closed the file handle.
with open(f2, 'r', encoding='utf-8') as stream:
    data = yaml.safe_load(stream)

print_scorecard(f2)




../datasets/ipl/yaml/336002.yaml
Deccan Chargers  vs  Royal Challengers Bangalore ,  2008-05-25
Toss 			 Deccan Chargers Decided to bat
Result 			 Royal Challengers Bangalore won by  5 wickets
Player of the Match	 R Vinay Kumar


Unnamed: 0,4,5,6,R,B,NOT-OUT
AC Gilchrist,4,0,2,46,37,False
HH Gibbs,6,0,2,47,34,False
RG Sharma,2,0,0,17,16,False
LPC Silva,0,0,0,0,2,False
Y Venugopal Rao,2,0,2,26,12,False
DB Ravi Teja,1,0,0,7,7,False
AS Yadav,0,0,1,8,8,False
SB Bangar,0,0,0,0,0,False
WPUJC Vaas,0,0,0,0,3,False
RP Singh,0,0,0,1,3,False


../datasets/ipl/yaml/336002.yaml


KeyError: 'SB Bangar'