# IPL-2020

In [30]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os, glob, yaml, pickle
import datetime

from utils import get_match_list, add_overs
from print_scorecard import print_scorecard

data_dir='../datasets/ipl/yaml_2020/'

### Load the saved dataframes

In [24]:
def get_2020_data():
    
    df_bat  = pickle.load(open('./database/batting_record_all_years.df', 'rb'))
    df_bowl = pickle.load(open('./database/bowling_record_all_years.df', 'rb'))

    df_bat_20 = df_bat[df_bat['season']=='2020']
    df_bowl_20 = df_bowl[df_bowl['season']=='2020']
    
    return (df_bat_20, df_bowl_20)

In [21]:
df_bat, df_bowl = get_2020_data()
df_bat.head(2)

## Top Batting Statistics

In [25]:
def batting_summary_():
    df, _ = get_2020_data()
    all_players = df['batsman'].unique()
    
    data=[]
    for player in all_players:
        dfp = df[ df['batsman']==player ]
        
        Inns_ = dfp.shape[0]
        Runs_ = dfp.Runs.sum()
        BF_   = dfp.BF.sum()
        NOs_  = dfp.NO.sum()
        
        HS    = max(dfp.Runs)
        Fifty = ((dfp.Runs>=50) & (dfp.Runs<100) ).sum()
        Hundred = (dfp.Runs>=100).sum()
        
        Wins  = sum(dfp.Win)
        Toss_wins = sum(dfp.Toss)
        
        SR    = np.round(100*Runs_/(BF_+0.1), 2) # add 0.1 to avoide deviding by 0
        
        if Inns_== NOs_:
            Ave=dfp.Runs.sum() 
        else:
            Ave   = np.round(Runs_/(Inns_-NOs_), 2)
        Fours = dfp['4s'].sum()
        Sixes = dfp['6s'].sum()

        data.append([player, Inns_, NOs_, Runs_, BF_, HS, Ave, SR, Fifty, Hundred, Fours, Sixes] )
    df_p = pd.DataFrame(data, columns=['player', 'Innings', 'NO', 'Runs', 'BF', 'HS',
                                       'Ave','SR', '50s', '100s', '4s', '6s'])
    return df_p

df_summary= batting_summary_()
df_summary.head(2)

Unnamed: 0,player,Innings,NO,Runs,BF,HS,Ave,SR,50s,100s,4s,6s
0,RG Sharma,12,0,332,260,80,27.67,127.64,3,0,27,19
1,Q de Kock,16,2,503,358,78,35.93,140.46,4,0,46,22


In [28]:
def sorted_table(sort_by='Runs', min_runs=300):
    df_summary = batting_summary_()
    df_summary = df_summary[ df_summary['Runs']>=min_runs]
    df_sorted  = df_summary.sort_values(by=[sort_by], ascending=False)
    return df_sorted

def get_top_players():
    sorting_list=['Runs', 'BF', 'Ave', 'SR', '50s', '100s', '4s', '6s']
    for sort_item in sorting_list:
        df_sorted = sorted_table(sort_by=sort_item)
        print ('Sorted by :',sort_item)
        display(df_sorted[:5])
get_top_players()

Sorted by : Runs


Unnamed: 0,player,Innings,NO,Runs,BF,HS,Ave,SR,50s,100s,4s,6s
27,KL Rahul,14,2,670,518,132,55.83,129.32,5,1,58,23
19,S Dhawan,17,3,618,427,106,44.14,144.7,4,2,67,13
41,DA Warner,16,2,548,407,85,39.14,134.61,4,0,52,14
21,SS Iyer,17,2,519,421,88,34.6,123.25,3,0,40,16
81,Ishan Kishan,13,4,516,354,99,57.33,145.72,4,0,36,30


Sorted by : BF


Unnamed: 0,player,Innings,NO,Runs,BF,HS,Ave,SR,50s,100s,4s,6s
27,KL Rahul,14,2,670,518,132,55.83,129.32,5,1,58,23
19,S Dhawan,17,3,618,427,106,44.14,144.7,4,2,67,13
21,SS Iyer,17,2,519,421,88,34.6,123.25,3,0,40,16
41,DA Warner,16,2,548,407,85,39.14,134.61,4,0,52,14
38,V Kohli,15,4,466,384,90,42.36,121.32,3,0,23,11


Sorted by : Ave


Unnamed: 0,player,Innings,NO,Runs,BF,HS,Ave,SR,50s,100s,4s,6s
81,Ishan Kishan,13,4,516,354,99,57.33,145.72,4,0,36,30
27,KL Rahul,14,2,670,518,132,55.83,129.32,5,1,58,23
37,AB de Villiers,14,4,454,286,73,45.4,158.69,5,0,33,23
82,KS Williamson,11,4,317,237,67,45.29,133.7,3,0,26,10
19,S Dhawan,17,3,618,427,106,44.14,144.7,4,2,67,13


Sorted by : SR


Unnamed: 0,player,Innings,NO,Runs,BF,HS,Ave,SR,50s,100s,4s,6s
30,N Pooran,14,4,353,208,77,35.3,169.63,2,0,23,25
54,SV Samson,14,1,375,236,85,28.85,158.83,3,0,21,26
37,AB de Villiers,14,4,454,286,73,45.4,158.69,5,0,33,23
28,MA Agarwal,11,0,424,271,106,38.55,156.4,2,1,44,15
24,MP Stoinis,17,3,352,237,65,25.14,148.46,3,0,31,16


Sorted by : 50s


Unnamed: 0,player,Innings,NO,Runs,BF,HS,Ave,SR,50s,100s,4s,6s
37,AB de Villiers,14,4,454,286,73,45.4,158.69,5,0,33,23
27,KL Rahul,14,2,670,518,132,55.83,129.32,5,1,58,23
35,D Padikkal,15,0,473,379,74,31.53,124.77,5,0,51,8
41,DA Warner,16,2,548,407,85,39.14,134.61,4,0,52,14
2,SA Yadav,15,3,480,331,79,40.0,144.97,4,0,61,11


Sorted by : 100s


Unnamed: 0,player,Innings,NO,Runs,BF,HS,Ave,SR,50s,100s,4s,6s
19,S Dhawan,17,3,618,427,106,44.14,144.7,4,2,67,13
27,KL Rahul,14,2,670,518,132,55.83,129.32,5,1,58,23
28,MA Agarwal,11,0,424,271,106,38.55,156.4,2,1,44,15
0,RG Sharma,12,0,332,260,80,27.67,127.64,3,0,27,19
41,DA Warner,16,2,548,407,85,39.14,134.61,4,0,52,14


Sorted by : 4s


Unnamed: 0,player,Innings,NO,Runs,BF,HS,Ave,SR,50s,100s,4s,6s
19,S Dhawan,17,3,618,427,106,44.14,144.7,4,2,67,13
2,SA Yadav,15,3,480,331,79,40.0,144.97,4,0,61,11
27,KL Rahul,14,2,670,518,132,55.83,129.32,5,1,58,23
41,DA Warner,16,2,548,407,85,39.14,134.61,4,0,52,14
35,D Padikkal,15,0,473,379,74,31.53,124.77,5,0,51,8


Sorted by : 6s


Unnamed: 0,player,Innings,NO,Runs,BF,HS,Ave,SR,50s,100s,4s,6s
81,Ishan Kishan,13,4,516,354,99,57.33,145.72,4,0,36,30
54,SV Samson,14,1,375,236,85,28.85,158.83,3,0,21,26
30,N Pooran,14,4,353,208,77,35.3,169.63,2,0,23,25
67,EJG Morgan,14,4,418,302,68,41.8,138.36,1,0,32,24
37,AB de Villiers,14,4,454,286,73,45.4,158.69,5,0,33,23


## Top Bowling Stats

In [31]:
def bowling_summary():
    _, df = get_2020_data()
    all_players = df['bowler'].unique()
    
    data=[]
    for player in all_players:
        
        dfp = df[ df['bowler'] == player ]
        Inns_ = dfp.shape[0]
        
        Ovs_  = add_overs(dfp['O'].values)
        Wkts_ = dfp['W'].sum()

        #HS_   = max(dfp.Runs) to be calculated for best bowling figures
        SR_   = 0.
        Ave_  = 0.
        Fours_= dfp['4s'].sum()
        Sixes_= dfp['6s'].sum()
        WDs_  = dfp['WD'].sum()
        NBs_  = dfp['NB'].sum()

        NoWs_= ((dfp['W']==0)).sum()
        ThreeWs_= ((dfp['W']>=3)).sum()
        FourWs_ = ((dfp['W']>=4)).sum()
        FiveWs_ = ((dfp['W']>=5)).sum()

        data.append([player, Inns_, Ovs_, Wkts_, SR_, Ave_, Fours_, Sixes_,
                     WDs_, NBs_, NoWs_, ThreeWs_, FourWs_, FiveWs_])
    
    df_p = pd.DataFrame(data, columns=['Player', 'Innings', 'Overs', 'Wickets', 
                                       'SR', 'Ave', 'Fours', 'Sixes', 'WDs', 'NBs',
                                       '0-Fers', '3-Fers', '4-Fers', '5-Fers']) 
    
    return df_p

df_summary= bowling_summary()
df_summary.head(2)

Unnamed: 0,Player,Innings,Overs,Wickets,SR,Ave,Fours,Sixes,WDs,NBs,0-Fers,3-Fers,4-Fers,5-Fers
0,DL Chahar,14,52,12,0.0,0.0,45,6,9,0,7,0,0,0
1,SM Curran,13,42,13,0.0,0.0,36,8,11,2,5,2,0,0


In [33]:
def sorted_table(sort_by='Wickets', min_wkts=10, ascending=False):
    df_summary = bowling_summary()
    df_summary = df_summary[ (df_summary['Wickets']>=min_wkts) ]
    df_sorted  = df_summary.sort_values(by=[sort_by], ascending=ascending)
    return df_sorted

def get_top_players():
    sorting_list = ['Wickets', '3-Fers', 'Fours', 'Sixes', 'WDs', 'NBs']
    for sort_item in sorting_list:
        print ('Sorted by :',sort_item)

        if sort_item in ['Fours', 'Sixes', 'WDs', 'NBs']:
            df_sorted = sorted_table(sort_by=sort_item, ascending=True)
        else:
            df_sorted = sorted_table(sort_by=sort_item, ascending=False)

        display(df_sorted[:5])

get_top_players()

Sorted by : Wickets


Unnamed: 0,Player,Innings,Overs,Wickets,SR,Ave,Fours,Sixes,WDs,NBs,0-Fers,3-Fers,4-Fers,5-Fers
17,K Rabada,17,65.4,30,0.0,0.0,51,21,15,2,3,4,2,0
7,JJ Bumrah,15,60.0,27,0.0,0.0,37,13,11,1,4,5,2,0
5,TA Boult,15,57.2,25,0.0,0.0,65,10,6,0,2,3,1,0
15,A Nortje,16,61.0,22,0.0,0.0,55,22,10,0,4,2,0,0
32,YS Chahal,15,57.1,21,0.0,0.0,23,16,3,2,2,2,0,0


Sorted by : 3-Fers


Unnamed: 0,Player,Innings,Overs,Wickets,SR,Ave,Fours,Sixes,WDs,NBs,0-Fers,3-Fers,4-Fers,5-Fers
7,JJ Bumrah,15,60.0,27,0.0,0.0,37,13,11,1,4,5,2,0
17,K Rabada,17,65.4,30,0.0,0.0,51,21,15,2,3,4,2,0
26,Rashid Khan,16,64.0,20,0.0,0.0,17,10,3,0,4,3,0,0
5,TA Boult,15,57.2,25,0.0,0.0,65,10,6,0,2,3,1,0
11,Mohammed Shami,14,53.4,20,0.0,0.0,51,15,10,1,3,3,0,0


Sorted by : Fours


Unnamed: 0,Player,Innings,Overs,Wickets,SR,Ave,Fours,Sixes,WDs,NBs,0-Fers,3-Fers,4-Fers,5-Fers
38,R Tewatia,14,46.0,10,0.0,0.0,12,16,8,2,8,2,0,0
26,Rashid Khan,16,64.0,20,0.0,0.0,17,10,3,0,4,3,0,0
76,CH Morris,9,31.4,11,0.0,0.0,17,6,14,0,4,2,1,0
46,M Ashwin,9,31.3,10,0.0,0.0,20,9,2,0,2,1,0,0
32,YS Chahal,15,57.1,21,0.0,0.0,23,16,3,2,2,2,0,0


Sorted by : Sixes


Unnamed: 0,Player,Innings,Overs,Wickets,SR,Ave,Fours,Sixes,WDs,NBs,0-Fers,3-Fers,4-Fers,5-Fers
89,JO Holder,7,28.0,14,0.0,0.0,24,5,8,0,0,2,0,0
76,CH Morris,9,31.4,11,0.0,0.0,17,6,14,0,4,2,1,0
0,DL Chahar,14,52.0,12,0.0,0.0,45,6,9,0,7,0,0,0
61,SN Thakur,9,32.2,10,0.0,0.0,26,8,7,2,2,0,0,0
1,SM Curran,13,42.0,13,0.0,0.0,36,8,11,2,5,2,0,0


Sorted by : WDs


Unnamed: 0,Player,Innings,Overs,Wickets,SR,Ave,Fours,Sixes,WDs,NBs,0-Fers,3-Fers,4-Fers,5-Fers
52,CV Varun,13,52.0,17,0.0,0.0,27,9,1,0,3,1,1,1
46,M Ashwin,9,31.3,10,0.0,0.0,20,9,2,0,2,1,0,0
32,YS Chahal,15,57.1,21,0.0,0.0,23,16,3,2,2,2,0,0
26,Rashid Khan,16,64.0,20,0.0,0.0,17,10,3,0,4,3,0,0
22,Sandeep Sharma,13,52.0,14,0.0,0.0,29,13,5,0,6,1,0,0


Sorted by : NBs


Unnamed: 0,Player,Innings,Overs,Wickets,SR,Ave,Fours,Sixes,WDs,NBs,0-Fers,3-Fers,4-Fers,5-Fers
0,DL Chahar,14,52.0,12,0.0,0.0,45,6,9,0,7,0,0,0
52,CV Varun,13,52.0,17,0.0,0.0,27,9,1,0,3,1,1,1
46,M Ashwin,9,31.3,10,0.0,0.0,20,9,2,0,2,1,0,0
41,PJ Cummins,14,52.0,12,0.0,0.0,43,15,9,0,8,2,1,0
36,S Gopal,14,50.0,10,0.0,0.0,30,19,5,0,6,0,0,0


In [7]:
f='1216493.yaml' # tied game

1st innings
dict_keys(['1st innings'])
2nd innings
dict_keys(['2nd innings'])


In [8]:
fil=os.path.join(data_dir, f )
data = yaml.load(open(fil, 'r'))

In [9]:
data['info']

{'competition': 'IPL',
 'dates': ['2020-09-20'],
 'gender': 'male',
 'match_type': 'T20',
 'outcome': {'result': 'tie', 'eliminator': 'Delhi Capitals'},
 'overs': 20,
 'player_of_match': ['MP Stoinis'],
 'teams': ['Delhi Capitals', 'Kings XI Punjab'],
 'toss': {'decision': 'field', 'winner': 'Kings XI Punjab'},
 'umpires': ['AK Chaudhary', 'Nitin Menon'],
 'venue': 'Dubai International Cricket Stadium'}

In [10]:
data['innings'][0].keys()

dict_keys(['1st innings'])

In [11]:
[list(d.keys())[0] for d in data['innings'] if 'Super' not in list(d.keys())[0] ]

['1st innings', '2nd innings']