In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly as py
%matplotlib inline

In [None]:
# Load matches.csv from the Kaggle input directory into `match`.
match=pd.read_csv('/kaggle/input/skillsay-ai-crickethon-dataset/matches.csv')

In [None]:
# Display the match table as the cell output.
match

In [None]:
# Read the ball-by-ball table, then attach each delivery row to its match row
# (match.id == delivery.match_id).
delivery = pd.read_csv('/kaggle/input/skillsay-ai-crickethon-dataset/deliveries.csv')
newdelivery = pd.merge(match, delivery, left_on='id', right_on='match_id')
newdelivery

In [None]:
# (rows, columns) of the merged match + delivery table.
newdelivery.shape

In [None]:
# Column names available after the merge.
newdelivery.columns

In [None]:
# Rename the key column so `delivery` uses the same 'id' name as `match`,
# which lets the later merges join with on='id'.
delivery = delivery.rename(columns = {"match_id" : "id"})

In [None]:
# Batting-related columns of the delivery table.
delivery[['id','batsman','over','ball','batsman_runs']]

In [None]:
# Match id paired with its season.
match[['id','season']]

In [None]:
# Tag every delivery's batting columns with the season of the match it belongs to.
batting_cols = delivery[['id','batsman','over','ball','batsman_runs']]
season_lookup = match[['id','season']]
mergedbats = batting_cols.merge(season_lookup, on='id')

In [None]:
# Total runs per batsman per season.
# Only 'batsman_runs' is summed: adding up the other columns in this frame (match ids,
# over numbers, ball numbers) produces meaningless totals that would otherwise be
# printed in the year-wise report.
mergedbats = mergedbats.groupby(['batsman','season'])[['batsman_runs']].sum()
mergedbats

In [None]:
# Per bowler and season, count the non-null entries of each remaining column
# (id / over / ball / player_dismissed).
bowling_cols = delivery[['id','bowler','over','ball','player_dismissed']]
mergedbowl = bowling_cols.merge(match[['id','season']], on='id')
mergedbowl = mergedbowl.groupby(['bowler', 'season']).count()
mergedbowl

Now we will identify the best batsmen and bowlers across all 20 overs.

# Top Batsmen in 20 overs

In [None]:
# Group the 'batsman_runs' column once per batsman and derive two tables from it:
#   runsum      - total of batsman_runs
#   ballsplayed - number of delivery rows (count of batsman_runs entries)
runs_by_batsman = newdelivery.groupby('batsman')['batsman_runs']
runsum = runs_by_batsman.sum().reset_index()
ballsplayed = runs_by_batsman.count().reset_index()

In [None]:
# Join runs and ball counts on the batsman. Both inputs carry a 'batsman_runs' column,
# so the merge suffixes them _x (left: runs) and _y (right: balls); give them clear names.
runsballs = pd.merge(runsum, ballsplayed, on='batsman')
runsballs = runsballs.rename(columns={'batsman_runs_x': 'runs', 'batsman_runs_y': 'balls_played'})

In [None]:
# Strike rate = runs per ball, scaled to runs per 100 balls.
runsballs['strike_rate'] = runsballs['runs'].div(runsballs['balls_played']).mul(100)
runsballs

In [None]:
# Keep only players with at least 200 balls (the threshold is inclusive).
# .copy() makes the filtered frame independent of the original, so adding columns to it
# in later cells does not trigger pandas' SettingWithCopyWarning.
runsballs = runsballs[runsballs['balls_played']>=200].copy()

In [None]:
# Z score of the strike rate: distance from the mean in units of the sample standard deviation.
meanrunsballs = runsballs['strike_rate'].mean()
stdrunsballs = runsballs['strike_rate'].std()
runsballs['Z Score'] = runsballs['strike_rate'].sub(meanrunsballs).div(stdrunsballs)

In [None]:
# Five rows with the highest strike rate.
runsballs.sort_values('strike_rate', ascending=False).head()

# Top Bowlers in 20 Overs

In [None]:
# Work on an independent copy of newdelivery, named bowlers, for the bowling analysis.
bowlers = newdelivery.copy()

In [None]:
# Keep only the deliveries whose dismissal kind is one of those listed below
# (the kinds this analysis attributes to the bowler).
outtype = ['caught','bowled','lbw','stumped','caught and bowled','hit wicket']
is_bowler_wicket = bowlers['dismissal_kind'].isin(outtype)
outtotal = bowlers.loc[is_bowler_wicket]

In [None]:
# Wickets per bowler: count dismissed players, largest first, stored in column 'wasout'.
outtotal = (
    outtotal.groupby('bowler')['player_dismissed']
    .count()
    .sort_values(ascending=False)
    .reset_index()
    .rename(columns={'player_dismissed': 'wasout'})
)

In [None]:
# Runs conceded per bowler: sum of 'total_runs' over all of the bowler's deliveries.
runsgiven = (
    bowlers.groupby('bowler')['total_runs']
    .sum()
    .reset_index()
    .rename(columns={'total_runs': 'runs'})
)

In [None]:
# Display runs conceded per bowler.
runsgiven

In [None]:
# Deliveries per bowler: number of rows recorded against each bowler.
# NOTE(review): this counts every row, presumably including wides and no-balls — confirm.
ballsthrown = (
    bowlers.groupby('bowler')['total_runs']
    .count()
    .reset_index()
    .rename(columns={'total_runs': 'ballsthrown'})
)

In [None]:
# Start the bowler performance table: wickets joined with runs conceded.
# The default inner join keeps only bowlers that appear in both frames.
bowlerperf = pd.merge(outtotal, runsgiven, on='bowler')

In [None]:
# Add the number of deliveries bowled to the performance table.
bowlerperf = pd.merge(bowlerperf, ballsthrown, on='bowler')
bowlerperf.head()

In [None]:
# NOTE: despite its name, 'economyrate' stores 100 minus runs-per-over
# (runs / (balls / 6)), so a LARGER value means fewer runs conceded per over.
bowlerperf['economyrate'] = 100 - (bowlerperf['runs'] / (bowlerperf['ballsthrown'] / 6))
# Keep bowlers with at least 200 deliveries. .copy() makes the filtered frame independent,
# so adding columns to it in later cells does not trigger SettingWithCopyWarning.
bowlerperf = bowlerperf[bowlerperf['ballsthrown'] >= 200].copy()

In [None]:
# First rows of the filtered bowler table.
bowlerperf.head()

In [None]:
# Mean of the 'economyrate' column over the bowlers kept in bowlerperf.
avgbowlerperf=bowlerperf['economyrate'].mean()

In [None]:
# Standardise 'economyrate': subtract the mean, divide by the sample standard deviation.
stdbowlerperf = bowlerperf['economyrate'].std()
bowlerperf['Z Score'] = bowlerperf['economyrate'].sub(avgbowlerperf).div(stdbowlerperf)

In [None]:
# Bowlers ordered by 'economyrate', largest value first (the result is displayed, not stored).
bowlerperf.sort_values('economyrate', ascending = False)

In [None]:
# Display the bowler performance table in its stored order.
bowlerperf

# Best 50 Batsmen in Last 4 Overs

In [None]:
# Select the deliveries whose over number is greater than 15.
# NOTE(review): the headings call this the "last 4 overs"; if 'over' is numbered 1-20 this
# mask covers overs 16-20 (five overs) — confirm the numbering, or use > 16.
plusfif=delivery['over']>15
lastfour=delivery[plusfif]
lastfour.head()

In [None]:
# Late-overs batting: runs scored and deliveries faced per batsman.
lf_batsman_runs = lastfour.groupby('batsman')['batsman_runs']
lfruns = lf_batsman_runs.sum().reset_index().rename(columns={'batsman_runs': 'runs'})
lfballs = lf_batsman_runs.count().reset_index().rename(columns={'batsman_runs': 'balls'})
lfbatsmanperf = lfruns.merge(lfballs, on='batsman')

# Keep batsmen with at least 200 deliveries. .copy() makes the filtered frame independent,
# so the column assignments below do not trigger SettingWithCopyWarning.
lfbatsmanperf = lfbatsmanperf[lfbatsmanperf['balls'] >= 200].copy()
lfbatsmanperf['strike_rate'] = (lfbatsmanperf['runs'] / lfbatsmanperf['balls']) * 100

avglfbatsmanperf = lfbatsmanperf['strike_rate'].mean()
stdlfbatsmanperf = lfbatsmanperf['strike_rate'].std()

# Normal density evaluated at each strike rate, using the mean and standard deviation above.
lfbatsmanperf['Prob_Dist'] = (
    1 / (stdlfbatsmanperf * np.sqrt(2 * np.pi))
    * np.exp(-(lfbatsmanperf['strike_rate'] - avglfbatsmanperf) ** 2 / (2 * stdlfbatsmanperf ** 2))
)
lfbatsmanperf['Z Score'] = (lfbatsmanperf['strike_rate'] - avglfbatsmanperf) / stdlfbatsmanperf

# The statistics above use every qualifying batsman; only afterwards is the table
# cut down to the 50 highest Z scores.
lfbatsmanperf = lfbatsmanperf.sort_values('Z Score', ascending=False).head(50)

In [None]:
# Display the late-overs batsman table (at most 50 rows, highest Z score first).
lfbatsmanperf

# Best 50 Bowlers in Last 4 Overs

In [None]:
# Late-overs bowling: runs conceded and deliveries bowled per bowler.
lf_bowler_runs = lastfour.groupby('bowler')['total_runs']
lfrunsgiven = lf_bowler_runs.sum().reset_index().rename(columns={'total_runs': 'runs'})
# NOTE: `lfballs` is reused here and now holds per-bowler counts (it held per-batsman counts earlier).
lfballs = lf_bowler_runs.count().reset_index().rename(columns={'total_runs': 'balls'})
lfbowlerperf = lfrunsgiven.merge(lfballs, on='bowler')

# Keep bowlers with at least 200 deliveries. .copy() makes the filtered frame independent,
# so the column assignments below do not trigger SettingWithCopyWarning.
lfbowlerperf = lfbowlerperf[lfbowlerperf['balls'] >= 200].copy()

# Despite its name, 'economyrate' stores 100 minus runs-per-over, so larger is better.
lfbowlerperf['economyrate'] = 100 - (lfbowlerperf['runs'] / (lfbowlerperf['balls'] / 6))

avglfbowlerperf = lfbowlerperf['economyrate'].mean()
stdlfbowlerperf = lfbowlerperf['economyrate'].std()

# Normal density evaluated at each 'economyrate' value, using the mean and standard deviation above.
lfbowlerperf['Prob_Dist'] = (
    1 / (stdlfbowlerperf * np.sqrt(2 * np.pi))
    * np.exp(-(lfbowlerperf['economyrate'] - avglfbowlerperf) ** 2 / (2 * stdlfbowlerperf ** 2))
)
lfbowlerperf['Z Score'] = (lfbowlerperf['economyrate'] - avglfbowlerperf) / stdlfbowlerperf

# The statistics above use every qualifying bowler; only afterwards is the table
# cut down to the 50 highest Z scores.
lfbowlerperf = lfbowlerperf.sort_values("Z Score", ascending=False).head(50)
lfbowlerperf


# Comparison Function

We have successfully identified the best batsmen and bowlers, both across all 20 overs and in the last 4 overs, among players who have played at least 200 balls, using strike rates and economy rates respectively. We have also calculated their Z Scores.

Now we will compare the Z Scores of a batsman and a bowler to decide which of the two is the better T20 player.

# Idea
* Compare the Z Scores of the batsman and the bowler across all 20 overs; the one with the greater value is the better player, and we increment that player's counter accordingly.
* Apply the same rule to the players' Z Scores in the last 4 overs.
* If only one of the players is present in the last-4-overs table, we increment that player's counter by 2 to give him an extra advantage.


In [None]:
def check(batsman_name, bowler_name):
    """Tell whether both players appear in the all-overs performance tables.

    Args:
        batsman_name: Name looked up in the 'batsman' column of ``runsballs``.
        bowler_name: Name looked up in the 'bowler' column of ``bowlerperf``.

    Returns:
        bool: True only when both names are found; False otherwise.
    """
    batsman_known = (runsballs['batsman'] == batsman_name).any()
    bowler_known = (bowlerperf['bowler'] == bowler_name).any()
    # bool() converts numpy's bool_ to a plain Python bool, and makes the negative
    # case an explicit False instead of an implicit None.
    return bool(batsman_known and bowler_known)

In [None]:
def senddata(batsman_name, bowler_name):
    """Collect the rows describing the two players from the four performance tables.

    Args:
        batsman_name: Value matched against the 'batsman' column.
        bowler_name: Value matched against the 'bowler' column.

    Returns:
        tuple: (all-overs batsman rows, late-overs batsman rows,
        all-overs bowler rows, late-overs bowler rows). Any element may be an
        empty DataFrame when the player is absent from that table.
    """
    def rows_for(frame, column, player):
        # Boolean-mask selection of the rows whose `column` equals `player`.
        return frame[frame[column] == player]

    return (
        rows_for(runsballs, 'batsman', batsman_name),
        rows_for(lfbatsmanperf, 'batsman', batsman_name),
        rows_for(bowlerperf, 'bowler', bowler_name),
        rows_for(lfbowlerperf, 'bowler', bowler_name),
    )

In [None]:
def compare(batsman_name, bowler_name):
    """Print which of a batsman and a bowler scores higher on the Z-score comparison.

    Scoring:
      * All overs: the player with the higher Z score gets 1 point (both get 1 on a tie).
      * Last overs: the same rule, using the late-overs tables, when both players
        appear in them.
      * If exactly one player appears in the late-overs tables, that player gets 2 points.

    Args:
        batsman_name: Batsman to look up (must be present in ``runsballs``).
        bowler_name: Bowler to look up (must be present in ``bowlerperf``).

    Returns:
        None. The verdict, or an error message when ``check`` rejects the names, is printed.
    """
    def _points(bat_z, bowl_z):
        # (batsman points, bowler points) for one head-to-head Z-score comparison.
        if bat_z > bowl_z:
            return 1, 0
        if bat_z < bowl_z:
            return 0, 1
        if bat_z == bowl_z:
            return 1, 1
        return 0, 0  # neither ordering holds (e.g. a NaN score): nobody scores

    if not check(batsman_name, bowler_name):
        print('Incorrect player name or one or both players have not played over 200 balls and thys cannot be compared.')
        return

    allbats, lfbats, allbowl, lfbowl = senddata(batsman_name, bowler_name)

    # An empty frame means the player is not in the late-overs top-50 table.
    bat_in_late = len(lfbats) > 0
    bowl_in_late = len(lfbowl) > 0

    # All-overs comparison.
    varbat, varbowl = _points(allbats['Z Score'].values[0], allbowl['Z Score'].values[0])

    if bat_in_late and bowl_in_late:
        # Late-overs comparison: use the LATE-overs Z scores (the previous code reused
        # the all-overs scores here, counting the same result twice).
        bat_pts, bowl_pts = _points(lfbats['Z Score'].values[0], lfbowl['Z Score'].values[0])
        varbat += bat_pts
        varbowl += bowl_pts
    elif bowl_in_late:
        # Only the bowler made the late-overs table.
        varbowl += 2
    elif bat_in_late:
        # Only the batsman made the late-overs table.
        varbat += 2

    # Final verdict.
    if varbat > varbowl:
        print(batsman_name, 'is a better player than', bowler_name, 'in T20')
    elif varbat < varbowl:
        print(bowler_name, 'is a better player than', batsman_name, 'in T20')
    else:
        print(batsman_name, 'and', bowler_name, 'are players of same grade.')
        

In [None]:
# Example: compare a batsman with a bowler (batsman name first, bowler name second).
compare('MS Dhoni', 'SL Malinga')

In [None]:
# Second example pairing.
compare('V Kohli', 'SP Narine')

# Plotting the best batsmen and bowlers on Bell Curve

## For Batsmen

In [None]:
import plotly.offline as pyo
import plotly.graph_objs as go

# One marker per batsman in the late-overs table: strike rate on x,
# normal-density value on y, batsman name as hover text.
trace = go.Scatter(
    x=lfbatsmanperf['strike_rate'],
    y=lfbatsmanperf['Prob_Dist'],
    mode='markers',
    text=lfbatsmanperf['batsman'],
    marker=dict(color='#00a65a', size=16),
)
data = [trace]
layout = go.Layout(
    title='Strike Rate vs Prob Distribution',
    xaxis=dict(title='Batsman Strike Rate'),
    yaxis=dict(title='Probability of similar batsmen'),
)
fig = go.Figure(data=data, layout=layout)
# pyo.plot writes the chart to an HTML file; its return value is printed below.
batsmancomp = pyo.plot(fig, filename='myfile.html')
print(batsmancomp)

## For Bowlers

In [None]:
# One marker per bowler in the late-overs table: 'economyrate' value on x,
# normal-density value on y, bowler name as hover text.
# Relies on `go` and `pyo` imported in the batsman plotting cell.
trace = go.Scatter(
    x=lfbowlerperf['economyrate'],
    y=lfbowlerperf['Prob_Dist'],
    mode='markers',
    text=lfbowlerperf['bowler'],
    marker={'color': '#00a65a', 'size': 16},
)
data = [trace]
layout = go.Layout(
    title='Economy Rate vs Prob Distribution',
    xaxis={'title': 'Bowler Economy Rate'},
    yaxis={'title': 'Probability of similar bowlers'},
)
fig = go.Figure(data=data, layout=layout)
# Write to its own file: reusing 'myfile.html' would overwrite the batsman chart.
bowlercomp = pyo.plot(fig, filename='bowlers_bell_curve.html')
bowlercomp

# Comparing the players and displaying their statistics
## The function prints a report card for a player, showing their overall career performance as well as year-wise results

In [None]:
def batsmansearch(batsman_name):
    """Print every per-season row of ``mergedbats`` belonging to one batsman.

    ``mergedbats`` is indexed by (batsman, season); rows whose first index level
    equals ``batsman_name`` are printed one Series at a time, in table order.

    Args:
        batsman_name: Batsman to look up. Nothing is printed if there is no match.
    """
    # Vectorised match on the first index level instead of inspecting rows one by one.
    is_player = mergedbats.index.get_level_values(0) == batsman_name
    for _, season_row in mergedbats[is_player].iterrows():
        print(season_row)

In [None]:
def bowlersearch(bowler_name):
    """Print every per-season row of ``mergedbowl`` belonging to one bowler.

    ``mergedbowl`` is indexed by (bowler, season); rows whose first index level
    equals ``bowler_name`` are printed one Series at a time, in table order.

    Args:
        bowler_name: Bowler to look up. Nothing is printed if there is no match.
    """
    # Vectorised match on the first index level instead of inspecting rows one by one.
    is_player = mergedbowl.index.get_level_values(0) == bowler_name
    for _, season_row in mergedbowl[is_player].iterrows():
        print(season_row)

In [None]:
def batsmanstats(batsman_name):
    """Print a report card for one batsman: overall and late-overs figures, then per-season rows.

    Overall figures come from ``runsballs``; the "dangerous overs" figures come from
    ``lfbatsmanperf``, which holds only the top 50 late-overs batsmen. A figure is
    shown as 'N/A' when the batsman has no row in the relevant table, instead of
    the lookup raising ValueError.

    Args:
        batsman_name: Batsman to report on; must match the dataset spelling exactly.
    """
    def _stat(frame, column):
        # Value of `column` in the batsman's row of `frame`, or 'N/A' if he has no row.
        values = frame.loc[frame['batsman'] == batsman_name, column]
        return 'N/A' if values.empty else values.item()

    print("HERE IS A REPORT OF OVERALL PERFORMANCE OF: ", batsman_name)
    print("******************************************")
    print("The Overall Strike Rate is: ", _stat(runsballs, 'strike_rate'))
    print("The Overall Strike Rate in dangerous overs is: ", _stat(lfbatsmanperf, 'strike_rate'))
    print("Total runs scored: ", _stat(runsballs, 'runs'))
    print("Total runs scored in dangerous overs: ", _stat(lfbatsmanperf, 'runs'))
    print("Total balls played: ", _stat(runsballs, 'balls_played'))
    print("Total balls played in dangerous overs: ", _stat(lfbatsmanperf, 'balls'))
    print("*******************************************")
    print("Results year wise: ")
    batsmansearch(batsman_name)

In [None]:
# Example report card; the name must match the 'batsman' column value exactly.
batsmanstats('V Kohli')

In [None]:
def bowlerstats(bowler_name):
    """Print a report card for one bowler: overall and late-overs figures, then per-season rows.

    Overall figures come from ``bowlerperf``; the "dangerous overs" figures come from
    ``lfbowlerperf``, which holds only the top 50 late-overs bowlers. A figure is
    shown as 'N/A' when the bowler has no row in the relevant table, instead of
    the lookup raising ValueError.

    NOTE: the 'economyrate' column printed here is built in this notebook as
    100 minus runs-per-over, so a larger number means a more economical bowler.

    Args:
        bowler_name: Bowler to report on; must match the dataset spelling exactly.
    """
    def _stat(frame, column):
        # Value of `column` in the bowler's row of `frame`, or 'N/A' if he has no row.
        values = frame.loc[frame['bowler'] == bowler_name, column]
        return 'N/A' if values.empty else values.item()

    def _overs(balls):
        # Convert a delivery count to overs; pass the 'N/A' placeholder through unchanged.
        return balls if isinstance(balls, str) else balls / 6

    print("HERE IS A REPORT OF OVERALL PERFORMANCE OF: ", bowler_name)
    print("******************************************")
    print("The Overall Economy Rate is: ", _stat(bowlerperf, 'economyrate'))
    print("The Overall Economy Rate in dangerous overs is: ", _stat(lfbowlerperf, 'economyrate'))
    print("Total runs given: ", _stat(bowlerperf, 'runs'))
    print("Total runs given in dangerous overs: ", _stat(lfbowlerperf, 'runs'))
    print("Total overs bowled: ", _overs(_stat(bowlerperf, 'ballsthrown')))
    print("Total overs bowled in dangerous overs: ", _overs(_stat(lfbowlerperf, 'balls')))
    print("*******************************************")
    print("Results year wise: ")
    bowlersearch(bowler_name)

In [None]:
# Example report card; the name must match the 'bowler' column value exactly.
bowlerstats('SL Malinga')