In [76]:
# For data handling
import numpy as np
import pandas as pd

# For visualization
import matplotlib.pyplot as plt
# import seaborn as sns

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# For creating widgets
from ipywidgets import widgets, interact

# For hiding warnings
import warnings
warnings.filterwarnings('ignore')

In [78]:
# Load the cleaned ODI dataset from the working directory into the module-level
# frame `df`, which every analysis function in this notebook reads directly.
# Columns referenced by the visible cells: batting_team, bowling_team, year,
# striker, bowler, runs_off_bat, ball, wicket_type, match_id, total, over, venue.
# NOTE(review): the code appears to treat each row as one delivery -- confirm against the CSV.
df=pd.read_csv('ODI Cricket Dataset clean.csv')
# df.head()

In [79]:
# Module-level defaults: the team to analyse, a second team name
# (presumably the opponent for head-to-head analysis), and the first year to include.
Country='India'
team2='New Zealand'
Starting_Year=2004 # Matches from Starting_Year onwards (year >= Starting_Year) will be considered for analysis

In [80]:
# Filtering the Country for analysis.
# `&` binds tighter than `|`, so the two team tests are parenthesised together;
# otherwise the year filter would only apply to rows where Country is bowling.
dfnew=df[((df['batting_team']==Country)|(df['bowling_team']==Country))&(df['year']>=Starting_Year)]

In [97]:
## Most run scoring batsman and most wicket taking bowler

In [105]:
def most_runs(Country, Starting_Year=2004):
    """Show a treemap of the runs scored by each of ``Country``'s batsmen.

    Tile size is the batsman's summed ``runs_off_bat``; tile colour is the
    strike rate, computed as ``runs_off_bat * 100 / count of `ball` rows``.

    Parameters
    ----------
    Country : str
        Value matched against the ``batting_team`` column of the module-level ``df``.
    Starting_Year : int, default 2004
        Only rows with ``year >= Starting_Year`` are used.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # Only rows where Country is batting are needed, so one mask replaces the
    # original "batting or bowling" filter followed by a second batting filter.
    batting=df[(df['batting_team']==Country)&(df['year']>=Starting_Year)]

    # String aggregator names: passing the builtin `sum` to agg is deprecated in pandas.
    df_batsman=batting.groupby('striker').agg({'runs_off_bat':'sum', 'ball':'count'}).reset_index()

    # Finding the strike rate
    df_batsman['strike_rate']=(df_batsman['runs_off_bat']*100/df_batsman['ball']).round(1)

    # Drop batsmen with a zero strike rate and those at or above the 98th
    # percentile, so a few extreme values do not dominate the colour scale.
    upper_bound=df_batsman['strike_rate'].quantile(0.98)
    df_batsman_temp=df_batsman[(df_batsman['strike_rate']>0)&(df_batsman['strike_rate']<upper_bound)]

    fig=px.treemap(df_batsman_temp, path=[px.Constant(f"{Country}'s Batsman"),'striker'],values='runs_off_bat',
               color='strike_rate', color_continuous_scale='RdBu')
    fig.update_layout(title=dict(text=f"Runs scored by {Country}'s batsman ({Starting_Year} onwards)",
                                 xanchor='center', yanchor='top', x=0.5, font=dict(size=20)))
    fig.show()

In [106]:
# most_runs('India', Starting_Year=2010)

In [83]:
def most_wickets(Country, Starting_Year=2004):
    """Show a treemap of the wickets taken by each of ``Country``'s bowlers.

    Tile size is the number of non-null ``wicket_type`` rows per bowler; tile
    colour is the economy, computed as ``runs_off_bat * 6 / count of `ball` rows``.

    Parameters
    ----------
    Country : str
        Value matched against the ``bowling_team`` column of the module-level ``df``.
    Starting_Year : int, default 2004
        Only rows with ``year >= Starting_Year`` are used.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # Only rows where Country is bowling are needed, so a single mask suffices.
    bowling=df[(df['bowling_team']==Country)&(df['year']>=Starting_Year)]

    # String aggregator names: passing the builtin `sum` to agg is deprecated in pandas.
    # NOTE(review): every non-null wicket_type is credited to the bowler; if the data
    # contains dismissal kinds such as run outs they are included too -- confirm.
    df_bowler=bowling.groupby('bowler').agg({'runs_off_bat':'sum', 'wicket_type':'count', 'ball':'count'}).reset_index()

    # Finding the economy (runs conceded per six counted balls).
    # NOTE(review): only runs_off_bat is used, so extras are not part of the economy -- confirm intent.
    df_bowler['economy']=(df_bowler['runs_off_bat']*6/df_bowler['ball']).round(1)

    # Keep bowlers with at least one wicket and an economy below the 95th percentile.
    economy_cap=df_bowler['economy'].quantile(0.95)
    df_bowler_temp=df_bowler[(df_bowler['wicket_type']>0)&(df_bowler['economy']<economy_cap)]

    # For plotting
    fig=px.treemap(df_bowler_temp, path=[px.Constant(f"{Country}'s Bowler"),'bowler'],values='wicket_type',
               color='economy', color_continuous_scale='RdBu')
    fig.update_layout(title=dict(text=f"Wickets taken by {Country}'s bowler ({Starting_Year} onwards)",
                                 xanchor='center', yanchor='top', x=0.5))
    fig.show()

In [84]:
# most_wickets('Australia')

In [98]:
## Most 100's, 50's, sixes, fours scored

In [85]:
def score_sheet(Country, Starting_Year=2004):
    """Show the top five ``Country`` batsmen for 100's, 50's, sixes and fours.

    Runs are summed per ``(match_id, striker)``. Each such innings counts as
    one century when the sum is at least 100, and as one fifty when it is at
    least 50 but below 100, so an innings is never counted more than once.
    Sixes and fours are rows whose ``runs_off_bat`` equals 6 or 4.

    Parameters
    ----------
    Country : str
        Value matched against the ``batting_team`` column of the module-level ``df``.
    Starting_Year : int, default 2004
        Only rows with ``year >= Starting_Year`` are used.

    Returns
    -------
    None
        The 2x2 bar-chart figure is displayed with ``fig.show()``.
    """
    # Only rows where Country is batting are needed, so a single mask suffices.
    batting=df[(df['batting_team']==Country)&(df['year']>=Starting_Year)]

    # String aggregator name: passing the builtin `sum` to agg is deprecated in pandas.
    innings=batting.groupby(['match_id','striker']).agg({'runs_off_bat':'sum'}).reset_index()
    innings_runs=innings['runs_off_bat']

    # One milestone per innings. Floor division (runs//50, runs//100) would count
    # a century as two fifties and a double century as two hundreds.
    dfnew_100=(innings[innings_runs>=100]
               .groupby('striker').size().reset_index(name='100_count')
               .nlargest(5,'100_count'))
    dfnew_50=(innings[(innings_runs>=50)&(innings_runs<100)]
              .groupby('striker').size().reset_index(name='50_count')
              .nlargest(5,'50_count'))

    # Boundary counts: number of rows per striker with exactly 6 / 4 runs off the bat.
    dfnew_sixes=batting[batting['runs_off_bat']==6].groupby('striker').agg({'runs_off_bat':'count'}).reset_index().nlargest(5,'runs_off_bat')
    dfnew_fours=batting[batting['runs_off_bat']==4].groupby('striker').agg({'runs_off_bat':'count'}).reset_index().nlargest(5,'runs_off_bat')

    fig=make_subplots(rows=2, cols=2, subplot_titles=("Most number of 100's","Most number of 50's",
                                                      "Most number of 6's","Most number of 4's"))

    fig.add_trace(go.Bar(x=dfnew_100['striker'], y=dfnew_100['100_count'], name="100's"), row=1, col=1)
    fig.add_trace(go.Bar(x=dfnew_50['striker'], y=dfnew_50['50_count'], name="50's"), row=1, col=2)
    fig.add_trace(go.Bar(x=dfnew_sixes['striker'], y=dfnew_sixes['runs_off_bat'], name="6's"), row=2, col=1)
    fig.add_trace(go.Bar(x=dfnew_fours['striker'], y=dfnew_fours['runs_off_bat'], name="4's"), row=2, col=2)

    fig.update_layout(title=dict(text=f"Score of {Country} ({Starting_Year} onwards)",
                                 xanchor='center', yanchor='top', x=0.5), showlegend=False)
    fig.show()

In [86]:
# score_sheet('Australia')

In [99]:
## Number of Matches won and lost

In [87]:
def win_loss(Country, Starting_Year=2004):
    """Show ``Country``'s win percentage against each opponent as a stacked bar chart.

    The ``total`` column is summed per ``(match_id, batting_team, bowling_team)``.
    Each of ``Country``'s batting rows is then paired, by ``match_id``, with the
    row where the other side batted, and a match is marked as a win when
    ``Country``'s summed total is strictly greater.

    Parameters
    ----------
    Country : str
        Value matched against the ``batting_team`` / ``bowling_team`` columns of
        the module-level ``df``.
    Starting_Year : int, default 2004
        Only rows with ``year >= Starting_Year`` are used.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    involved=(df['batting_team']==Country)|(df['bowling_team']==Country)
    dfnew=df[involved&(df['year']>=Starting_Year)]

    # String aggregator name: passing the builtin `sum` to agg is deprecated in pandas.
    df_wins=dfnew.groupby(['match_id', 'batting_team','bowling_team']).agg({'total':'sum'}).reset_index()

    dfCountry=df_wins[df_wins['batting_team']==Country]
    dfCountry_no=df_wins[df_wins['batting_team']!=Country]

    # Inner merge: matches in which only one side has batting rows drop out here.
    # The default _x/_y suffixes are kept; _x columns belong to Country's innings.
    dfteamnew=dfCountry.merge(dfCountry_no, on='match_id')

    # NOTE(review): equal totals count as "not a win", and totals are compared
    # directly with no adjustment for revised targets -- confirm this is acceptable.
    dfteamnew['win']=dfteamnew['total_x']>dfteamnew['total_y']

    fig=px.histogram(dfteamnew, y='bowling_team_x', color='win',barnorm='percent',
                category_orders={'win':[True, False]})
    fig.update_layout(title=dict(text=f"Winning percentage of {Country} ({Starting_Year} onwards)",
                                 xanchor='center', yanchor='top', x=0.5),
                     xaxis=dict(title='winning percentage'), yaxis=dict(title='opponent countries'))
    fig.show()

In [88]:
# win_loss('South Africa')

In [100]:
## Fall of wickets

In [89]:
def wicket_fall(Country, Starting_Year=2004):
    """Show, per over, how many ``Country`` wickets fell and by which dismissal type.

    Rows where ``Country`` is batting and ``wicket_type`` is not null are
    counted per ``(over, wicket_type)`` and drawn as a stacked bar chart.

    Parameters
    ----------
    Country : str
        Value matched against the ``batting_team`` column of the module-level ``df``.
    Starting_Year : int, default 2004
        Only rows with ``year >= Starting_Year`` are used.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    batting=df[(df['batting_team']==Country)&(df['year']>=Starting_Year)]

    # Count dismissal rows directly instead of adding a helper 'count' column to
    # a filtered slice, which is the chained-assignment pattern pandas warns about.
    # Rows with a null wicket_type contribute nothing either way, because groupby
    # drops null keys by default.
    dismissals=batting[batting['wicket_type'].notna()]
    dfwicket=dismissals.groupby(['over','wicket_type']).size().reset_index(name='count')

    fig=px.bar(dfwicket, x='over', y='count', color='wicket_type',range_x=[0,51])
    fig.update_layout(title=dict(text=f"{Country}'s fall of wickets and types ({Starting_Year} onwards)",
                                 xanchor='center', yanchor='top', x=0.45),
                     xaxis=dict(title='Overs'), yaxis=dict(title='Wickets count'))

    fig.show()

In [90]:
# wicket_fall('Australia')

In [101]:
### Runs scored

In [91]:
def run_score(Country, Starting_Year=2004):
    """Plot, per over, how many of ``Country``'s deliveries produced each run value.

    Uses rows of the module-level ``df`` where ``Country`` is batting,
    ``year >= Starting_Year`` and ``runs_off_bat > 0``, counts them per
    ``(over, runs_off_bat)`` and shows a stacked bar chart coloured by run value.
    Returns nothing; the figure is displayed with ``fig.show()``.
    """
    is_batting = df['batting_team'] == Country
    in_period = df['year'] >= Starting_Year
    has_runs = df['runs_off_bat'] > 0
    scoring_rows = df[is_batting & in_period & has_runs]

    per_over = (
        scoring_rows
        .groupby(['over', 'runs_off_bat'])['ball']
        .count()
        .reset_index()
    )
    # Object dtype makes plotly treat the run value as a discrete colour category.
    per_over['runs_off_bat'] = per_over['runs_off_bat'].astype(int).astype(object)

    # NOTE(review): y is a count of deliveries, although the axis is titled 'Runs scored'.
    fig = px.bar(per_over, x='over', y='ball', color='runs_off_bat', range_x=[0, 51])

    chart_title = dict(
        text=f"{Country}'s run scores ({Starting_Year} onwards)",
        xanchor='center',
        yanchor='top',
        x=0.45,
    )
    fig.update_layout(
        title=chart_title,
        xaxis=dict(title='Overs'),
        yaxis=dict(title='Runs scored'),
    )

    fig.show()

In [92]:
# run_score('New Zealand')

In [93]:
def run_pie_plot(Country, Starting_Year=2004):
    """Show a donut chart of how often ``Country`` scored each run value off the bat.

    Rows of the module-level ``df`` where ``Country`` is batting,
    ``year >= Starting_Year`` and ``runs_off_bat > 0`` are counted per
    ``runs_off_bat`` value; each slice is the number of such rows.

    Parameters
    ----------
    Country : str
        Value matched against the ``batting_team`` column of the module-level ``df``.
    Starting_Year : int, default 2004
        Only rows with ``year >= Starting_Year`` are used.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    scoring=df[(df['batting_team']==Country)&(df['year']>=Starting_Year)&(df['runs_off_bat']>0)]

    # Count rows per run value (as run_score does). Summing `ball` would add up
    # that column's values instead of counting deliveries.
    dfruns_pie=scoring.groupby('runs_off_bat')['ball'].count().reset_index()

    fig=px.pie(dfruns_pie, names='runs_off_bat', values='ball', hole=0.6)
    fig.update_layout(title=dict(text=f"{Country}'s run scores ({Starting_Year} onwards)",
                                 xanchor='center', yanchor='top', x=0.5),
                     annotations=[dict(text='Runs scored', x=0.5, y=0.5, font_size=20, showarrow=False)])
    fig.show()

In [94]:
# run_pie_plot('New Zealand')

In [95]:
# Dropdown options: teams in order of first appearance, years in ascending order.
team_list=list(df['batting_team'].unique())
years=list(np.sort(df['year'].unique()))

def master_function(Country, Starting_Year):
    """Render every per-team chart for the selected team and starting year.

    Parameters
    ----------
    Country : str
        Team name passed to each chart function.
    Starting_Year : int
        First year to include; passed to each chart function.
    """
    # Every chart receives the selected Starting_Year so that all of them cover
    # the same period (hard-coding 2004 here would ignore the dropdown).
    most_runs(Country, Starting_Year)
    most_wickets(Country, Starting_Year)
    score_sheet(Country, Starting_Year)
    win_loss(Country, Starting_Year)
    wicket_fall(Country, Starting_Year)
    run_score(Country, Starting_Year)
    run_pie_plot(Country, Starting_Year)

# Passing lists makes interact build one dropdown per argument.
interact(master_function, Country=team_list, Starting_Year=years);

interactive(children=(Dropdown(description='Country', options=('India', 'England', 'South Africa', 'Pakistan',…

In [102]:
## Opponent Analysis

In [96]:
def opponent_team(Country, team2, Starting_Year=2004, venue_name='McLean Park'):

    dfnew=df[(df['batting_team']==Country)|(df['bowling_team']==Country)& (df['year']>=Starting_Year)]

    condition1=(dfnew['batting_team']==Country)&(dfnew['bowling_team']==team2)
    condition2=(dfnew['batting_team']==team2)&(dfnew['bowling_team']==Country)

    dfopp=dfnew[condition1|condition2]
    
#     print('The venues are : ', dfopp['venue'].unique())
    
#     venue_name=input('Enter the venue location')

    dfopp1=dfopp[dfopp['venue']==venue_name]
    dfopp1=dfopp1.groupby(['match_id','batting_team','over']).agg({'count':sum, 'runs_off_bat':'sum'}).reset_index()
    dfopp1=dfopp1.groupby(['batting_team','over']).agg({'count':'mean', 'runs_off_bat':'mean'}).reset_index()

    dfopp1[['count','runs_off_bat']]= dfopp1[['count','runs_off_bat']].apply(np.ceil).astype(int)

    overs=dfopp1['over'].unique()

    Country_runs_cumu=dfopp1[dfopp1['batting_team']==Country]['runs_off_bat'].cumsum()
    team2_runs_cumu=dfopp1[dfopp1['batting_team']==team2]['runs_off_bat'].cumsum()

    Country_runs=dfopp1[dfopp1['batting_team']==Country]['runs_off_bat']
    team2_runs=dfopp1[dfopp1['batting_team']==team2]['runs_off_bat']

    Country_size=dfopp1[dfopp1['batting_team']==Country]['count']*10
    team2_size=dfopp1[dfopp1['batting_team']==team2]['count']*10

    fig=go.Figure()
    fig.add_trace(go.Scatter(x=overs, y=Country_runs_cumu, name=Country, marker_color='blue'))
    fig.add_trace(go.Scatter(x=overs, y=team2_runs_cumu, name=team2, marker_color='red'))
    fig.add_trace(go.Bar(x=overs, y=Country_runs, showlegend=False, marker_color='blue'))
    fig.add_trace(go.Bar(x=overs, y=team2_runs, showlegend=False, marker_color='red'))

    fig.add_trace(go.Scatter(x=overs, y=Country_runs_cumu, mode='markers', marker=dict(size=Country_size), 
                            showlegend=False, marker_color='blue'))
    fig.add_trace(go.Scatter(x=overs, y=team2_runs_cumu, mode='markers', marker=dict(size=team2_size),
                             showlegend=False, marker_color='red'))

    fig.update_layout(title=dict(text=f"{Country} Vs {team2} run scores at {venue_name}",
                                 xanchor='center', yanchor='top', x=0.45),
                     xaxis=dict(title='Overs'), yaxis=dict(title='Runs scored'))