In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode,iplot

import warnings
warnings.filterwarnings("ignore")

import os
# Print the full path of every file found beneath the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))


In [None]:
# Load the two CSV files of the dataset: the matches file and the
# ball-by-ball file (as named by the files themselves).
matches_csv = "/kaggle/input/ipl-complete-dataset-20082020/IPL Matches 2008-2020.csv"
ball_by_ball_csv = "/kaggle/input/ipl-complete-dataset-20082020/IPL Ball-by-Ball 2008-2020.csv"

df = pd.read_csv(matches_csv)
df_balltoball = pd.read_csv(ball_by_ball_csv)

In [None]:
# Preview the first five rows of the matches frame.
df.head(n=5)

In [None]:

# Full franchise names as they appear in the raw data ...
x = ['Sunrisers Hyderabad', 'Mumbai Indians', 'Gujarat Lions',
     'Rising Pune Supergiant', 'Royal Challengers Bangalore',
     'Kolkata Knight Riders', 'Delhi Daredevils', 'Kings XI Punjab',
     'Chennai Super Kings', 'Rajasthan Royals', 'Deccan Chargers',
     'Kochi Tuskers Kerala', 'Pune Warriors', 'Rising Pune Supergiants', 'Delhi Capitals']

# ... and the short code each one is replaced with (same position in both
# lists). Several names deliberately share a code: both Rising Pune spellings
# map to 'RPS', 'Delhi Daredevils' / 'Delhi Capitals' map to 'DC', and
# 'Deccan Chargers' is folded into 'SRH'.
y = ['SRH', 'MI', 'GL', 'RPS', 'RCB', 'KKR', 'DC', 'KXIP', 'CSK', 'RR', 'SRH', 'KTK', 'PW', 'RPS', 'DC']

# Replace the names wherever they occur, in every column of both frames.
df = df.replace(x, y)
df_balltoball = df_balltoball.replace(x, y)

# Unify the two spellings of the same city. The result is assigned back to the
# column: calling replace(..., inplace=True) on df['city'] operates on a
# temporary selection, which emits a chained-assignment warning on recent
# pandas and leaves df unchanged when Copy-on-Write is enabled.
df['city'] = df['city'].replace('Bengaluru', 'Bangalore')

In [None]:
def Annotation(plots):
    """Write each bar's height, formatted to two decimals, just above the bar.

    Parameters
    ----------
    plots : object with a ``patches`` sequence and an ``annotate`` method
        In this notebook it is the Axes returned by a seaborn bar-style plot.
        Every patch must provide ``get_x()``, ``get_width()`` and
        ``get_height()``.

    Returns
    -------
    None
        The labels are added to ``plots`` as a side effect.
    """
    # Text placement shared by every label: centred on the anchor point and
    # shifted 5 points upwards so it sits clear of the bar top.
    label_style = dict(ha='center', va='center', size=8,
                       xytext=(0, 5), textcoords='offset points')

    for patch in plots.patches:
        height = patch.get_height()
        x_mid = patch.get_x() + patch.get_width() / 2
        plots.annotate(format(height, '.2f'), (x_mid, height), **label_style)

In [None]:

# Apply the theme BEFORE the axes are created; a theme set afterwards does not
# restyle axes that already exist.
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(1, 3, figsize=(20,8), sharey=True)

# All three panels list the teams in the same order (most match wins first)
# so that the bars can be compared across panels.
winner_order = df["winner"].value_counts().index

# Each countplot is drawn exactly once and the returned Axes is handed to
# Annotation. Drawing it a second time on the same Axes would stack a duplicate
# set of bars, and every bar would then be labelled twice.
Annotation(sns.countplot(ax=ax[0],x="winner", data=df,order = winner_order))
ax[0].set_title("Match Winner")

Annotation(sns.countplot(ax=ax[1],x="toss_winner", data=df,order = df["toss_winner"].value_counts().index))
ax[1].set_title("Toss Winner")

# Matches in which the team that won the toss also won the match.
toss_and_match_winner = df[df['toss_winner']==df['winner']]
Annotation(sns.countplot(ax=ax[2],x="winner", data=toss_and_match_winner,order = winner_order))
ax[2].set_title("Toss winner and Match Winner")


**Observation 1**

1. RPS, GL, PW and KTK did not play in every season, which is why their counts are very low.
2. MI has won the most tosses and the most matches.
3. DC has had good luck with the toss, but that has not carried over to match results.

In [None]:
# Disabled experiment: the whole cell is one string literal, so none of the
# code inside it runs; the notebook merely echoes the string as the cell value.
# NOTE(review): inside the string the two .map() results are not assigned to
# anything, so re-enabling it unchanged would presumably leave df untouched —
# confirm the intent before reviving this cell.
'''
toss_decision={'field':0,'bat':1}
result={'runs':0,'wickets':1}
df['toss_decision'].map(toss_decision)
df['result'].map(result)'''

In [None]:

# Percentage split of the toss decision within each city.
df1 = df.groupby('city')['toss_decision'].value_counts(normalize=True)
df1 = df1.mul(100)
# Rename the values to 'percent' so that reset_index() yields the columns
# city / toss_decision / percent.
df1 = df1.rename('percent').reset_index()

g = sns.catplot(x='city',y='percent',hue='toss_decision',kind='bar',data=df1)
g.fig.set_figwidth(30)
g.fig.set_figheight(12)
g.set_xticklabels(rotation=90)
g.ax.set_ylim(0,100)

# Write the rounded percentage at the top-left corner of every bar.
for p in g.ax.patches:
    bar_height = p.get_height()
    # A city in which only one decision was ever taken has no row for the
    # other decision; depending on the seaborn version that bar may exist with
    # a NaN height. Skip it instead of writing a "nan%" label at a non-finite
    # position.
    if np.isnan(bar_height):
        continue
    txt = str(np.round(bar_height)) + '%'
    g.ax.text(p.get_x(), bar_height, txt, size=10)

In [None]:

# Percentage split of the toss decision for each toss-winning team.
df1 = df.groupby('toss_winner')['toss_decision'].value_counts(normalize=True)
df1 = df1.mul(100)
# Rename the values to 'percent' so that reset_index() yields the columns
# toss_winner / toss_decision / percent.
df1 = df1.rename('percent').reset_index()

g = sns.catplot(x='toss_winner',y='percent',hue='toss_decision',kind='bar',data=df1)
g.fig.set_figwidth(20)
g.fig.set_figheight(8)
g.ax.set_ylim(0,100)

# Write the rounded percentage at the top-left corner of every bar.
for p in g.ax.patches:
    bar_height = p.get_height()
    # A team that always made the same decision has no row for the other one;
    # depending on the seaborn version that bar may exist with a NaN height.
    # Skip it instead of writing a "nan%" label at a non-finite position.
    if np.isnan(bar_height):
        continue
    txt = str(np.round(bar_height)) + '%'
    g.ax.text(p.get_x(), bar_height, txt)

**Observation 2**

1. Most teams choose to field after winning the toss; CSK is an exception.
2. In Bangalore, most teams choose to field, which is why RCB has such a high percentage of choosing to field.

In [None]:
# Matches played per team: every match contributes one appearance for team1
# and one for team2.
teams=pd.concat([df['team1'],df['team2']])
teams=teams.value_counts().reset_index()
teams.columns=['Team','Total Matches']

# Look the win count up BY TEAM NAME. Assigning the reset value_counts column
# directly pairs rows by position, i.e. the i-th most active team would receive
# the i-th highest win total regardless of which team earned it (and on
# pandas >= 2 that column holds team names rather than counts).
wins_by_team = df['winner'].value_counts()
# A team that never won is absent from wins_by_team -> 0 wins.
teams['Wins']=teams['Team'].map(wins_by_team).fillna(0).astype(int)
teams['Win percentage']=((teams['Wins']/teams['Total Matches'])*100).round()

trace = go.Table(
    header=dict(values=["Team","Total Matches","Wins",'Win percentage'],
                fill = dict(color='#5499C7'),
                font = dict(color=['rgb(45, 45, 45)'] * 5, size=14),
                align = ['center'],
               height = 30),
    cells=dict(values=[teams['Team'], teams['Total Matches'], teams['Wins'],teams['Win percentage']],
               fill = dict(color=['rgb(174, 214, 241)', 'rgba(228, 222, 249, 0.65)']),
               align = ['center'], font_size=13, height=25))

layout = dict(
    width=750,
    height=520,
    autosize=False,
    title='Total Matches vs Wins per team',
    margin = dict(t=100),
    showlegend=False,    
)
fig1 = dict(data=[trace], layout=layout)
iplot(fig1)

In [None]:
# Preview the first five rows of the ball-by-ball frame.
df_balltoball.head(n=5)

In [None]:
# Season = first four characters of the date string, cast to int
# (presumably the year — the date format is not visible here).
df['season'] = df['date'].str[:4].astype(int)

# Tag every ball-by-ball row with the season of its match (left join on the
# match id), then total the runs per season.
batsmen = (
    df[['id', 'season']]
    .merge(df_balltoball, on='id', how='left')
    .drop(columns='id')
)
season = batsmen.groupby('season')['total_runs'].sum().reset_index()

# Number of matches per season (non-null match ids).
avgruns_each_season = (
    df.groupby('season')['id']
    .count()
    .reset_index()
    .rename(columns={'id': 'matches'})
)
avgruns_each_season['total_runs'] = season['total_runs']
avgruns_each_season['average_runs_per_match'] = (
    avgruns_each_season['total_runs'] / avgruns_each_season['matches']
).round()

season_columns = ['season', 'matches', 'total_runs', 'average_runs_per_match']
trace = go.Table(
    header={
        'values': ["Season", "Matches", "Total runs", 'Average runs/match'],
        'fill': {'color': '#2CF79F'},
        'font': {'color': ['rgb(45, 45, 45)'] * 5, 'size': 14},
        'align': ['center'],
        'height': 30,
    },
    cells={
        'values': [avgruns_each_season[column] for column in season_columns],
        'fill': {'color': ['rgb(165, 255, 216)', 'rgba(228, 222, 249, 0.65)']},
        'align': ['center'],
        'font_size': 13,
        'height': 25,
    },
)

layout = {
    'width': 750,
    'height': 550,
    'autosize': False,
    'title': 'Total Run vs Average runs per match',
    'margin': {'t': 100},
    'showlegend': False,
}
fig1 = {'data': [trace], 'layout': layout}
iplot(fig1)

In [None]:
# Total runs per (match, inning, batting side, bowling side), keeping only
# totals of 200 or more.
innings_keys = ['id', 'inning', 'batting_team', 'bowling_team']
high_scores = (
    df_balltoball.groupby(innings_keys)['total_runs']
    .sum()
    .reset_index()
    .loc[lambda totals: totals['total_runs'] >= 200]
)
# The ten largest of those totals.
hss = high_scores.nlargest(10, 'total_runs')

table_columns = ['inning', 'batting_team', 'bowling_team', 'total_runs']
trace = go.Table(
    header={
        'values': ["Inning", "Batting Team", "Bowling Team", "Total Runs"],
        'fill': {'color': 'black'},
        'font': {'color': 'white', 'size': 14},
        'align': ['center'],
        'height': 30,
    },
    cells={
        'values': [hss[column] for column in table_columns],
        'fill': {'color': ['#E2E4E3', 'rgb(254, 254, 254 )']},
        'align': ['center'],
        'font_size': 13,
    },
)

layout = {
    'width': 830,
    'height': 410,
    'autosize': False,
    'title': 'Highest scores of IPL',
    'showlegend': False,
}

fig1 = {'data': [trace], 'layout': layout}
iplot(fig1)

In [None]:
# Five highest run totals per batsman (nlargest() defaults to n=5).
batsman_run=df_balltoball.groupby(['batsman'])['batsman_runs'].sum().nlargest().reset_index() 
batsman_run.columns=['batsman','batsman_runs']

# Summing is_wicket per bowler counts EVERY dismissal on that bowler's
# deliveries, including ones a bowler is not credited with (e.g. run-outs).
# When the frame has a dismissal_kind column, leave those rows out first.
# NOTE(review): assumes dismissal_kind uses exactly these lowercase labels —
# TODO confirm against the CSV. Without the column the original
# all-dismissals count is kept.
not_bowler_dismissals = ['run out', 'retired hurt', 'obstructing the field']
if 'dismissal_kind' in df_balltoball.columns:
    bowler_deliveries = df_balltoball[~df_balltoball['dismissal_kind'].isin(not_bowler_dismissals)]
else:
    bowler_deliveries = df_balltoball

# Five highest wicket totals per bowler.
bowler_wicket=bowler_deliveries.groupby(['bowler'])['is_wicket'].sum().nlargest().reset_index() 
bowler_wicket.columns=['bowler','is_wicket']

# The two top-5 lists are shown side by side; rows are paired by rank only.
trace = go.Table(
    header=dict(values=["Batsman","Total Runs","Bowler","Wicket"],
                fill = dict(color = '#FFABD6'),
                font = dict(color = 'white', size = 14),
                align = ['center'],
               height = 30),
    cells=dict(values=[batsman_run['batsman'], batsman_run['batsman_runs'],bowler_wicket['bowler'], bowler_wicket['is_wicket']],
               fill = dict(color = [ 'rgb(254, 254, 254 )']),
               align = ['center'], font_size=13))

layout = dict(
    width=830,
    height=320,
    autosize=False,
    title='Highest Run scorer and Most wicket Taker',
    showlegend=False,    
)

fig1 = dict(data=[trace], layout=layout)
iplot(fig1)

In [None]:
# Total dismissals recorded for each value of the 'over' column.
wicket_per_over = (
    df_balltoball.groupby('over')['is_wicket']
    .sum()
    .reset_index()
    .rename(columns={'over': 'Over', 'is_wicket': 'Wicket'})
)

plt.figure(figsize=(15, 8))
over_axis = sns.lineplot(x="Over", y="Wicket", data=wicket_per_over,
                         markers=True, dashes=False)
# One tick for each of the values 0..19.
over_axis.set(xticks=list(range(20)))

**Observation 3**

1. RCB holds the highest team score in the IPL; Chris Gayle scored 175 off 66 balls in that match.
2. Virat and Malinga are the highest run scorer and the highest wicket taker, respectively.
3. The number of wickets that fall decreases after the 5th over because the powerplay is over.