In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Sanity message: reaching this line means every import above ran without error.
print('Priyatama is ready!')

## 1. Reading the dataset.

In [None]:
# Both CSV files sit in the same input folder, so build each path from it.
DATA_DIR = '../input/ipl-complete-dataset-20082020'
match = pd.read_csv(f'{DATA_DIR}/IPL Matches 2008-2020.csv')  # match-level table
ball = pd.read_csv(f'{DATA_DIR}/IPL Ball-by-Ball 2008-2020.csv')  # ball-by-ball table

In [None]:
# Preview the first three rows of the match-level table.
match.head(3)

In [None]:
# Preview the first three rows of the ball-by-ball table.
ball.head(3)

## 2. Cleaning & Setting up the dataset.

In [None]:
# Heatmap of the boolean missing-value mask of `match` (one cell per entry).
sns.heatmap(match.isnull())

In [None]:
# Number of missing values in each column of `match`.
match.isnull().sum()

In [None]:
# Heatmap of the boolean missing-value mask of `ball` (one cell per entry).
sns.heatmap(ball.isnull())

In [None]:
# Number of missing values in each column of `ball`.
ball.isnull().sum()

In [None]:
# List the column names available in `match`.
match.columns

In [None]:
# Quick textual summary of the match table.
print ('Matches played so far: ', match.shape[0]) #shape[0] is the number of rows in the frame (shape[1] would be the number of columns).
print ('\n Cities played at so far: ','\n', match["city"].unique()) #'\n' starts a new line; unique() lists each distinct value of the city column.
# NOTE(review): only team1 is inspected here; a team that appears solely in another column would not be listed - confirm.
print ('\n Teams Participated :', '\n', match['team1'].unique())

In [None]:
# Collapse the 'Rising Pune Supergiants' spelling into 'Rising Pune Supergiant' in both frames.
# regex=True applies the mapping as a pattern substitution inside string cells.
team_renames = {'Rising Pune Supergiants': 'Rising Pune Supergiant'}
match = match.replace(team_renames, regex=True)
ball = ball.replace(team_renames, regex=True)
match['team1'].unique()

In [None]:
# Derive the season from the calendar year of each match date.
match_dates = pd.DatetimeIndex(match['date'])
match['Season'] = match_dates.year
match.head(3)

## 3. Exploratory data analysis.

In [None]:
# Matches per season: count the match ids that fall in each season.
match_per_season = (
    match.groupby('Season')['id']
    .count()
    .reset_index(name='Matches')  # back to a two-column DataFrame: Season, Matches
)
match_per_season

In [None]:
# Total matches played per season, drawn as a count plot.
# The column is passed by keyword: recent seaborn releases treat the first
# positional argument of countplot as `data`, not as `x`, so the positional
# form no longer produces one bar per season.
sns.countplot(x=match['Season'])
plt.xticks(rotation=90, fontsize=10)  # Rotate and size the x tick labels.
plt.yticks(fontsize=10)  # Size the y tick labels.
plt.xlabel('Seasons', fontsize=12)  # X-axis label.
plt.ylabel('Matches', fontsize=12)  # Y-axis label.
plt.title('Matches Played Per Season', fontsize=15, fontweight='bold')  # Add and stylize title.

In [None]:
# Attach each match's Season to the ball-by-ball rows, joining on the shared 'id' column.
# how='left' keeps every row of the (id, Season) lookup.
season_lookup = match[['id', 'Season']]
seasons = season_lookup.merge(ball, on='id', how='left')

In [None]:
# Preview the first three rows of the merged frame.
seasons.head(3)

In [None]:
# The 'id' column was only needed as the join key, so drop it.
# errors='ignore' keeps this cell re-runnable once 'id' has already been removed
# (a plain drop would raise KeyError on the second run).
# The merge and the drop could also be chained in one step:
#   seasons = match[['id', 'Season']].merge(ball, on='id', how='left').drop(columns='id')
seasons = seasons.drop(columns='id', errors='ignore')
seasons.head(3)

In [None]:
# Line plot of the total runs scored in each season.
runs_season = seasons.groupby('Season')['total_runs'].sum().reset_index()  # columns: Season, total_runs
rs = runs_season.set_index('Season')  # Season becomes the index of the frame handed to lineplot
ax = plt.axes()
ax.set_facecolor("Grey")  # background colour of the plotting area
sns.lineplot(data=rs, palette="magma", ax=ax)  # line colour comes from the palette
ax.set_title('Total runs in each season', fontsize=12, fontweight="bold")
plt.show()

In [None]:
# Line plot of the total wickets taken in each season.
wickets_season = seasons.groupby('Season')['is_wicket'].sum().reset_index()  # columns: Season, is_wicket
ws = wickets_season.set_index('Season')  # Season becomes the index of the frame handed to lineplot
ax = plt.axes()
ax.set_facecolor("Grey")  # background colour of the plotting area
sns.lineplot(data=ws, palette="magma", ax=ax)  # line colour comes from the palette
ax.set_title('Total wickets in each season', fontsize=12, fontweight="bold")
plt.show()

In [None]:
# Line plot of the total runs scored by each batting team.
team_runs = seasons.groupby('batting_team')['total_runs'].sum().reset_index()  # columns: batting_team, total_runs
tr = team_runs.set_index('batting_team')  # team name becomes the index of the frame handed to lineplot
ax = plt.axes()
ax.set_facecolor("Grey")  # background colour of the plotting area
sns.lineplot(data=tr, palette="magma", ax=ax)  # line colour comes from the palette
plt.xticks(rotation=90)  # team names are long, so turn the tick labels vertical
ax.set_title('Total runs by each team', fontsize=12, fontweight="bold")
plt.show()

In [None]:
# Line plot of the total wickets taken by each bowling team.
wickets_team = seasons.groupby('bowling_team')['is_wicket'].sum().reset_index()  # columns: bowling_team, is_wicket
wt = wickets_team.set_index('bowling_team')  # team name becomes the index of the frame handed to lineplot
ax = plt.axes()
ax.set_facecolor('Grey')  # background colour of the plotting area
sns.lineplot(data=wt, palette='magma', ax=ax)
ax.set_title('Total Wickets by Team', fontsize=15, fontweight='bold')
plt.xticks(rotation=90)  # team names are long, so turn the tick labels vertical
ax.set_xlabel('Teams', fontsize=13)
plt.ylabel('Total Wickets', fontsize = 13)

In [None]:
# Horizontal bar plot of the number of tosses won by each team.
toss = match['toss_winner'].value_counts()
# Apply the theme and figure size BEFORE creating the axes: rcParams such as
# figure.figsize only affect figures created after they are set, so calling
# sns.set() after plt.axes() left this first figure at the default size/style.
sns.set(rc={'figure.figsize': (10, 5)}, style='darkgrid')
ax = plt.axes()
ax.set(facecolor='Grey')  # background colour of the plotting area
sns.barplot(y=toss.index, x=toss, orient='h', palette='icefire', saturation=1)
plt.title('Toss won by teams', fontsize=15, fontweight='bold')
plt.xlabel('Toss Won')
plt.ylabel('Teams')
plt.show()

In [None]:
# Horizontal bar plot of the number of matches won by each team.
win = match['winner'].value_counts()
# Theme and figure size must be set before the axes (and its figure) are created;
# otherwise this cell only gets the intended size when an earlier cell already set it.
sns.set(rc={'figure.figsize': (10, 5)}, style='darkgrid')
ax = plt.axes()
ax.set(facecolor='Grey')  # background colour of the plotting area
sns.barplot(y=win.index, x=win, orient='h', palette='icefire', saturation=1)
plt.xticks(fontsize=13)
plt.yticks(fontsize=13)
plt.xlabel('Total Wins')
plt.ylabel('Teams')
plt.title('Total Team Wins', fontsize=15, fontweight='bold')

In [None]:
# Toss decision counts for every season.
# The rcParams dict is passed as `rc=` (the first positional parameter of sns.set
# is `context`, not `rc`), and the theme is applied before the axes are created so
# the figure size actually applies to this figure.
sns.set(rc={'figure.figsize': (10, 5)}, style='darkgrid')
ax = plt.axes()
ax.set(facecolor='Grey')  # background colour of the plotting area
sns.countplot(x='Season', hue='toss_decision', data=match)
plt.xlabel('Seasons', fontsize=13)
plt.ylabel('Count', fontsize=13)
plt.title('Toss Decision across seasons', fontsize=15, fontweight='bold')
plt.xticks(rotation=90)

In [None]:
# For every match winner, count the wins that came after winning vs. losing the toss.
twmw = match['toss_winner'] == match['winner']  # True when the match winner had also won the toss
# Turn the boolean into readable hue levels so the legend text is tied to the data
# instead of being assigned by position afterwards.
toss_result = twmw.map({False: 'Lost', True: 'Won'})
# rc dict goes through `rc=` (first positional parameter of sns.set is `context`);
# theme is applied before the axes are created so the figure size takes effect.
sns.set(rc={'figure.figsize': (10, 5)}, style='darkgrid')
ax = plt.axes()
ax.set(facecolor='Grey')  # background colour of the plotting area
# Data is passed by keyword: recent seaborn releases treat the first positional
# argument of countplot as `data`, not `x`.
sns.countplot(x=match['winner'], hue=toss_result, hue_order=['Lost', 'Won'],
              order=match['winner'].value_counts().index)  # bars ordered by total wins
plt.xticks(rotation=90)
plt.xlabel('Match Winner')  # the x axis holds the winning team, not toss wins
plt.ylabel('Count of Match Wins')
plt.title('Match wins per toss win', fontsize=15, fontweight='bold')
plt.legend(title='Toss Result', loc='upper right')  # hue is the toss outcome of the match winner

In [None]:
# What did each team decide after winning the toss?
# Relabel the raw decision values so the legend text follows the data rather than
# the order in which values happen to appear in the file.
# Assumes the raw values are 'field' and 'bat' - any other value is left unchanged
# and shown as-is in the legend.
decision_labels = match['toss_decision'].replace({'field': 'Field First', 'bat': 'Bat First'})
# rc dict goes through `rc=` (first positional parameter of sns.set is `context`);
# theme is applied before the axes are created so the figure size takes effect.
sns.set(rc={'figure.figsize': (10, 5)}, style='darkgrid')
ax = plt.axes()
ax.set(facecolor='Grey')  # background colour of the plotting area
# Data is passed by keyword: recent seaborn releases treat the first positional
# argument of countplot as `data`, not `x`.
sns.countplot(x=match['toss_winner'], hue=decision_labels,
              order=match['toss_winner'].value_counts().index)  # bars ordered by tosses won
plt.xticks(rotation=90)
plt.legend(title='Toss Decision', loc='upper right')
plt.xlabel('Toss Winner', fontsize=10)
plt.ylabel('Count of Decision', fontsize=10)
plt.title('Toss Decision per Team', fontsize=15, fontweight='bold')

In [None]:
# Preparation for the wicket-taking-bowler plots.
# First fill every missing dismissal_kind with 'Not a Wicket'.
# The result is assigned back to the column: calling fillna(inplace=True) on a
# selected column is chained assignment, which pandas deprecates and which does
# not modify `ball` at all once Copy-on-Write is active.
ball['dismissal_kind'] = ball['dismissal_kind'].fillna('Not a Wicket')
ball.dismissal_kind.unique()

In [None]:
# Keep only the deliveries whose dismissal_kind is one of the kinds listed below
# (the ones this notebook counts as wickets for the bowler).
yes_bowler_wickets = ['caught', 'bowled', 'lbw', 'stumped', 'caught and bowled', 'hit wicket']
is_bowler_wicket = ball['dismissal_kind'].isin(yes_bowler_wickets)  # boolean row mask
bowler_wickets = ball.loc[is_bowler_wicket]
print('\n Type of Dismissals', '\n' ,bowler_wickets['dismissal_kind'].unique())

In [None]:
# Horizontal count plot of wickets per bowler, ordered by descending wicket count.
bowlers = bowler_wickets['bowler']
bowler_order = bowlers.value_counts().index  # value_counts sorts most frequent first
plt.figure(figsize=(15, 112))  # very tall figure - presumably so each bowler gets its own readable row
sns.countplot(y=bowlers, order=bowler_order)
plt.title('Most wicket taking bowler!')
plt.show()

In [None]:
# Build the top-10 wicket takers table used by the pie chart below.
wickets_bowler = bowler_wickets.groupby('bowler')['dismissal_kind'].count().reset_index()
wickets_bowler.columns = ['Bowler', 'Wickets']
y = (
    wickets_bowler.sort_values(by='Wickets', ascending=False)
    .head(10)
    .reset_index(drop=True)  # renumber rows 0..9 and discard the old index
)
y

In [None]:
# Use the bowler name as the index, so it labels the pie slices.
y = y.set_index('Bowler')

In [None]:
# Pie chart of the wicket counts of the top-10 bowlers; each slice is annotated
# with its percentage of the plotted total.
top_wickets = y['Wickets']
top_wickets.plot.pie(autopct='%1.1f%%', shadow=True, rotatelabels=True, radius=0.9)
plt.ylabel('  Top 10 ' '\n Bowlers', fontsize=15, rotation=0)  # horizontal two-line caption
plt.yticks(fontsize=10)

In [None]:
#Next: draw a bar chart for the top 10 run-scoring batsmen.
#Start by listing the columns available in the ball-by-ball data.
ball.columns

In [None]:
# Distinct values recorded in the batsman_runs column.
ball['batsman_runs'].unique()

In [None]:
# Total batsman_runs per batsman, reduced to the ten highest totals.
runs_by_batsman = ball.groupby('batsman')['batsman_runs'].sum().reset_index()
runs_by_batsman.columns = ['Batsman', 'Runs']
top10runs = (
    runs_by_batsman.sort_values(by='Runs', ascending=False)
    .head(10)
    .reset_index(drop=True)  # renumber rows 0..9 and discard the old index
)
top10runs

In [None]:
# Bar chart of the ten highest run totals, one bar per batsman.
ax = plt.axes()
ax.set_facecolor("grey")  # background colour of the plotting area
sns.barplot(data=top10runs, x='Batsman', y='Runs', palette='rocket', saturation=1, ax=ax)
plt.xticks(rotation=90, fontsize=10)
plt.yticks(fontsize=10)
ax.set_xlabel('\n Player', fontsize=15)
ax.set_ylabel('Total Runs', fontsize=15)
plt.title('Top 10 run scorers in IPL',fontsize=15,fontweight="bold")

In [None]:
# Bar chart of the ten players with the most player-of-the-match awards.
ax = plt.axes()
ax.set(facecolor='Black')  # background colour of the plotting area
# value_counts() sorts most frequent first, so the first ten entries are the top ten.
# Draw on the axes created above rather than relying on the implicit current axes.
match.player_of_match.value_counts()[:10].plot(kind='bar', ax=ax)
plt.xlabel('Player', fontsize=15, fontweight='bold')
plt.ylabel('Man of the Match Awards', fontsize=15, fontweight='bold')  # fixed the misspelled label
plt.title('Most MoM winner', fontsize=20, fontweight='bold')

Please share your feedback too!

Thank you for scrolling!