In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

In [2]:
delivery = pd.read_csv("delivery.csv")
delivery.head()

Unnamed: 0,id,inning,over,ball,batsman,non_striker,bowler,batsman_runs,extra_runs,total_runs,non_boundary,is_wicket,dismissal_kind,player_dismissed,fielder,extras_type,batting_team,bowling_team
0,335982,1,6,5,RT Ponting,BB McCullum,AA Noffke,1,0,1,0,0,,,,,Kolkata Knight Riders,Royal Challengers Bangalore
1,335982,1,6,6,BB McCullum,RT Ponting,AA Noffke,1,0,1,0,0,,,,,Kolkata Knight Riders,Royal Challengers Bangalore
2,335982,1,7,1,BB McCullum,RT Ponting,Z Khan,0,0,0,0,0,,,,,Kolkata Knight Riders,Royal Challengers Bangalore
3,335982,1,7,2,BB McCullum,RT Ponting,Z Khan,1,0,1,0,0,,,,,Kolkata Knight Riders,Royal Challengers Bangalore
4,335982,1,7,3,RT Ponting,BB McCullum,Z Khan,1,0,1,0,0,,,,,Kolkata Knight Riders,Royal Challengers Bangalore


In [3]:
match = pd.read_csv("match.csv")
match["team1"]=match["team1"].replace("Rising Pune Supergiant","Rising Pune Supergiants")
match["team2"]=match["team2"].replace("Rising Pune Supergiant","Rising Pune Supergiants")
match["winner"] = match["winner"].replace("Rising Pune Supergiant","Rising Pune Supergiants")
match["toss_winner"] = match["toss_winner"].replace("Rising Pune Supergiant","Rising Pune Supergiants")
#So above code will replace the occurrence of “Rising Pune Supergiant” with “Rising Pune Supergiants” in the series
#team1, team2, winner,and toss_winner

In [7]:
match.shape
delivery.shape

(193468, 18)

In [None]:
match.info()

In [None]:
delivery.info()

In [None]:
match.describe()

In [None]:
delivery.describe()

In [None]:
#Getting all the teams which are part of all 13 IPL tournaments
all_teams = match["team1"].tolist()
all_teams = list(set(all_teams))
all_teams

In [None]:
#Eliminator matches
match["eliminator"].value_counts()

In [None]:
#Graphical representation of eliminator
sns.countplot("eliminator",data = match);

In [None]:
#Number of mathes in each venue
plt.figure(figsize=(16,8))
sns.countplot("venue", data = match)
plt.xticks(rotation = "vertical")
plt.title("Number of matches played at each venue");

In [None]:
#Number of Matches played by each team
plt.figure(figsize=(16,8))
x = match["team1"].value_counts()
y = match["team2"].value_counts()
z = x + y
z.plot(kind = "barh")
plt.title("Number of Matches played by each team");

In [None]:
#Number of matches won by each team
plt.figure(figsize=(16,8))
sns.countplot("winner", data = match)
plt.xticks(rotation = "vertical")
plt.title("Number of Matches won by each team")
winners_list = match["winner"].value_counts()
winners_list;

# IPL Batting Analysis

In [None]:
#Most Run Scored by IPL Teams
delivery.groupby(['batting_team'])['total_runs'].sum().sort_values(ascending=False)

In [None]:
#Avg Run by Teams in Powerplay
delivery[delivery['over']<6].groupby(['id','batting_team']).sum()['total_runs'].groupby('batting_team').mean().sort_values(ascending=False)[2:]

In [None]:
#Most IPL Century by a Player
runs = delivery.groupby(['batsman','id'])['batsman_runs'].sum()
runs[runs >= 100].droplevel(level=1).groupby('batsman').count().sort_values(ascending=False)[:10]

In [None]:
#Most IPL Fifty by Player
plt.figure(figsize=(10,8))
runs = delivery.groupby(['batsman','id'])['batsman_runs'].sum()
data = runs[runs >= 50].droplevel(level=1).groupby('batsman').count().sort_values(ascending=False)[:10]
sns.barplot(y=data.index,x=data,orient='h')
plt.xlabel('Half-Centuries')
plt.ylabel('Batsman')
plt.show()

In [None]:
#Most Boundary (4s) hit by a Batsman
plt.figure(figsize=(10,8))
data = delivery[delivery['batsman_runs'] == 4]['batsman'].value_counts()[:10]
sns.barplot(y=data.index,x=data,orient='h')
plt.xlabel('Fours')
plt.ylabel('Batsman')
plt.show()


In [None]:
#Highest Total by IPL Teams
delivery.groupby(['id','batting_team']).sum()['total_runs'].droplevel(level=0).sort_values(ascending=False)[:10]

In [None]:
#Most IPL Sixes Hit by a batsman
plt.figure(figsize=(10,8))
data = delivery[delivery['batsman_runs'] == 6]['batsman'].value_counts()[:10]
sns.barplot(y=data.index,x=data,orient='h')
plt.xlabel('Sixes')
plt.ylabel('Batsman')
plt.show()

In [None]:
#Highest Individual IPL score
delivery.groupby(['batsman','id'])['batsman_runs'].sum().sort_values(ascending=False)[:10]

In [None]:
#To find the batsman with most runs in IPL till date (2020)
orange_cap =  delivery.groupby("batsman")["batsman_runs"].agg("sum").reset_index()
top_20 = orange_cap.sort_values("batsman_runs", ascending=False).head(20)
top_20

In [None]:
plt.figure(figsize=(16,8))
sns.barplot(x = top_20.batsman, y = top_20.batsman_runs)
plt.xticks(rotation = "vertical")
plt.title("Top 20 Run getters in IPL so far (2020)", fontsize = 17);

# Bowling Statistics

In [None]:
#Most Dot Ball by a Bowler
plt.figure(figsize=(10,8))
data = delivery[delivery['total_runs'] == 0].groupby('bowler').count()['id'].sort_values(ascending=False)[:10]
sns.barplot(y=data.index,x=data,orient='h')
plt.xlabel('Dot Balls')
plt.ylabel('bowler')
plt.show()

In [None]:
#Most Maiden over by a Bowler
data = delivery.groupby(['id','bowler','over'])['total_runs'].sum()
data = data[data.values == 0].droplevel(level=[0,2])
data.index.value_counts()[:10]

In [None]:
#Most Extras by a IPL team
delivery.groupby(['bowling_team'])["extra_runs"].agg('sum').sort_values(ascending=False)

In [None]:
#Most IPL Wickets by a Bowler
mask1=delivery['dismissal_kind']=='caught'
mask2=delivery['dismissal_kind']=='bowled'
mask3=delivery['dismissal_kind']=='lbw'
mask4=delivery['dismissal_kind']=='caught and bowled'
mask5=delivery['dismissal_kind']=='stumped'

new_del=delivery[mask1 | mask2 | mask3 | mask4 | mask5]
new_del_final = new_del.groupby('bowler')['player_dismissed'].agg('count').sort_values(ascending=False).head(20)
print(new_del_final)
plt.figure(figsize=(16,8))
sns.barplot(x = new_del_final.index, y = new_del_final.values)
plt.xticks(rotation = "vertical");

In [None]:
#Top dismissal method
plt.figure(figsize=(10,8))
sns.countplot(x = delivery.dismissal_kind)
plt.xticks(rotation = "vertical")
plt.title("Number of dismissals and their methods");

In [None]:
#Most Number of runs given by a bowler
most_economic = delivery.groupby("bowler")["total_runs"].agg("sum").reset_index()
top_20_economic = most_economic.sort_values("total_runs", ascending=False).head(20)
print(top_20_economic)
plt.figure(figsize=(16,8))
sns.barplot(x = top_20_economic.bowler, y = top_20_economic.total_runs)
plt.xticks(rotation = "vertical")
plt.title("Top 20 most economic bowlers in IPL so far (2020)", fontsize = 17);

In [None]:
#Chawla Bowling Performance
condition = delivery["bowler"] == "PP Chawla"
chawla_bowling_by_team = delivery[condition].groupby("batting_team")["total_runs"].agg(sum)
print(chawla_bowling_by_team)
plt.figure(figsize=(10,8))
sns.barplot(x = chawla_bowling_by_team.index, y = chawla_bowling_by_team.values)
plt.xticks(rotation = "vertical")
plt.title("Chawla Bowling Performace with Each team");


# Miscellaneous

In [None]:
#Most Number of wins in eliminator
sns.countplot(match["winner"][match["eliminator"] == "Y"])
plt.xticks(rotation = "vertical")
plt.title("Number of wins by a team in a eliminator");

In [None]:
# Top 5 MOM players 
mom_players = match["player_of_match"].value_counts()
temp_values = mom_players.head()

In [None]:
sns.barplot(x = temp_values.index, y = temp_values.values)
plt.title("Number of Man of the Match Winning matches by each player")
plt.xlabel("Player Name")
plt.ylabel("Number of MOM awards");

In [None]:
plt.figure(figsize=(16,8))
delivery3 =delivery[["batting_team","over","batsman_runs"]] 
x = delivery3.pivot_table(values = "batsman_runs", index = "batting_team", columns = "over", aggfunc = "count")
sns.heatmap(x,cmap = "summer")
plt.title("Heat map of each franchise vs over and there score in each over", fontsize = 20);

In [None]:
#Runs scored by Kohli against Malinga
mask=delivery['bowler']=='SL Malinga'
mask2=delivery['batsman']=='V Kohli'
delivery[mask].groupby('batsman')['batsman_runs'].agg('count').sort_values(ascending=False)['V Kohli']