Exploratory Data Analysis (EDA) on IPL

In [28]:
#Importing Essential Libraries or Modules
import numpy as np  # --> linear algebra
import pandas as pd # --> data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.express as px

In [29]:
# Load the two input datasets from the working directory.
# The match table uses its 'id' column as the row index; the stats table keeps the default index.
Matches = pd.read_csv(filepath_or_buffer="IPL_Dataset.csv", index_col="id")
Records = pd.read_csv(filepath_or_buffer="Stats.csv")

In [30]:
# Data preprocessing, step 1: inspect the column labels of the match table
# (the last expression of the cell is rendered as its output).
Matches.columns

Index(['city', 'date', 'Man of the Match', 'venue', 'neutral_venue', 'team1',
       'team2', 'Toss Winner', 'Toss Decision', 'winner', 'result',
       'result_margin', 'eliminator', 'method', 'umpire1', 'umpire2'],
      dtype='object')

In [31]:
# Drop the 'method' column, which this analysis does not use.
# The drop is written as a reassignment with errors='ignore' so the cell is idempotent:
# re-running it after the column is already gone no longer raises.
# (The former bare `Matches.loc[Matches.method.notnull()]` expression was removed: its
# result was discarded, and it raised AttributeError on re-run once 'method' was dropped.)
Matches = Matches.drop(columns=['method'], errors='ignore')

# Summarise the remaining columns, their non-null counts and dtypes.
Matches.info()

<class 'pandas.core.frame.DataFrame'>
Index: 816 entries, 335982 to 1237181
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   city              803 non-null    object 
 1   date              816 non-null    object 
 2   Man of the Match  812 non-null    object 
 3   venue             816 non-null    object 
 4   neutral_venue     816 non-null    int64  
 5   team1             816 non-null    object 
 6   team2             816 non-null    object 
 7   Toss Winner       816 non-null    object 
 8   Toss Decision     816 non-null    object 
 9   winner            812 non-null    object 
 10  result            812 non-null    object 
 11  result_margin     799 non-null    float64
 12  eliminator        812 non-null    object 
 13  umpire1           816 non-null    object 
 14  umpire2           816 non-null    object 
dtypes: float64(1), int64(1), object(13)
memory usage: 102.0+ KB


In [32]:
# Pie chart: number of matches won by each team.
# Rows with a missing 'winner' are excluded because groupby drops NaN keys.
wins_per_team = Matches.groupby('winner').size()
df1 = wins_per_team.reset_index(name='count')

# Render the pie with Plotly Express
fig = px.pie(data_frame=df1, names='winner', values='count', title='Most IPL wins')
fig.show()

In [33]:
# Count (eliminator, winner) combinations among the rows where eliminator == 'Y'.
# A single .loc call selects the rows and the two columns together.
is_eliminator = Matches['eliminator'] == 'Y'

Matches.loc[is_eliminator, ['eliminator', 'winner']].value_counts()

eliminator  winner                     
Y           Kings XI Punjab                3
            Delhi Capitals                 2
            Mumbai Indians                 2
            Rajasthan Royals               2
            Royal Challengers Bangalore    2
            Kolkata Knight Riders          1
            Sunrisers Hyderabad            1
Name: count, dtype: int64

In [34]:
# Bar plot: per winning team, how many of its wins have eliminator == 'Y'.
# Teams with no such wins still appear, with a count of 0.
eliminator_flag = Matches['eliminator'].eq('Y')
df2 = eliminator_flag.groupby(Matches['winner']).sum().reset_index(name='count')

# Render the bars with Plotly Express, one colour per team
fig = px.bar(
    data_frame=df2,
    x='winner',
    y='count',
    color="winner",
    title='Most IPL wins in Eliminator',
)
fig.show()

In [35]:
# Most runs in IPL.
# Sort explicitly by 'Runs' before taking the first 15 rows, so the chart shows the top
# run scorers regardless of the row order in the CSV (previously it relied on that order).
top_run_scorers = Records.sort_values(by='Runs', ascending=False).head(15)

# Bubble chart: marker size and vertical position both encode the run total
fig = px.scatter(top_run_scorers, x='Player', y='Runs', color='Player', size='Runs', title='15 Top Most Players Having Maximum Runs in IPL')
fig.show()

In [36]:
# Most centuries in IPL: the 15 players with the highest '100s' value.
Records1 = Records.sort_values(by='100s', ascending=False)

# Horizontal bars need the numeric measure on x and the category on y.
# The original passed x='Player', y='100s' together with orientation='h',
# which draws the bars along the categorical axis.
fig = px.bar(Records1.head(15), x='100s', y='Player', color='Player', orientation='h', title="Top '15' Players with Most Hundered (100s).")
fig.show()

In [37]:
# Player stats: sunburst for the 5 players with the highest 'Strike Rate'.

# Ring order from the centre outwards; sector sizes come from 'Runs'.
sunburst_levels = ['4s', '6s', 'Strike Rate', 'Player']

Records2 = Records.sort_values(by='Strike Rate', ascending=False).iloc[:5]
fig = px.sunburst(
    data_frame=Records2,
    path=sunburst_levels,
    values='Runs',
    title='Stats of 5 Players having Highest Strike Rate',
)
fig.show()

In [38]:
# Most sixes: the 15 players with the highest '6s' value.
# Fixes two issues in the original cell: the sorted frame was computed but never plotted
# (the chart used the unsorted Records.head(15)), and it kept only 5 rows although the
# title announces 15. Records3 now holds the 15 rows that are plotted.
Records3 = Records.sort_values(by=['6s'], ascending=False).head(15)

# Bubble chart: marker size and vertical position both encode the number of sixes
fig=px.scatter(Records3, x='Player', y='6s', color='Player', size='6s', title="Top '15' Players with Most Sixes (6s)")
fig.show()

In [39]:
# Top venues: the 10 venues with the most rows (matches) in the match table.

# Matches per venue
venue_counts = Matches['venue'].value_counts()

# Turn the counts into a two-column frame, order by count and keep the first 10
df3 = (
    venue_counts
    .rename_axis('venue')
    .reset_index(name='Matches played')
    .sort_values(by='Matches played', ascending=False)
    .head(10)
)

# Bar chart with Plotly Express, one colour per venue
fig = px.bar(
    data_frame=df3,
    x='venue',
    y='Matches played',
    color='venue',
    title='10 Most Popular Venue or Stadium',
)
fig.show()

In [40]:
# Most "Man of the Match" awards: the 10 players named most often in that column.

# Awards per player (missing values are not counted by value_counts)
award_counts = Matches['Man of the Match'].value_counts()

# Two-column frame of player and award count, ordered by count, first 10 rows
df4 = (
    award_counts
    .rename_axis('Man of the Match')
    .reset_index(name='No of Awards')
    .sort_values(by='No of Awards', ascending=False)
    .head(10)
)

# Bubble (scatter) chart with Plotly Express: marker size encodes the award count
fig = px.scatter(
    data_frame=df4,
    x='Man of the Match',
    y='No of Awards',
    color='Man of the Match',
    size='No of Awards',
    title='10 Most "Man of the Match" Awarded Player',
)
fig.show()


In [41]:
# Most toss wins: the 10 teams appearing most often in the 'Toss Winner' column.

# Tosses won per team
toss_counts = Matches['Toss Winner'].value_counts()

# Two-column frame of team and toss count, ordered by count, first 10 rows
df5 = (
    toss_counts
    .rename_axis('Toss Winner')
    .reset_index(name='No of Toss Won')
    .sort_values(by='No of Toss Won', ascending=False)
    .head(10)
)

# Pie chart with Plotly Express, one sector per team
fig = px.pie(
    data_frame=df5,
    names='Toss Winner',
    values='No of Toss Won',
    color='Toss Winner',
    title='10 Teams with Most Toss Wins',
)
fig.show()


In [42]:
# Toss decision: how often each value of 'Toss Decision' was chosen.

# Occurrences of each decision
BatField_counts = Matches['Toss Decision'].value_counts()

# Two-column frame; rows are ordered by the decision label (descending), not by count
df6 = (
    BatField_counts
    .rename_axis('Toss Decision')
    .reset_index(name='Elected to Bat or Field')
    .sort_values(by='Toss Decision', ascending=False)
    .head(10)
)

# Bar chart with Plotly Express, one bar per decision
fig = px.bar(
    data_frame=df6,
    x="Toss Decision",
    y="Elected to Bat or Field",
    color='Toss Decision',
    title='Most Elected option after winning Toss',
)
fig.show()


In [43]:
# Top umpires: the 10 umpires who officiated the most matches.
# Each match lists two umpires ('umpire1' and 'umpire2'). The original counted only
# 'umpire1', ignoring every appearance in the 'umpire2' slot. Both columns are stacked
# before counting so each appearance is included.
umpire_count = pd.concat(
    [Matches['umpire1'], Matches['umpire2']], ignore_index=True
).value_counts()

# Build a DataFrame of umpire name and number of matches umpired.
# The column keeps its original 'umpire1' label so code that reads df5 keeps working.
df5 = pd.DataFrame({'umpire1': umpire_count.index, 'Umpired Matches': umpire_count.values})

# Order by number of matches and keep the first 10
df5 = df5.sort_values(by='Umpired Matches', ascending=False).head(10)

# Bar chart with Plotly Express; relabel the axis/legend since the data covers both umpire slots
fig = px.bar(df5, y='Umpired Matches', x='umpire1', color='umpire1', title='Top Umpires', labels={'umpire1': 'Umpire'})
fig.show()

In [44]:
#Rivalry Between Strongest Teams.

#MI VS CSK
# Select every head-to-head fixture, whichever side is listed as team1 or team2.
# The original counted each team's wins for only one team1/team2 ordering, so wins in
# the other ordering were dropped.
mi_vs_csk = Matches[
    ((Matches["team1"] == 'Mumbai Indians') & (Matches["team2"] == 'Chennai Super Kings'))
    | ((Matches["team1"] == 'Chennai Super Kings') & (Matches["team2"] == 'Mumbai Indians'))
]

# Wins per side within those fixtures (rows with a missing winner count for neither team)
num_mi_wins = int((mi_vs_csk["winner"] == "Mumbai Indians").sum())
num_csk_wins = int((mi_vs_csk["winner"] == "Chennai Super Kings").sum())

# Create a DataFrame with the team names and number of wins
data = {'Team': ['Mumbai Indians', 'Chennai Super Kings'], 'Wins': [num_mi_wins, num_csk_wins]}

df = pd.DataFrame(data)

# Bubble (scatter) chart with Plotly Express: marker size encodes the number of wins
fig = px.scatter(df, x='Team', y='Wins', color='Team', size='Wins',title='MI vs CSK')
fig.show()

In [46]:
#Rivalry Between Strongest Teams. - MI VS RCB
# Select every head-to-head fixture, whichever side is listed as team1 or team2.
# The original counted each team's wins for only one team1/team2 ordering, so wins in
# the other ordering were dropped.
mi_vs_rcb = Matches[
    ((Matches["team1"] == 'Mumbai Indians') & (Matches["team2"] == 'Royal Challengers Bangalore'))
    | ((Matches["team1"] == 'Royal Challengers Bangalore') & (Matches["team2"] == 'Mumbai Indians'))
]

# Wins per side within those fixtures (rows with a missing winner count for neither team)
num_rcb_wins = int((mi_vs_rcb["winner"] == "Royal Challengers Bangalore").sum())
num_mi1_wins = int((mi_vs_rcb["winner"] == "Mumbai Indians").sum())

# Team names with their head-to-head win totals
data = {'Team': ['Royal Challengers Bangalore', 'Mumbai Indians'], 'Wins': [num_rcb_wins, num_mi1_wins]}
df = pd.DataFrame(data)

# Bar chart with Plotly Express, one bar per team
fig = px.bar(df, x='Team', y='Wins', color='Team', title='MI vs RCB') 
fig.show()