# IPL Exploratory Data Analysis (2008 - 2016)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
!dir

In [None]:
# Read both source tables from the working directory in one pass:
# one row per match, and one row per ball bowled.
matches, deliveries = (
    pd.read_csv(csv_name) for csv_name in ("matches.csv", "deliveries.csv")
)

In [None]:
# Preview the first five rows of the matches table
matches.head(n=5)

In [None]:
# Column dtypes and non-null counts for the matches table
matches.info()

In [None]:
# umpire3 does not contain any values, so drop that column.
# Re-binding the result (instead of inplace=True) together with
# errors='ignore' keeps this cell safe to run more than once: a second
# run no longer raises KeyError because the column is already gone.
matches = matches.drop(columns=['umpire3'], errors='ignore')

In [None]:
# Show the matches whose city is missing
matches.loc[matches['city'].isna()]

Here we can see that every row with a missing (`NaN`) value in the `city` column has `Dubai International Cricket Stadium` as its venue, so let's set the city for those rows to Dubai.

In [None]:
# Fill the missing city only for rows played at the Dubai stadium, so a
# missing city at any other venue stays visible instead of being mislabelled.
# The value is spelled "Dubai", as in the accompanying note.
dubai_city_missing = matches['city'].isna() & (
    matches['venue'] == 'Dubai International Cricket Stadium'
)
matches.loc[dubai_city_missing, 'city'] = 'Dubai'

In [None]:
# Re-check: list any matches that still have no city after the fill
matches.loc[matches['city'].isna()]

In [None]:
# Summary statistics for the matches table (numeric columns by default)
matches.describe()

In [None]:
# Preview the first five rows of the deliveries table
deliveries.head(n=5)

In [None]:
# Column dtypes and non-null counts for the deliveries table
deliveries.info()

In [None]:
# Summary statistics for the deliveries table (numeric columns by default).
# This cell follows deliveries.head()/info(); it previously repeated
# matches.describe(), which is already shown earlier in the notebook.
deliveries.describe()

In [None]:
# Apply the seaborn theme before the figure exists so it styles the whole plot.
sns.set()

# Count the awards once and keep the ten most frequent winners;
# value_counts() already sorts in descending order.
top_players = matches['player_of_match'].value_counts().head(10)

plt.figure(figsize=(14,6))
sns.barplot(x=list(top_players.index), y=top_players.values)
plt.xlabel("Player Names",labelpad=14)
plt.ylabel("Number of Man of the Match",labelpad=14)
plt.title("Players with most Man of the Match",pad = 12)
plt.show()


In [None]:
# Apply the seaborn theme before the figure exists so it styles the whole plot.
sns.set()

# Count the wins once and keep the ten teams with the most victories;
# value_counts() already sorts in descending order.
top_teams = matches['winner'].value_counts().head(10)

# Extra-wide canvas: the x tick labels are full team names.
plt.figure(figsize=(24,6))
sns.barplot(x=list(top_teams.index), y=top_teams.values)
plt.xlabel("Teams",labelpad=14)
plt.ylabel("Number of Wins",labelpad=14)
# Same title padding as the player-of-the-match chart for a consistent look.
plt.title("Teams with most wins",pad = 12)
plt.show()


In [None]:
# How many matches were won by the same team that won the toss
matches['toss_winner'].eq(matches['winner']).sum()

In [None]:
# Share of all matches (in percent) won by the team that also won the toss
toss_and_match_wins = matches['toss_winner'].eq(matches['winner']).sum()
win_pct = (toss_and_match_wins / len(matches)) * 100
print(f'{win_pct:.2f}', '%')

In [None]:
# Toss Decisions
# value_counts() orders its result by frequency, not by name, so each count
# is looked up by its label. Reading positions 0 and 1 would attach the
# "bat" label to whichever decision happens to be more common.
toss_counts = matches['toss_decision'].value_counts()
bat = toss_counts.get('bat', 0)
field = toss_counts.get('field', 0)

plt.pie([bat,field],labels=['bat','field'], colors=sns.color_palette('pastel'), autopct='%.0f%%',shadow = True,startangle=60, explode = (0.1,0))
plt.title("Toss Decisions")
plt.show()

In [None]:
# Toss decisions (bat vs. field) counted per season
fig, ax = plt.subplots(figsize=(12, 4))
sns.countplot(data=matches, x='season', hue='toss_decision', ax=ax)
plt.show()

### Toss Decision per Stadium (Interactive) 

In [None]:
import ipywidgets as ipyw

In [None]:
def plot_venue(venue):
    """Draw the per-season toss-decision counts for a single venue.

    Parameters
    ----------
    venue
        Value compared against the ``venue`` column of ``matches``; only
        the rows that are equal to it are plotted.
    """
    at_venue = matches['venue'].eq(venue)
    venue_matches = matches[at_venue]
    sns.countplot(data=venue_matches, x='season', hue='toss_decision')
    plt.show()

In [None]:
# Distinct venue names, in order of first appearance in the matches table
venues = matches['venue'].unique().tolist()

In [None]:
# Passing a list as the keyword value makes interact render a dropdown;
# the trailing semicolon suppresses the cell's return-value repr.
ipyw.interact(plot_venue, venue=venues);

Data source: https://www.kaggle.com/datasets/manasgarg/ipl