In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of
# every file found, so the available datasets are visible in the cell output.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Get Data

In [None]:
# Load the IPL matches CSV into the notebook-wide `data` frame that every
# later cell reads from.
MATCHES_CSV = '/kaggle/input/ipl-complete-dataset-20082020/IPL Matches 2008-2020.csv'
data = pd.read_csv(MATCHES_CSV)

In [None]:
# Preview the first five rows to check the columns that were loaded.
data.head(n=5)

# Data Pre-Processing

In [None]:
# Unify the franchise name across every column of the frame.
# NOTE(review): presumably the dataset contains both spellings — confirm.
# Only cells whose whole value equals the old name are rewritten.
OLD_TEAM_NAME = 'Rising Pune Supergiant'
NEW_TEAM_NAME = 'Rising Pune Supergiants'
data = data.replace(OLD_TEAM_NAME, NEW_TEAM_NAME)

# Dealing with Null values

In [None]:
# Number of missing values in each column (isna is the same check as isnull).
data.isna().sum()

In [None]:
# Drop the 'method' column.
# NOTE(review): presumably removed because it is mostly null — confirm against
# the null counts.
# errors='ignore' keeps this cell idempotent: re-running it after the column
# is already gone no longer raises KeyError.
data = data.drop(columns=['method'], errors='ignore')

# Exploratory Data Analysis (EDA)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Display the cleaned frame (the notebook renders a truncated view of it).
data

# Most Wins in IPL

In [None]:
# Count how many matches each team has won and show the shares as a pie chart.
temp = data[['winner']].rename(columns={'winner': 'Winner'})
count_wins = temp.value_counts()
print(count_wins.values)

# DataFrame.value_counts() keys are tuples (one entry per column), so the
# team name is the first element of each key.
labels = [key[0] for key in count_wins.index]

bar, ax = plt.subplots(figsize=(20, 12))
ax = plt.pie(x=count_wins, labels=labels, autopct="%.1f%%")
plt.title('Most Wins in History of IPL', fontsize=16)

# Most Wins in Eliminator

In [None]:
# Count wins per team, restricted to matches flagged as eliminators.
# The frame is filtered first and the column is passed by name: handing
# seaborn a positional vector together with `data=` is rejected by newer
# seaborn releases, where the first positional parameter is `data` itself.
eliminator_matches = data[data['eliminator'] == 'Y']
sns.countplot(x='winner', data=eliminator_matches)
plt.title('Most Wins in eliminator')
plt.xticks(rotation=90)

# Toss Decisions

In [None]:
# For every team that has won a toss, count how often it chose to bat and how
# often it chose to field. Produces one row per (team, decision) pair, in the
# order the teams first appear in the data, including pairs with zero matches.
teams = data['toss_winner'].unique()

# Collect plain records and build the frame once: DataFrame.append was removed
# in pandas 2.0, and growing a frame row by row is quadratic anyway.
decision_records = []
for team in teams:
    won_toss = data['toss_winner'] == team
    for decision in ('bat', 'field'):
        chose_decision = data['toss_decision'] == decision
        decision_records.append({
            'Toss Winner': team,
            'Decision': decision,
            # Number of matches where this team won the toss and made this call.
            'Times': int((won_toss & chose_decision).sum()),
        })

decision_making = pd.DataFrame(decision_records, columns=['Toss Winner', 'Decision', 'Times'])
decision_making

In [None]:
# Grouped bar chart: one bat/field pair of bars per toss-winning team.
sns.catplot(
    data=decision_making,
    kind='bar',
    x='Toss Winner',
    y='Times',
    hue='Decision',
    height=5,
    aspect=2,
)
plt.xlabel('IPL Teams')
plt.ylabel('Toss Decision')
# Rotate the team names so they do not overlap.
plt.xticks(rotation=90)
plt.title('Toss Decision per Team')

# Most Frequently Used Venues

In [None]:
# Horizontal bar chart of the eight venues that appear most often in the data.
# The counts are computed once and passed as vectors without `data=`: the
# vectors have 8 entries, and newer seaborn raises when un-indexed vectors are
# combined with a `data` frame of a different length.
venue_counts = data['venue'].value_counts().head(8)
sns.barplot(x=venue_counts.values, y=venue_counts.index)
plt.title('Most Famous Venues')
plt.xlabel('Venue Count')
plt.ylabel('Venue')

# Impact of Toss Result

In [None]:
# Compare how often the toss winner also won the match.
# Columns are addressed by name rather than by position in `data.values`, so
# the result does not silently change if the column order does.
toss_winner_won_match = data['toss_winner'] == data['winner']
toss_wins = int(toss_winner_won_match.sum())
# Every other match counts as a loss for the toss winner. As in a row-by-row
# `!=` comparison, this includes matches with a missing winner.
toss_losses = int(len(data) - toss_wins)

result = pd.DataFrame({
    'Result': [toss_losses, toss_wins],
    'type': ['Toss Wins Match losses', 'Toss Wins Match Wins'],
})
temp = result.set_index('type')

bar, ax = plt.subplots(figsize=(20, 12))
labels = list(temp.index)
print(labels)
# Pass the 1-D 'Result' column: pie() rejects 2-D input such as a whole
# DataFrame ("x must be 1D") in current Matplotlib.
ax.pie(x=temp['Result'], autopct="%.1f%%", labels=labels)
plt.title('Impact of Toss Wins on Match Wins')


# Most Man of the Match Awards per Year

In [None]:
# For each year, find the player with the most player-of-the-match awards.
# The year is taken as the first four characters of the 'date' string.
data['year'] = data['date'].str[:4]
years = data['year'].unique()
print(years)

# Build the summary from a list of records instead of assigning rows one at a
# time through .loc.
award_records = []
for year in years:
    award_counts = data.loc[data['year'] == year, 'player_of_match'].value_counts()
    if award_counts.empty:
        # No recorded award for this year: nothing to report.
        continue
    award_records.append({
        'Year': year,
        # value_counts() sorts descending, so the first entry is the top player.
        'Man of Match': award_counts.index[0],
        # .iloc[0] is explicit positional access; plain [0] on a label-indexed
        # Series is deprecated in recent pandas.
        'Times': int(award_counts.iloc[0]),
    })

man_of_match = pd.DataFrame(award_records, columns=['Year', 'Man of Match', 'Times'])
man_of_match
  
    

In [None]:
# Bar chart of each yearly top player's award count.
# NOTE(review): a player who tops more than one year shares a single bar,
# because seaborn aggregates repeated x values — confirm this is intended.
sns.barplot(
    data=man_of_match,
    x='Man of Match',
    y='Times',
)
plt.xticks(rotation=90)
plt.title('Most Man of Match Year-Wise (2008-2020)')

# Top 5 Umpire_1

In [None]:
# Vertical bar chart of the five most frequent values in the 'umpire1' column.
# The counts are computed once and passed as vectors without `data=`: they
# have 5 entries, and newer seaborn raises when un-indexed vectors are
# combined with a `data` frame of a different length.
umpire1_counts = data['umpire1'].value_counts().head()
sns.barplot(x=umpire1_counts.index, y=umpire1_counts.values)
plt.xticks(rotation=45)
plt.xlabel('Umpire 1')
plt.ylabel('Match Count')
plt.title('Top 5 Umpire_1')

# Top 5 Umpire_2

In [None]:
# Horizontal bar chart of the five most frequent values in the 'umpire2' column.
# The counts are computed once and passed as vectors without `data=`: they
# have 5 entries, and newer seaborn raises when un-indexed vectors are
# combined with a `data` frame of a different length.
umpire2_counts = data['umpire2'].value_counts().head()
sns.barplot(x=umpire2_counts.values, y=umpire2_counts.index)
plt.xlabel('Match Count')
plt.ylabel('Umpire 2')
plt.title('Top 5 Umpire_2')