# **Exploratory Data Analysis of the Indian Premier League (2008-2019)**

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O 
import matplotlib.pyplot as plt # data visualisation
import seaborn as sns
%matplotlib inline

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [None]:
pip install jovian --quiet

In [None]:
import jovian

# Analysis Of Matches Dataset

In [None]:
# Load the match-level table and preview its first rows.
matches_path = '/kaggle/input/ipl-data-set/matches.csv'
matches_df = pd.read_csv(matches_path)
matches_df.head()

In [None]:
# (rows, columns) of the matches table.
matches_df.shape

Matches dataset has 756 rows and 18 columns

## Number of teams in all seasons:

In [None]:
# Distinct team names appearing in the 'team1' column.
matches_df['team1'].unique()

#### "Rising Pune Supergiants" appears under two spellings (one without the trailing "s"), so we replace *Rising Pune Supergiant* with *Rising Pune Supergiants*.
#### Delhi Daredevils are now called Delhi Capitals, so we replace that name as well.

In [None]:
# Normalise franchise names wherever they occur in the table: merge the two
# spellings of Rising Pune Supergiants and rename Delhi Daredevils to Delhi Capitals.
team_renames = {
    'Rising Pune Supergiant': 'Rising Pune Supergiants',
    'Delhi Daredevils': 'Delhi Capitals',
}
matches_df.replace(team_renames, inplace=True)

In [None]:
# Number of missing values in the 'umpire3' column.
matches_df['umpire3'].isna().sum()

Since 637 values of the third umpire are missing, we will drop the 'umpire3' column.

In [None]:
# Remove the mostly-empty 'umpire3' column from the table in place.
matches_df.drop(columns=['umpire3'], inplace=True)

## Number Of different Venues:

In [None]:
# Distinct values of the 'city' column.
matches_df['city'].unique()

## Which city has how many matches?

In [None]:
# Plot styling for the cells that follow: seaborn's 'darkgrid' theme and a
# default matplotlib font size of 14.
sns.set_style('darkgrid')
plt.rcParams['font.size'] = 14

In [None]:
# Bar chart of the number of matches played in each city, most frequent first.
fig, ax = plt.subplots(figsize=(14, 6))
city_counts = matches_df['city'].value_counts()
city_counts.plot.bar(ax=ax, width=0.7, color='red')
ax.set_xlabel('City')
ax.set_ylabel('No of matches')
plt.show()

### Which venue has how many matches?

In [None]:
# Bar chart of the number of matches hosted at each venue, most frequent first.
fig, ax = plt.subplots(figsize=(14, 6))
venue_counts = matches_df['venue'].value_counts()
venue_counts.plot.bar(ax=ax, width=0.7, color='blue')
ax.set_xlabel('Venue')
ax.set_ylabel('No of matches')
plt.show()

### Eden Gardens has hosted the most matches

## Which Team has won most matches?

In [None]:
# Bar chart of total wins per team, most successful first.
fig, ax = plt.subplots(figsize=(14, 6))
win_counts = matches_df['winner'].value_counts()
win_counts.plot.bar(ax=ax, width=0.9, color="orange", alpha=0.75)
ax.set_xlabel("Name of the teams")
ax.set_ylabel("No.of wins ")
plt.show()

### Mumbai Indians have won the most matches

## What do teams choose after winning toss?

In [None]:
# Pie chart of toss decisions; the counts are computed once and reused for
# both the wedge sizes and their labels.
toss_counts = matches_df['toss_decision'].value_counts()
plt.figure(figsize=(14, 6))
plt.pie(
    toss_counts,
    labels=toss_counts.index,
    autopct='%1.1f%%',
    startangle=180,
)
plt.show()

### Teams most often choose to field after winning the toss.

In [None]:
# Project identifier passed to every jovian.commit() call in this notebook.
project_name='IPL(2008-2019)-EDAProject'

In [None]:
# Commit the notebook to the Jovian project named by `project_name`.
jovian.commit(project=project_name)

## Who has been awarded Man of the Match the most times?

In [None]:
# Number of rows for each distinct 'player_of_match' value, i.e. how many
# awards each player received; the resulting Series is then wrapped in a DataFrame.
pom=matches_df.groupby('player_of_match')['player_of_match'].count()
pom_df=pd.DataFrame(pom)

In [None]:
# Give the count column a descriptive name (in place), then display the
# players ordered from most to fewest awards.
award_col = 'Number of ManOfMatch'
pom_df.rename(columns={'player_of_match': award_col}, inplace=True)
pom_df.sort_values(by=award_col, ascending=False)

#### Chris Gayle has won the Man of the Match award the most times (21)

# Analysis Of Players Dataset

In [None]:
# Load the players table from the Excel workbook and display it.
# NOTE(review): matches.csv is read via the absolute '/kaggle/input/...' path,
# while this uses '../input/...', which depends on the working directory —
# confirm both resolve to the same dataset folder.
players_df=pd.read_excel('../input/ipl-data-set/Players.xlsx')
players_df

In [None]:
# (rows, columns) of the players table.
players_df.shape

#### A total of 565 players have participated across all seasons of the IPL

## Number of players from each Country

In [None]:
# Count of players per country. The column is passed by keyword because
# seaborn 0.12+ interprets the only positional argument as `data`, so passing
# the Series positionally no longer plots it as the x variable.
sns.countplot(x='Country', data=players_df)
plt.xticks(rotation=75)
# Render the figure explicitly so the cell does not end on the xticks repr.
plt.show()

### Varieties of players

In [None]:
# Distinct values of the 'Bowling_Skill' column.
players_df['Bowling_Skill'].unique()

In [None]:
# Commit the notebook to the Jovian project named by `project_name`.
jovian.commit(project=project_name)

# Analysis of teams home and away matches 

In [None]:
# Load the per-team home/away summary table and display it.
# NOTE(review): this path is relative ('../input/...') whereas matches.csv is
# read from the absolute '/kaggle/input/...' path — confirm the working directory.
home_away_df=pd.read_csv('../input/ipl-data-set/teamwise_home_and_away.csv')
home_away_df

In [None]:
# Bar chart of each team's home win percentage. Axis labels are set so the
# figure is readable on its own, and plt.show() keeps the cell from ending on
# the xticks repr (matching the other plotting cells in this notebook).
plt.bar(home_away_df.team, home_away_df.home_win_percentage)
plt.xlabel('Team')
plt.ylabel('Home win percentage')
plt.xticks(rotation=90)
plt.show()

# Analysis Of Batsmen

In [None]:
# Load the per-batsman runs / average / strike-rate table and display it.
# NOTE(review): this path is relative ('../input/...') whereas matches.csv is
# read from the absolute '/kaggle/input/...' path — confirm the working directory.
bat_df=pd.read_csv('../input/ipl-data-set/most_runs_average_strikerate.csv')
bat_df

## Who is the highest run scorer of all seasons?

In [None]:
# Total runs for the first five rows of the batting table. x and y are passed
# by keyword because seaborn 0.12+ accepts only `data` positionally, so the
# positional form raises a TypeError there.
# NOTE(review): head(5) yields the top scorers only if the CSV is already
# sorted by total_runs in descending order — confirm.
sns.barplot(x='batsman', y='total_runs', data=bat_df.head(5))
plt.xticks(rotation=75)
# Render the figure explicitly so the cell does not end on the xticks repr.
plt.show()

#### Virat Kohli has scored the most runs (5426).

## Which batsman has highest strike rate?

In [None]:
# Restrict the table to batsmen who have scored more than 1000 runs.
bat_df[bat_df.total_runs>1000]

In [None]:
# Five highest strike rates among batsmen with more than 1000 runs.
top_strikers = (
    bat_df[bat_df.total_runs>1000]
    .sort_values('strikerate', ascending=False)
    .head(5)
)
# x and y are passed by keyword because seaborn 0.12+ accepts only `data`
# positionally, so the positional form raises a TypeError there.
sns.barplot(x='batsman', y='strikerate', data=top_strikers)
plt.xticks(rotation=75)
# Render the figure explicitly so the cell does not end on the xticks repr.
plt.show()

#### Andre Russell has the highest strike rate

In [None]:
# Commit the notebook to the Jovian project named by `project_name`.
jovian.commit(project=project_name)

## So this was the exploratory data analysis of the IPL dataset from 2008-2019. Hope this has helped you get to know some interesting insights from 12 seasons of the IPL.

## References:
Dataset-https://jovian.ml/outlink?url=https%3A%2F%2Fmatplotlib.org%2F3.1.1%2Fgallery%2Findex.html


Seaborn-https://jovian.ml/outlink?url=https%3A%2F%2Fseaborn.pydata.org%2Fexamples%2Findex.html

Matplotlib-https://jovian.ml/outlink?url=https%3A%2F%2Fmatplotlib.org%2F3.1.1%2Fgallery%2Findex.html


In [None]:
# Commit the notebook to the Jovian project named by `project_name`.
jovian.commit(project=project_name)