# IPL 2008-2020

## Importing Libraries 

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size (width, height) applied to every plot drawn below.
sns.set(rc={'figure.figsize':(12,10)})
# Alternative per-figure sizing, left disabled:
#plt.figure(figsize=(15,12))



import os
# Print the full path of every file found anywhere under /kaggle/input.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))



## Importing Dataset

In [None]:
# Load the two CSV files of the dataset into DataFrames.
matches = pd.read_csv(
    "../input/ipl-complete-dataset-20082020/IPL Matches 2008-2020.csv"
)
deliveries = pd.read_csv(
    "../input/ipl-complete-dataset-20082020/IPL Ball-by-Ball 2008-2020.csv"
)

In [None]:
# Distinct team names, in order of first appearance in the `team1` column.
team_names = matches['team1'].unique()
team_names

In [None]:
# Short codes that replace the full team names everywhere in `matches`.
# NOTE(review): the pairing with `team_names` is purely positional, so it
# silently depends on the order in which unique() returned the names.
# "RPS" is listed twice — presumably two spellings of the same franchise;
# confirm against the printed `team_names`.
team_names_abv = [
    "RCB", "KXIP", "DD", "MI", "KKR",
    "RR", "DeCh", "CSK", "KTK", "PWI",
    "SH", "GL", "RPS", "RPS", "DC",
]
matches.replace(to_replace=team_names, value=team_names_abv, inplace=True)

In [None]:
# Display the matches table after the team names were abbreviated.
matches

## Data Cleaning

In [None]:
# Bar chart of the number of missing values in each column of `matches`.
matches.isna().sum().plot.bar()

### Fixing Non D/L Matches

In [None]:
# A missing `method` is replaced by 0; all other columns are left as they are.
matches = matches.fillna({'method': 0})


In [None]:
# Show the rows whose `result` is missing.
matches.loc[matches['result'].isna()]

### Removing No-Result Matches

In [None]:
# Keep only the matches that have a result. The explicit copy makes `matches`
# an independent DataFrame, so the in-place assignments made to it in later
# cells do not trigger SettingWithCopyWarning or act on a temporary object.
matches = matches[matches['result'].notnull()].copy()


### Fixing Result Margin of Tie Matches

In [None]:
# Tied matches get a result margin of 0.
is_tie = matches['result'] == 'tie'
matches.loc[is_tie, 'result_margin'] = 0

### Fixing Missing Values of City according to their Venues

In [None]:
# Venues of the matches whose city is missing.
matches.loc[matches['city'].isna(), 'venue']

In [None]:
# Distinct city values recorded for the Dubai stadium.
matches.loc[matches['venue'] == 'Dubai International Cricket Stadium', 'city'].unique()

In [None]:
# Distinct city values recorded for the Sharjah stadium.
matches.loc[matches['venue'] == 'Sharjah Cricket Stadium', 'city'].unique()

In [None]:
def venue_func(x):
    """Translate a venue name into its city for the two known stadiums.

    Returns 'Dubai' or 'Sharjah' for the matching stadium name; any other
    value is returned unchanged.
    """
    known_venues = (
        ('Dubai International Cricket Stadium', 'Dubai'),
        ('Sharjah Cricket Stadium', 'Sharjah'),
    )
    for venue, city in known_venues:
        if x == venue:
            return city
    return x
    
# Fill in only the missing cities, deriving them from the venue; cities that
# are already present are left untouched. (Assigning the mapped venue to every
# row would overwrite each known city with its venue name.)
matches.city = matches.city.fillna(matches.venue.apply(venue_func))

In [None]:
# Missing values remaining in each column after the cleaning steps.
matches.isna().sum()

## Number of Matches at Venues

In [None]:
# Horizontal bar chart of the 15 venues with the most matches.
matches['venue'].value_counts().head(15).plot.barh()


## Number of D/L Matches at Each Venue

In [None]:
# Count, per venue, the matches whose `method` is 'D/L'.
dl_rows = matches['method'] == 'D/L'
matches.loc[dl_rows, ['venue']].value_counts()

## Toss Decisions

In [None]:
# Pie chart of how often each toss decision was taken.
matches['toss_decision'].value_counts().plot.pie()

## Total Toss Wins per Team

In [None]:

# Toss wins per team, drawn side by side as a bar chart and as a pie chart.
toss_wins = matches['toss_winner'].value_counts()
plt.subplot(1, 2, 1)
toss_wins.plot(kind='bar')
plt.subplot(1, 2, 2)
toss_wins.plot(kind='pie')

## Total Matches per Year

In [None]:
# The season is taken from the first four characters of `date`
# (assumes the date strings start with a four-digit year — TODO confirm).
matches['season'] = matches['date'].str.slice(0, 4).astype(int)
# Bars appear in descending order of match count, not in chronological order.
matches['season'].value_counts().plot.bar()

## Matches Won by Every Team

In [None]:
# Number of matches won by each team, most successful first.
total_win = matches['winner'].value_counts()
total_win.plot.bar()

## Total Matches Played by every Team

In [None]:
# Matches played per team = appearances as team1 + appearances as team2.
# Series.add with fill_value=0 treats a team that is absent from one of the
# two columns as having zero appearances there; a plain `+` would produce NaN
# for such a team. The sum is cast back to int because it is a count.
total_matches = (
    matches.team1.value_counts()
    .add(matches.team2.value_counts(), fill_value=0)
    .astype(int)
)
total_matches.plot(kind = 'bar')

## Win Percentage of every team

In [None]:
# Share of played matches that each team won, expressed in percent.
win_percentage = total_win.div(total_matches).mul(100)
win_percentage.plot.bar()

## Eliminator Match Wins per Team

In [None]:
# Pie chart of winners among the matches flagged with eliminator == 'Y'.
matches.loc[matches['eliminator'] == 'Y', 'winner'].value_counts().plot.pie()

## Most Player of the Match Awards

In [None]:
# The ten players named player of the match most often.
matches['player_of_match'].value_counts().head(10).plot.bar()

## Ball-by-Ball Analysis

In [None]:
# Preview the first five rows of the ball-by-ball data.
deliveries.head(n=5)

In [None]:
# Apply the same full-name -> short-code substitution to the ball-by-ball data.
deliveries.replace(to_replace=team_names, value=team_names_abv, inplace=True)

In [None]:
# Number of delivery rows recorded for each value of `inning`.
sns.countplot(x=deliveries['inning'])

In [None]:
# Print column names, non-null counts and dtypes of the ball-by-ball data.
deliveries.info()

In [None]:
# Horizontal bar chart of missing values per column of `deliveries`.
deliveries.isna().sum().plot.barh()

### Most Common Ways Batsmen Are Dismissed

In [None]:
# Frequency of each dismissal kind, most common first.
deliveries.dismissal_kind.value_counts()

### Most Matches Played by Batsman

In [None]:
# Number of distinct match ids in which each batsman appears.
# The built-in 'nunique' aggregation replaces a per-group Python lambda
# (len(set(x))): it is vectorized and does not count missing ids.
most_match_bats = (
    deliveries.groupby(['batsman'])
    .agg({'id': 'nunique'})
    .sort_values(ascending=False, by='id')
)
most_match_bats[:15]

### Most Matches Played by Bowler

In [None]:
# Number of distinct match ids in which each bowler appears.
# The built-in 'nunique' aggregation replaces a per-group Python lambda
# (len(set(x))): it is vectorized and does not count missing ids.
bowl_match = (
    deliveries.groupby(['bowler'])
    .agg({'id': 'nunique'})
    .sort_values(ascending=False, by='id')
)
bowl_match[:15]

### Bowlers Who Bowled the Most Deliveries

In [None]:
# The ten bowlers with the most delivery rows.
deliveries['bowler'].value_counts().head(10).plot.bar()

### Batsmen Who Faced the Most Balls

In [None]:
# Delivery rows per batsman, largest first.
bats = deliveries['batsman'].value_counts()
bats.head(15)

### Top Batsmen (According to Runs)

In [None]:
# Total batsman runs per batsman, highest first.
runs_by_batsman = deliveries.groupby(['batsman'])['batsman_runs'].sum()
runs = runs_by_batsman.sort_values(ascending=False)
runs.head(15)

### Top Batsmen (According to Strike Rate)

In [None]:
# Runs per 100 deliveries, restricted to the 100 batsmen with the most
# delivery rows; everyone else becomes NaN and sorts to the end.
# NOTE(review): the denominator counts every delivery row for the batsman;
# if the data contains deliveries that do not count as balls faced, the
# rates are understated — confirm.
most_balls = bats.head(100)
strike_rate = runs.div(most_balls).mul(100).sort_values(ascending=False)
strike_rate.head(15).plot.barh()

### Batsmen Dismissed the Most Times

In [None]:
# Sum of `is_wicket` over the deliveries each batsman was on strike for.
# NOTE(review): this attributes every wicket to the striker; if the dismissed
# player can be someone else (e.g. the non-striker), the counts are off —
# confirm against the dataset's columns.
wickets_on_strike = deliveries.groupby(['batsman'])['is_wicket'].sum()
out = wickets_on_strike.sort_values(ascending=False)
out.head(15)

### Highest Batting Averages

In [None]:
# Runs per dismissal, limited to the 240 batsmen with the most dismissals;
# batsmen outside that set become NaN and sort to the end.
most_dismissed = out.head(240)
avg = runs.div(most_dismissed).sort_values(ascending=False)
avg.head(15)

### Batsmen Who Hit the Most 6s

In [None]:
# The 15 batsmen with the most deliveries scored for six batsman runs.
deliveries.loc[deliveries['batsman_runs'] == 6, 'batsman'].value_counts().head(15)

### Batsmen Who Hit the Most 4s

In [None]:
# The 10 batsmen with the most deliveries scored for four batsman runs.
deliveries.loc[deliveries['batsman_runs'] == 4, 'batsman'].value_counts().head(10)

### Number of Times a Batsman Hit a Six off a No-Ball

In [None]:
# Deliveries with total_runs == 7, counted per batsman (top 10).
# NOTE(review): 7 is used as a proxy for "six plus one no-ball extra"; any
# other combination of runs adding up to 7 is counted as well — confirm.
deliveries.loc[deliveries['total_runs'] == 7, 'batsman'].value_counts().head(10)

### Most Wickets by Bowler

In [None]:
# Sum of `is_wicket` over every delivery bowled by each bowler.
# NOTE(review): this counts every wicket that fell on the bowler's
# deliveries; dismissal kinds not normally credited to the bowler are
# presumably included too — confirm using `dismissal_kind`.
wickets_by_bowler = deliveries.groupby(['bowler'])['is_wicket'].sum()
most_wickets = wickets_by_bowler.sort_values(ascending=False)
most_wickets.head(15)

### Bowlers Who Concede the Most Extra Runs (Including LB, NB, Wide) per Match

In [None]:
# Extra runs conceded by each bowler divided by the number of matches the
# bowler appeared in; the 15 highest ratios are shown.
extras_by_bowler = deliveries.groupby(['bowler'])['extra_runs'].sum()
extras_by_bowler.div(bowl_match['id']).sort_values(ascending=False).head(15)

### Highest Scores by team

In [None]:
# Total runs per (match id, batting team); the 15 largest totals are shown.
team_totals = deliveries.groupby(['id', 'batting_team'])['total_runs'].sum()
team_totals.sort_values(ascending=False).head(15)

### Wicket Hauls by bowlers

In [None]:
# Wickets per (match id, bowler), then how many times each wicket count occurs.
wickets_per_spell = deliveries.groupby(['id', 'bowler'])['is_wicket'].sum()
wickets_per_spell.sort_values(ascending=False).value_counts()


Only 6 times in the IPL has a bowler taken 6 wickets in a match.
Only 5 times in the IPL has a bowler taken 5 wickets in a match.

### Highest Scores by Batsman in a Match

In [None]:

# Individual score per (batsman, match id, batting team), computed as total
# runs minus extra runs; the 15 highest are kept.
per_innings = deliveries.groupby(['batsman', 'id', 'batting_team'])
scored = per_innings.total_runs.sum() - per_innings.extra_runs.sum()
high_scores = scored.sort_values(ascending=False)[:15]
high_scores