# Introducing the IPL Dataset

The dataset contains the following CSV files, each with a brief description:
1. points_table.csv - Provides the points table of all seasons. Smallest file of the dataset.
1. all_season_summary.csv - Contains a summary of all matches across seasons. Good for looking into the result of each game, scores, player of the match, venue, toss information, etc.
1. all_season_details.csv - Provides detailed ball-by-ball data of every match across all seasons. Largest file of the dataset.
1. all_season_bowling_card.csv - Provides bowling stats of all matches across seasons.
1. all_season_batting_card.csv - Provides batting stats of all matches across seasons.

Filter by season to look into each specific season data.

This notebook provides a bird's-eye view of the points table and summary data files

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns; 
%matplotlib inline
import matplotlib.pyplot as plt

import os

# Walk the Kaggle input directory and print the full path of every file found.
print('Files in this dataset')
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))


## Points Table
> Let's look at the points table first: it is the simplest file to start with.

In [None]:
# Load the points table of all seasons into a DataFrame (default integer index).
points_df = pd.read_csv(
    '/kaggle/input/indian-premier-league-ipl-all-seasons/points_table.csv',
    index_col=None,
)

In [None]:
# Report (rows, columns) of the points table.
print(f'Points Data Frame shape {points_df.shape}')

# Points table for 2020 season

In [None]:
# Display the first eight rows of the points table.
# NOTE(review): assumes the file's first eight rows belong to the 2020 season — confirm.
print('Points table for 2020 season')
points_df.iloc[:8]

## Matches won across all seasons

In [None]:
# Teams as rows, seasons as columns; each cell holds the aggregated 'matcheswon'
# value (pivot_table's default aggregation), with 0 for missing team/season pairs.
matcheswon_pivot = pd.pivot_table(
    points_df,
    values='matcheswon',
    index='short_name',
    columns='season',
    fill_value=0,
)

f = plt.figure(figsize=(10, 8))
gs = f.add_gridspec(1, 1)

with sns.axes_style("whitegrid"):
    ax = f.add_subplot(gs[0, 0])
    # Draw onto the axes created above and annotate every cell with its value.
    g1 = sns.heatmap(matcheswon_pivot, annot=True, fmt="g", cmap='rocket', ax=ax)
    g1.set_facecolor("#fdb913")
    g1.set_title("Number of matches won", fontsize=20)
    g1.set_xlabel("Season", fontsize=18)
    g1.set_ylabel("Team", fontsize=18)
    g1.set_xticklabels(g1.get_xticklabels(), rotation=70)
    g1.set_yticklabels(g1.get_yticklabels(), rotation=0)

# Matches Summary

In [None]:
# Load the per-match summary for all seasons and report its (rows, columns).
summary_df = pd.read_csv(
    '/kaggle/input/indian-premier-league-ipl-all-seasons/all_season_summary.csv',
    index_col=None,
)
print(f'Summary Data Frame shape {summary_df.shape}')

In [None]:
# Show every column when displaying wide frames.
# Use the fully qualified option name: the bare 'max_columns' pattern also matches
# 'styler.render.max_columns' on newer pandas, and set_option raises OptionError
# when a pattern matches more than one option.
pd.set_option('display.max_columns', None)
summary_df.head()

# Total games per season

In [None]:
f = plt.figure(figsize=(10, 8))
gs = f.add_gridspec(1, 1)
with sns.axes_style("darkgrid"):
    ax = f.add_subplot(gs[0, 0])
    # One bar per season, all in the same grey. `color` is the supported way to
    # request a single colour; a one-element `palette` without `hue` is deprecated
    # in newer seaborn and is shorter than the number of seasons.
    g1 = sns.countplot(x="season", data=summary_df, color='#808282', ax=ax)
    g1.set_title("Total games per season", fontsize=20)
    g1.set_xlabel("Season", fontsize=18)
    g1.set_ylabel('# Games', fontsize=20)

# Team performance
Let's explore how teams performed overall. It's interesting to see that having the most wins does not translate into having the highest win percentage. That is because some teams did not play in every season, notably Chennai Super Kings and Rajasthan Royals.

It's evident why there is a **fierce rivalry between Chennai Super Kings and Mumbai Indians**.



In [None]:
def calculateWinPercentage(summary=None, teams=None):
    """Compute games played, wins and win percentage for each team.

    Parameters
    ----------
    summary : pandas.DataFrame, optional
        Match summary with 'home_team', 'away_team' and 'winner' columns.
        Defaults to the notebook-level ``summary_df``.
    teams : iterable of str, optional
        Team short names to report on. Defaults to the built-in list of teams.

    Returns
    -------
    list of dict
        One dict per team, in input order, with keys 'team', 'total_games',
        'total_wins' and 'win_pct'. 'win_pct' is 0.0 for a team with no games
        (instead of raising ZeroDivisionError).
    """
    if summary is None:
        summary = summary_df
    if teams is None:
        teams = ['MI', 'DC', 'SRH', 'RR', 'KKR', 'KXIP', 'CSK', 'RCB', 'RPS', 'GL', 'PWI', 'Kochi']
    win_percent = []
    for team in teams:
        # Literal substring match (regex=False) so the team name is never read as
        # a pattern; na=False makes rows with a missing team name simply not match.
        played = (
            summary['home_team'].str.contains(team, regex=False, na=False)
            | summary['away_team'].str.contains(team, regex=False, na=False)
        )
        games = int(played.sum())
        # A win is an exact match between the 'winner' column and the team name.
        num_wins = int((summary.loc[played, 'winner'] == team).sum())
        win_percent.append({
            'team': team,
            'total_games': games,
            'total_wins': num_wins,
            'win_pct': (num_wins / games) * 100 if games else 0.0,
        })
    return win_percent
win_percent = calculateWinPercentage()
# One row per team, ranked by total wins (most first). drop=True discards the
# pre-sort index instead of inserting it as a stray 'index' column.
winpct_df = pd.DataFrame(win_percent, index=None)
winpct_df = winpct_df.sort_values('total_wins', ascending=False).reset_index(drop=True)
winpct_df

In [None]:
f = plt.figure(figsize=(16, 5))
gs = f.add_gridspec(1, 2)
# Left panel: teams by total wins, in the row order of winpct_df.
with sns.axes_style("darkgrid"):
    ax = f.add_subplot(gs[0, 0])
    # `color` gives every bar the same colour; a one-element `palette` without
    # `hue` is deprecated in newer seaborn.
    g1 = sns.barplot(y="team", x="total_wins", data=winpct_df, color="#005ea0", ax=ax)
    g1.set_title("Most wins", fontsize=20)
    g1.set_xlabel("# Wins", fontsize=18)
    g1.set_ylabel('Team', fontsize=20)
# Right panel: same teams re-ranked by win percentage. drop=True avoids adding
# the old index as an extra column.
winpct_df2 = winpct_df.sort_values('win_pct', ascending=False).reset_index(drop=True)
with sns.axes_style("darkgrid"):
    ax = f.add_subplot(gs[0, 1])
    g1 = sns.barplot(y="team", x="win_pct", data=winpct_df2, color="#fdb913", ax=ax)
    g1.set_title("Win percentage", fontsize=20)
    g1.set_xlabel("% Wins", fontsize=18)
    g1.set_ylabel('Team', fontsize=20)

# Venues
The IPL has also travelled overseas, to South Africa and the UAE!

In [None]:
f = plt.figure(figsize=(10, 8))
gs = f.add_gridspec(1, 1)
with sns.axes_style("darkgrid"):
    ax = f.add_subplot(gs[0, 0])
    # Horizontal bars, one per venue, ordered by number of matches (most first).
    g1 = sns.countplot(
        y="venue_name",
        data=summary_df,
        palette="rocket",
        order=summary_df['venue_name'].value_counts().index,
        ax=ax,
    )
    g1.set_title("Matches played at different venue", fontsize=20)
    g1.set_xlabel("Number of matches", fontsize=18)
    g1.set_ylabel(None, fontsize=20)
    # Fixed x ticks every 10 matches from 0 to 90.
    g1.set_xticks(range(0, 100, 10))

# Toss Advantage
It's interesting to see that although teams choose to bowl first after winning the toss, winning the toss does not have much influence on the outcome of the game.


In [None]:
def TossWinVsGameWin(summary=None):
    """Flag, per match, whether the toss winner also won the game.

    Parameters
    ----------
    summary : pandas.DataFrame, optional
        Match summary with 'season', 'toss_won', 'decision' and 'winner' columns.
        Defaults to the notebook-level ``summary_df``.

    Returns
    -------
    pandas.DataFrame
        A new frame with those four columns plus a boolean
        'toss_influenced_outcome' column that is True where 'toss_won'
        equals 'winner'. The input frame is not modified.
    """
    if summary is None:
        summary = summary_df
    # Work on an explicit copy: adding a column to a slice of the source frame
    # triggers pandas' SettingWithCopyWarning.
    toss = summary[['season', 'toss_won', 'decision', 'winner']].copy()
    # Vectorised comparison; rows with a missing value compare as False.
    toss['toss_influenced_outcome'] = toss['toss_won'] == toss['winner']
    return toss
# Build the toss-vs-result frame once; the toss plots read it as `toss`.
toss = TossWinVsGameWin()

In [None]:
f = plt.figure(figsize=(20, 2))
gs = f.add_gridspec(1, 2)
# Left panel: what the toss winner chose to do.
with sns.axes_style("darkgrid"):
    ax = f.add_subplot(gs[0, 0])
    g1 = sns.countplot(y="decision", data=toss, palette='magma', ax=ax)
    g1.set_title("What does the team do after winning toss?", fontsize=20)
    g1.set_ylabel(None, fontsize=18)
    g1.set_xlabel('Count', fontsize=20)
# Right panel: how often the toss winner also won the game.
with sns.axes_style("darkgrid"):
    ax = f.add_subplot(gs[0, 1])
    # Label the outcomes in the data and fix the bar order explicitly, rather than
    # overwriting tick labels afterwards: that relied on the implicit category
    # order and on both outcomes being present.
    toss_outcome = toss['toss_influenced_outcome'].map({False: 'No', True: 'Yes'})
    g1 = sns.countplot(y=toss_outcome, palette='magma', order=['No', 'Yes'], ax=ax)
    g1.set_title("Did winning toss help teams to win the game?", fontsize=20)
    g1.set_ylabel(None, fontsize=18)
    g1.set_xlabel('Count', fontsize=20)

# Player of the match


In [None]:
# Remove the abandoned games (rows whose 'pom' is the string 'None').
has_pom = summary_df['pom'] != 'None'
summary_pom = summary_df[has_pom]
f = plt.figure(figsize=(14, 8))
gs = f.add_gridspec(1, 1)
with sns.axes_style("darkgrid"):
    ax = f.add_subplot(gs[0, 0])
    # Only the 20 most frequent award winners are drawn, most awards first.
    g1 = sns.countplot(
        y="pom",
        data=summary_pom,
        palette='magma',
        order=summary_pom['pom'].value_counts().index[:20],
        ax=ax,
    )
    g1.set_title("Top 20 all time player of the match award winners", fontsize=20)
    g1.set_ylabel(None, fontsize=18)
    g1.set_xlabel('Count', fontsize=20)

# Captaincy

In [None]:
def Captaincy(summary=None):
    """Collect the winning captain and team for every match.

    Parameters
    ----------
    summary : pandas.DataFrame, optional
        Match summary with 'home_team', 'away_team', 'winner', 'home_captain'
        and 'away_captain' columns. Defaults to the notebook-level ``summary_df``.

    Returns
    -------
    list of dict
        One dict per match, in row order, with keys 'winning_captain' and
        'winning_team'. The dict is empty when 'winner' matches neither the
        home nor the away team.
    """
    if summary is None:
        summary = summary_df
    columns = ('home_team', 'away_team', 'winner', 'home_captain', 'away_captain')
    captaincy_data = []
    # Iterate the needed columns directly instead of re-indexing the frame by
    # label for every value; label lookups break when the index is not unique.
    for home, away, winner, home_captain, away_captain in zip(*(summary[c] for c in columns)):
        row = {}
        if home == winner:
            row['winning_captain'] = home_captain
            row['winning_team'] = home
        elif away == winner:
            row['winning_captain'] = away_captain
            row['winning_team'] = away
        captaincy_data.append(row)
    return captaincy_data
# One row per match; matches with no recorded winning side become all-NaN rows.
captaincy_data = Captaincy()
cap_df = pd.DataFrame(captaincy_data, index=None)
f = plt.figure(figsize=(14, 12))
gs = f.add_gridspec(1, 1)
with sns.axes_style("darkgrid"):
    ax = f.add_subplot(gs[0, 0])
    # Captains ordered by number of wins, most first.
    g1 = sns.countplot(
        y="winning_captain",
        data=cap_df,
        palette='viridis_r',
        order=cap_df['winning_captain'].value_counts().index,
        ax=ax,
    )
    g1.set_title("Most wins by a captain", fontsize=20)
    g1.set_ylabel(None, fontsize=18)
    g1.set_xlabel('Count', fontsize=20)