![](http://images.indianexpress.com/2020/09/ipl-schedule.jpg)

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import altair as alt
import plotly.graph_objects as go
import plotly.express as px
from matplotlib import pyplot as plt


# Match-level data from the Kaggle IPL 2008-2020 dataset.
IPL_match = pd.read_csv(
    '../input/ipl-complete-dataset-20082020/IPL Matches 2008-2020.csv'
)
# Ball-by-ball data from the same dataset.
IPL_ball = pd.read_csv(
    '../input/ipl-complete-dataset-20082020/IPL Ball-by-Ball 2008-2020.csv'
)

# Preview the first rows of the match table.
IPL_match.head(n=5)

# Most matches were played at home (non-neutral) venues, and the winning margins were small.
# This means most matches stayed exciting until the end.

In [None]:
# Binned grid: neutral-venue flag on x, result margin on y.
heatmap = (
    alt.Chart(IPL_match)
    .mark_rect()
    .encode(
        x=alt.X('neutral_venue:Q', bin=True),
        y=alt.Y('result_margin:Q', bin=True),
    )
)

# Same encoding drawn as points whose size is the record count of each bin.
circles = heatmap.mark_point().encode(
    color=alt.ColorValue('lightgray'),
    size=alt.Size('count()', legend=alt.Legend(title='Records in Selection')),
    tooltip='count():Q',
)

# Layer the points on top of the rectangles.
heatmap + circles

# Matches won at each location by different teams every year

In [None]:
# Derive the calendar year of each match from its date.
IPL_match['year'] = pd.to_datetime(IPL_match['date']).dt.year

# Non-null counts per (year, city, winner); the 'id' count is plotted below.
IPL_match1 = IPL_match.groupby(['year', 'city', 'winner']).count()
IPL_match2 = (
    IPL_match1
    .reset_index()
    .loc[:, ['year', 'city', 'winner', 'id']]
    .sort_values(by='year')
)

# First row seen for each city; its 'city' column fixes the x-axis order.
IPL_match3 = IPL_match2.drop_duplicates(subset=['city'])

fig = px.scatter(
    IPL_match2,
    x='city',
    y='id',
    color='winner',
    animation_frame='year',
    width=1800,
    height=400,
)
fig.update_xaxes(categoryorder='array', categoryarray=IPL_match3['city'])
fig.show()

*Double click on the chart after moving the slider to make it refresh*

# Man of the Match awards won at each location by different players every year

What becomes clear in the diagrams below is that no one player has more than 2-3 Man of the Match awards in any year. So clearly it is the team that wins the IPL, not individuals.

In [None]:
# Non-null counts per (year, city, player_of_match); 'id' holds the award count.
IPL_match1 = IPL_match.groupby(['year', 'city', 'player_of_match']).count()
IPL_match2 = (
    IPL_match1
    .reset_index()
    .loc[:, ['year', 'city', 'player_of_match', 'id']]
    .sort_values(by='year')
)

# First row seen for each city; its 'city' column fixes the x-axis order.
IPL_match3 = IPL_match2.drop_duplicates(subset=['city'])

fig = px.scatter(
    IPL_match2,
    x='city',
    y='id',
    color='player_of_match',
    animation_frame='year',
    width=1800,
    height=400,
)
fig.update_xaxes(categoryorder='array', categoryarray=IPL_match3['city'])
fig.show()

*Double click on the chart after moving the slider to make it refresh*

In [None]:
# Same per-(year, city, player) counts as the previous chart, with the player
# on the x-axis and the city as colour.
fig = px.scatter(
    IPL_match2,
    x='player_of_match',
    y='id',
    color='city',
    animation_frame='year',
    width=1800,
    height=400,
)

# The axis order must list every player.  IPL_match3 is de-duplicated by city,
# so it only carries one player per city; take the distinct players from
# IPL_match2 (already sorted by year) instead.
player_order = IPL_match2['player_of_match'].drop_duplicates()
fig.update_xaxes(categoryorder='array', categoryarray=player_order)
fig.show()

*Double click on the chart after moving the slider to make it refresh*

# Impact of Toss on the outcome

In [None]:
def WinCheck(row):
    """Return True when the toss winner of ``row`` is also the match winner.

    ``row`` is any object indexable by the keys 'toss_winner' and 'winner',
    such as a row handed over by ``DataFrame.apply(..., axis=1)``.
    """
    return True if row['toss_winner'] == row['winner'] else False

# Flag every match in which the toss winner also won the game.
IPL_match['TossWinGameWin'] = IPL_match.apply(WinCheck, axis=1)

# Non-null counts per (year, toss_winner, flag); 'id' becomes the plotted count.
IPL_match1 = IPL_match.groupby(['year', 'toss_winner', 'TossWinGameWin']).count()
IPL_match2 = (
    IPL_match1
    .reset_index()
    .loc[:, ['year', 'toss_winner', 'TossWinGameWin', 'id']]
    .sort_values(by='year')
)

# First row seen for each toss winner (taken before the column is renamed).
IPL_match3 = IPL_match2.drop_duplicates(subset=['toss_winner'])
IPL_match2 = IPL_match2.rename(columns={'id': 'count'})

In [None]:
# Preview the first five rows of the toss aggregation.
IPL_match2.head(n=5)

In [None]:
# One marker per (toss winner, flag) and year; colour and size both show the count.
fig = px.scatter(
    IPL_match2,
    x='toss_winner',
    y='TossWinGameWin',
    color='count',
    size='count',
    animation_frame='year',
    width=1800,
    height=400,
)
fig.update_xaxes(categoryorder='array', categoryarray=IPL_match3['toss_winner'])
fig.show()

# In many stadiums, teams that win the toss always field

In [None]:
# Non-null counts per (year, city, toss_decision).
IPL_match1 = IPL_match.groupby(['year', 'city', 'toss_decision']).count()
IPL_match2 = (
    IPL_match1
    .reset_index()
    .loc[:, ['year', 'city', 'toss_decision', 'id']]
    .sort_values(by='year')
)

# First row seen for each city (taken before the column is renamed).
IPL_match3 = IPL_match2.drop_duplicates(subset=['city'])
IPL_match2 = IPL_match2.rename(columns={'id': 'count'})

fig = px.scatter(
    IPL_match2,
    x='city',
    y='count',
    color='toss_decision',
    animation_frame='year',
    width=1800,
    height=400,
)
fig.update_xaxes(categoryorder='array', categoryarray=IPL_match3['city'])
fig.show()

# If you win the toss you should field. Also, if you field first, the chances of winning are higher, but only by a narrow margin.

# In 2019 at Bangalore, Chennai, Mumbai and Kolkata, the team that won the toss always fielded

In [None]:
!pip install pivottablejs
from pivottablejs import pivot_ui
pivot_ui(IPL_ball)

# Best Batsmen from each team

In [None]:
# Batting columns only (the next cell reassigns this name to an aggregate).
IPL_ball_bat = IPL_ball.loc[:, ['batting_team', 'batsman', 'batsman_runs']]

# Rows whose 'non_boundary' value is non-zero, counted per (batsman, runs value).
IPL1 = (
    IPL_ball[IPL_ball['non_boundary'] != 0]
    .loc[:, ['batsman', 'batsman_runs', 'id']]
    .groupby(['batsman', 'batsman_runs'])
    .count()
)

In [None]:
# Total batsman_runs per (batting_team, batsman), highest total first.
IPL_ball_bat = (
    IPL_ball[['batting_team', 'batsman', 'batsman_runs']]
    .groupby(['batting_team', 'batsman'])
    .sum()
    .reset_index()
    .sort_values(by='batsman_runs', ascending=False)
)

In [None]:
# Preview the first five rows of the ball-by-ball table.
IPL_ball.head(n=5)

In [None]:
# Total runs per (batting_team, batsman), highest total first.
IPL_ball_bat2 = (
    IPL_ball[['batting_team', 'batsman', 'batsman_runs']]
    .groupby(['batting_team', 'batsman'])
    .sum()
    .reset_index()
    .sort_values(by='batsman_runs', ascending=False)
)

# Two-step count: collapse to one row per (team, batsman, match id), then
# count those rows per (team, batsman) so that 'id' holds the match count.
IPL_ball_bat3 = (
    IPL_ball[['batting_team', 'batsman', 'id', 'is_wicket']]
    .groupby(['batting_team', 'batsman', 'id'])
    .count()
    .reset_index()
    .groupby(['batting_team', 'batsman'])
    .count()
    .reset_index()
    .sort_values(by='id', ascending=False)
)

# Join runs with match counts, drop the helper column and label the count.
IPL_bat_lst = (
    IPL_ball_bat2
    .merge(
        IPL_ball_bat3,
        left_on=['batting_team', 'batsman'],
        right_on=['batting_team', 'batsman'],
    )
    .drop(columns=['is_wicket'])
    .rename(columns={'id': 'Matches'})
)

In [None]:
# Display the merged per-(batting_team, batsman) table of runs and matches.
IPL_bat_lst

In [None]:
# 'Average' here is total runs divided by the number of matches batted in.
IPL_bat_lst = (
    IPL_bat_lst
    .assign(Average=IPL_bat_lst['batsman_runs'] / IPL_bat_lst['Matches'])
    .sort_values(by='Average', ascending=False)
)

fig = px.scatter(
    IPL_bat_lst,
    x='Average',
    y='Matches',
    size='batsman_runs',
    color='batsman',
    width=1000,
    height=400,
)
fig.show()

# From the graph it is clear that batsmen who have played more than 50 matches and have an average over 27 are the consistent performers and an asset to their team

In [None]:
# Keep batsmen with an average above 27 over more than 50 matches.
IPL_best_bat_lst = IPL_bat_lst.loc[
    IPL_bat_lst['Average'].gt(27) & IPL_bat_lst['Matches'].gt(50)
]

# First fifteen names, in the table's current row order.
IPL_best_bat_lst['batsman'].head(15)

# Highest scorers in IPL from each team

In [None]:
# Top thirty rows of the runs table (already sorted by runs, descending).
IPL_ball_bat1 = IPL_ball_bat.head(30)

fig = px.scatter(
    IPL_ball_bat1,
    x='batsman',
    y='batsman_runs',
    size='batsman_runs',
    color='batting_team',
    width=1000,
    height=400,
)
fig.show()

In [None]:
IPL_ball1 = IPL_ball.sort_values(by='over')
IPL_ball2 = IPL_ball1[['batsman', 'batting_team', 'over', 'batsman_runs']]

# Distinct batsmen with at least one ball where over < 1, joined to their
# per-team run totals and summed per batsman, highest total first.
IPL_openr_lst = (
    IPL_ball1.loc[IPL_ball2['over'] < 1, 'batsman']
    .drop_duplicates()
    .to_frame()
    .merge(IPL_ball_bat, left_on='batsman', right_on='batsman')
    .groupby('batsman')
    .sum()
    .sort_values(by='batsman_runs', ascending=False)
    .reset_index()
)

# Calculating Best Averages and Ability to Bat in multiple positions (Open, Middle order and Slog overs)

In [None]:
# First fifteen rows of the opener table.
IPL_openr_lst.head(15)

In [None]:
IPL_ball1 = IPL_ball.sort_values(by='over')
IPL_ball2 = IPL_ball1[['batsman', 'batting_team', 'over', 'batsman_runs']]

# Distinct batsmen with at least one ball where 10 < over < 15, joined to
# their per-team run totals and summed per batsman, highest total first.
IPL_mid_lst = (
    IPL_ball2.loc[IPL_ball2['over'].gt(10) & IPL_ball2['over'].lt(15), 'batsman']
    .drop_duplicates()
    .to_frame()
    .merge(IPL_ball_bat, left_on='batsman', right_on='batsman')
    .groupby('batsman')
    .sum()
    .sort_values(by='batsman_runs', ascending=False)
    .reset_index()
)

In [None]:
IPL_ball1 = IPL_ball.sort_values(by='over')
IPL_ball2 = IPL_ball1[['batsman', 'batting_team', 'over', 'batsman_runs']]

# Distinct batsmen with at least one ball where over > 18, joined to their
# per-team run totals and summed per batsman, highest total first.
IPL_slog_lst = (
    IPL_ball2.loc[IPL_ball2['over'].gt(18), 'batsman']
    .drop_duplicates()
    .to_frame()
    .merge(IPL_ball_bat, left_on='batsman', right_on='batsman')
    .groupby('batsman')
    .sum()
    .sort_values(by='batsman_runs', ascending=False)
    .reset_index()
)

In [None]:
# Number of non-null 'Average' values, read from the 'count' row of describe().
# Attribute access (`.count`) would bind the Series.count method instead of
# returning this statistic.
new1 = IPL_bat_lst.describe().loc['count', 'Average']

In [None]:
# Display the table of batsmen who faced a ball where 10 < over < 15.
IPL_mid_lst

In [None]:
index = 0  # not read in this cell; kept so the notebook namespace is unchanged

# For every row of IPL_bat_lst, flag (1/0) whether the batsman appears in the
# opener, middle-order and slog-over tables.  Series.isin does one hashed
# lookup per row instead of scanning each lookup table with nested iterrows
# loops, and it always yields exactly one flag per row, so the lists stay the
# same length as IPL_bat_lst even if a lookup table repeated a name.
batsman_names = IPL_bat_lst['batsman']

Openlist = batsman_names.isin(IPL_openr_lst['batsman']).astype(int).tolist()
Midlist = batsman_names.isin(IPL_mid_lst['batsman']).astype(int).tolist()
Sloglist = batsman_names.isin(IPL_slog_lst['batsman']).astype(int).tolist()
        

In [None]:
# Attach the three flag lists positionally (as arrays, so no index alignment).
for flag_column, flag_values in (
    ('Openlist', Openlist),
    ('Midlist', Midlist),
    ('Sloglist', Sloglist),
):
    IPL_bat_lst[flag_column] = pd.Series(flag_values).to_numpy()

# Best Batsman in IPL based on averages and ability to bat at multiple positions

In [None]:
# First ten rows of the batsman table in its current (Average-sorted) order.
IPL_bat_lst.head(10)

# Best Bowlers in IPL Team

In [None]:
# Deliveries flagged as a wicket, counted per (bowling_team, bowler) through
# the non-null 'dismissal_kind' values, highest count first.
IPL2 = (
    IPL_ball[IPL_ball['is_wicket'] != 0]
    .loc[:, ['bowling_team', 'bowler', 'dismissal_kind']]
    .groupby(['bowling_team', 'bowler'])
    .count()
    .reset_index()
    .sort_values(by='dismissal_kind', ascending=False)
)

# Top twenty rows for the chart.
IPL3 = IPL2.head(20)

fig = px.scatter(
    IPL3,
    x='bowler',
    y='dismissal_kind',
    size='dismissal_kind',
    color='bowling_team',
    width=1000,
    height=400,
)
fig.show()

# Best position for each player

In [None]:
IPL_ball1 = IPL_ball.sort_values(by=['id', 'batting_team', 'over'])

# Runs per (match id, batting_team, batsman).  groupby sorts its keys by
# default, so the rows come out ordered by id, team and then batsman name.
# NOTE(review): that is name order, not order of appearance -- confirm this is
# what the downstream 'Position' column is meant to be built on.
IPL_ball2 = (
    IPL_ball1[['batsman', 'batting_team', 'id', 'batsman_runs']]
    .groupby(['id', 'batting_team', 'batsman'])
    .sum()
    .reset_index()
)

In [None]:
# Number the batsmen 0, 1, 2, ... within each (match id, batting_team) group,
# following the current row order of IPL_ball2.  Grouping on the match id as
# well as the team restarts the counter for every match; a counter that only
# resets when the team name changes keeps counting when the same team is the
# last group of one match and the first group of the next.
IPL_pos_list = IPL_ball2.groupby(['id', 'batting_team']).cumcount().tolist()
IPL_ball2['Position'] = pd.Series(IPL_pos_list).to_numpy()

# Sum and count every remaining column per (team, batsman, position).
position_groups = IPL_ball2.groupby(['batting_team', 'batsman', 'Position'])
IPL_ball_grp_runs = position_groups.sum().reset_index()
IPL_ball_grp_count = position_groups.count().reset_index()

In [None]:
# Take an explicit copy of the column subset so the column assignments below
# write to an independent frame and do not raise SettingWithCopyWarning.
IPL_position = IPL_ball_grp_runs[
    ['batting_team', 'batsman', 'Position', 'batsman_runs']
].copy()

# Both frames come from the same groupby keys followed by reset_index, so
# their row labels line up for this index-aligned assignment.
IPL_position['matches'] = IPL_ball_grp_count['batsman_runs']

# Runs per innings batted at this position.
IPL_position['averages'] = IPL_position['batsman_runs'] / IPL_position['matches']

In [None]:
# One animation frame per batting team: batsman against position, coloured by
# total runs and sized by the per-position average.
fig = px.scatter(
    IPL_position,
    x='batsman',
    y='Position',
    color='batsman_runs',
    size='averages',
    animation_frame='batting_team',
    width=1800,
    height=400,
)
fig.show()

In [None]:
# Per-batsman sums for the rows whose batting_team is 'Mumbai Indians'.
batsman1 = (
    IPL_position[IPL_position['batting_team'].eq('Mumbai Indians')]
    .groupby('batsman')
    .sum()
)

# Maximum 'averages' value per (team, batsman, position).
IPL_position1 = (
    IPL_position[['batting_team', 'batsman', 'Position', 'averages']]
    .groupby(['batting_team', 'batsman', 'Position'])['averages']
    .agg({'max'})
)
IPL_position1