# IPL EDA & Deep learning Transformers

## Step 1: Data Loading

In [112]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [113]:
# Define the file path
balls_file_path = "/content/drive/MyDrive/IPL/ball_by_ball.csv"
match_file_path="/content/drive/MyDrive/IPL/matches.csv"

# Load the CSV file
ball_by_ball = pd.read_csv(balls_file_path)
matches = pd.read_csv(match_file_path)

In [114]:
ball_by_ball.head()

Unnamed: 0,ID,Innings,Overs,BallNumber,Batter,Bowler,NonStriker,ExtraType,BatsmanRun,ExtrasRun,TotalRun,NonBoundary,IsWicketDelivery,PlayerOut,Kind,FieldersInvolved,BattingTeam
0,1082591,1,0,1,DA Warner,TS Mills,S Dhawan,,0,0,0,0,0,,,,Sunrisers Hyderabad
1,1082591,1,0,2,DA Warner,TS Mills,S Dhawan,,0,0,0,0,0,,,,Sunrisers Hyderabad
2,1082591,1,0,3,DA Warner,TS Mills,S Dhawan,,4,0,4,0,0,,,,Sunrisers Hyderabad
3,1082591,1,0,4,DA Warner,TS Mills,S Dhawan,,0,0,0,0,0,,,,Sunrisers Hyderabad
4,1082591,1,0,5,DA Warner,TS Mills,S Dhawan,wides,0,2,2,0,0,,,,Sunrisers Hyderabad


In [115]:
matches.head()

Unnamed: 0,ID,City,Date,Season,MatchNumber,Team1,Team2,Venue,TossWinner,TossDecision,SuperOver,WinningTeam,WonBy,Margin,Player_of_Match,Team1Players,Team2Players,Umpire1,Umpire2
0,1082591,Hyderabad,2017-04-05,2017,1,Sunrisers Hyderabad,Royal Challengers Bangalore,"Rajiv Gandhi International Stadium, Uppal",Royal Challengers Bangalore,field,N,Sunrisers Hyderabad,Runs,35,Yuvraj Singh,"['DA Warner', 'S Dhawan', 'MC Henriques', 'Yuv...","['CH Gayle', 'Mandeep Singh', 'TM Head', 'KM J...",AY Dandekar,NJ Llong
1,1082592,Pune,2017-04-06,2017,2,Rising Pune Supergiant,Mumbai Indians,Maharashtra Cricket Association Stadium,Rising Pune Supergiant,field,N,Rising Pune Supergiant,Wickets,7,SPD Smith,"['AM Rahane', 'MA Agarwal', 'SPD Smith', 'BA S...","['PA Patel', 'JC Buttler', 'RG Sharma', 'N Ran...",A Nand Kishore,S Ravi
2,1082593,Rajkot,2017-04-07,2017,3,Gujarat Lions,Kolkata Knight Riders,Saurashtra Cricket Association Stadium,Kolkata Knight Riders,field,N,Kolkata Knight Riders,Wickets,10,CA Lynn,"['JJ Roy', 'BB McCullum', 'SK Raina', 'AJ Finc...","['G Gambhir', 'CA Lynn', 'RV Uthappa', 'MK Pan...",Nitin Menon,CK Nandan
3,1082594,Indore,2017-04-08,2017,4,Kings XI Punjab,Rising Pune Supergiant,Holkar Cricket Stadium,Kings XI Punjab,field,N,Kings XI Punjab,Wickets,6,GJ Maxwell,"['HM Amla', 'M Vohra', 'WP Saha', 'AR Patel', ...","['AM Rahane', 'MA Agarwal', 'SPD Smith', 'BA S...",AK Chaudhary,C Shamshuddin
4,1082595,Bengaluru,2017-04-08,2017,5,Royal Challengers Bangalore,Delhi Daredevils,M.Chinnaswamy Stadium,Royal Challengers Bangalore,bat,N,Royal Challengers Bangalore,Runs,15,KM Jadhav,"['CH Gayle', 'SR Watson', 'Mandeep Singh', 'KM...","['AP Tare', 'SW Billings', 'KK Nair', 'SV Sams...",S Ravi,VK Sharma


## Step 2: Data Preprocessing

Sorting matches dataset and ball_by_ball dataset by season

In [116]:
matches = matches.sort_values(by=['Season', 'ID']).reset_index(drop=True)
ball_by_ball = ball_by_ball.sort_values(by=['ID', 'Innings', 'Overs', 'BallNumber']).reset_index(drop=True)

Handle Missing Values

In [117]:
print(matches.isnull().sum())
print(ball_by_ball.isnull().sum())

ID                 0
City               0
Date               0
Season             0
MatchNumber        0
Team1              0
Team2              0
Venue              0
TossWinner         0
TossDecision       0
SuperOver          0
WinningTeam        0
WonBy              0
Margin             0
Player_of_Match    5
Team1Players       0
Team2Players       0
Umpire1            0
Umpire2            0
dtype: int64
ID                       0
Innings                  0
Overs                    0
BallNumber               0
Batter                   0
Bowler                   0
NonStriker               0
ExtraType           246795
BatsmanRun               0
ExtrasRun                0
TotalRun                 0
NonBoundary              0
IsWicketDelivery         0
PlayerOut           247970
Kind                247970
FieldersInvolved    251566
BattingTeam              0
dtype: int64


In [118]:
matches.update(matches[['Player_of_Match']].fillna('Unknown'))
ball_by_ball.update(ball_by_ball[['ExtraType']].fillna('NoExtra'))
ball_by_ball.update(ball_by_ball[['PlayerOut']].fillna('NotOut'))
ball_by_ball.update(ball_by_ball[['Kind']].fillna('None'))
ball_by_ball.update(ball_by_ball[['FieldersInvolved']].fillna('None'))

In [119]:
print(matches.isnull().sum())
print(ball_by_ball.isnull().sum())

ID                 0
City               0
Date               0
Season             0
MatchNumber        0
Team1              0
Team2              0
Venue              0
TossWinner         0
TossDecision       0
SuperOver          0
WinningTeam        0
WonBy              0
Margin             0
Player_of_Match    0
Team1Players       0
Team2Players       0
Umpire1            0
Umpire2            0
dtype: int64
ID                  0
Innings             0
Overs               0
BallNumber          0
Batter              0
Bowler              0
NonStriker          0
ExtraType           0
BatsmanRun          0
ExtrasRun           0
TotalRun            0
NonBoundary         0
IsWicketDelivery    0
PlayerOut           0
Kind                0
FieldersInvolved    0
BattingTeam         0
dtype: int64


Create a mapping of seasons to sequential numbers and replace Season column with mapped values

In [120]:
season_mapping = {
    '2007/08': 1, '2009': 2, '2009/10': 3, '2011': 4, '2012': 5, '2013': 6,
    '2014': 7, '2015': 8, '2016': 9, '2017': 10, '2018': 11, '2019': 12,
    '2020/21': 13, '2021': 14, '2022': 15, '2023': 16, '2024': 17
}
matches['Season'] = matches['Season'].map(season_mapping)
print(matches['Season'].unique())

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17]


Remove duplicates if any

In [121]:
matches.drop_duplicates(inplace=True)
ball_by_ball.drop_duplicates(inplace=True)

Convert the Date column to datetime format

In [122]:
matches['Date'] = pd.to_datetime(matches['Date'])

Define a dictionary for team name replacements

In [123]:
team_name_mapping = {
    'Delhi Daredevils': 'Delhi Capitals',
    'Deccan Chargers': 'Sunrisers Hyderabad',
    'Rising Pune Supergiant': 'Rising Pune Supergiants'
}

# Apply replacements
matches['Team1'] = matches['Team1'].replace(team_name_mapping)
matches['Team2'] = matches['Team2'].replace(team_name_mapping)
matches['WinningTeam'] = matches['WinningTeam'].replace(team_name_mapping)
matches['TossWinner'] = matches['TossWinner'].replace(team_name_mapping)
ball_by_ball['BattingTeam'] = ball_by_ball['BattingTeam'].replace(team_name_mapping)

Merge ball-by-ball data with match data on 'ID'

In [124]:
merged_data = ball_by_ball.merge(matches, on='ID', how='inner')

In [125]:
print(merged_data.columns)
print(f"Total Rows in merged_data: {len(merged_data)}")
pd.set_option('display.max_columns', None)
merged_data.head()

Index(['ID', 'Innings', 'Overs', 'BallNumber', 'Batter', 'Bowler',
       'NonStriker', 'ExtraType', 'BatsmanRun', 'ExtrasRun', 'TotalRun',
       'NonBoundary', 'IsWicketDelivery', 'PlayerOut', 'Kind',
       'FieldersInvolved', 'BattingTeam', 'City', 'Date', 'Season',
       'MatchNumber', 'Team1', 'Team2', 'Venue', 'TossWinner', 'TossDecision',
       'SuperOver', 'WinningTeam', 'WonBy', 'Margin', 'Player_of_Match',
       'Team1Players', 'Team2Players', 'Umpire1', 'Umpire2'],
      dtype='object')
Total Rows in merged_data: 260920


Unnamed: 0,ID,Innings,Overs,BallNumber,Batter,Bowler,NonStriker,ExtraType,BatsmanRun,ExtrasRun,TotalRun,NonBoundary,IsWicketDelivery,PlayerOut,Kind,FieldersInvolved,BattingTeam,City,Date,Season,MatchNumber,Team1,Team2,Venue,TossWinner,TossDecision,SuperOver,WinningTeam,WonBy,Margin,Player_of_Match,Team1Players,Team2Players,Umpire1,Umpire2
0,335982,1,0,1,SC Ganguly,P Kumar,BB McCullum,legbyes,0,1,1,0,0,NotOut,,,Kolkata Knight Riders,Bangalore,2008-04-18,1,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",Asad Rauf,RE Koertzen
1,335982,1,0,2,BB McCullum,P Kumar,SC Ganguly,NoExtra,0,0,0,0,0,NotOut,,,Kolkata Knight Riders,Bangalore,2008-04-18,1,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",Asad Rauf,RE Koertzen
2,335982,1,0,3,BB McCullum,P Kumar,SC Ganguly,wides,0,1,1,0,0,NotOut,,,Kolkata Knight Riders,Bangalore,2008-04-18,1,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",Asad Rauf,RE Koertzen
3,335982,1,0,4,BB McCullum,P Kumar,SC Ganguly,NoExtra,0,0,0,0,0,NotOut,,,Kolkata Knight Riders,Bangalore,2008-04-18,1,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",Asad Rauf,RE Koertzen
4,335982,1,0,5,BB McCullum,P Kumar,SC Ganguly,NoExtra,0,0,0,0,0,NotOut,,,Kolkata Knight Riders,Bangalore,2008-04-18,1,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",Asad Rauf,RE Koertzen


## Feature Engineering for IPL Prediction Model

Match Context Feature Engineering

In [126]:
columns_to_drop = ["City", "Date", "MatchNumber", "Umpire1", "Umpire2", "ExtraType"]
merged_data.drop(columns=columns_to_drop, inplace=True)
# Display updated columns
print("\nUpdated Columns after dropping unnecessary ones:")
print(merged_data.columns)
merged_data.head()


Updated Columns after dropping unnecessary ones:
Index(['ID', 'Innings', 'Overs', 'BallNumber', 'Batter', 'Bowler',
       'NonStriker', 'BatsmanRun', 'ExtrasRun', 'TotalRun', 'NonBoundary',
       'IsWicketDelivery', 'PlayerOut', 'Kind', 'FieldersInvolved',
       'BattingTeam', 'Season', 'Team1', 'Team2', 'Venue', 'TossWinner',
       'TossDecision', 'SuperOver', 'WinningTeam', 'WonBy', 'Margin',
       'Player_of_Match', 'Team1Players', 'Team2Players'],
      dtype='object')


Unnamed: 0,ID,Innings,Overs,BallNumber,Batter,Bowler,NonStriker,BatsmanRun,ExtrasRun,TotalRun,NonBoundary,IsWicketDelivery,PlayerOut,Kind,FieldersInvolved,BattingTeam,Season,Team1,Team2,Venue,TossWinner,TossDecision,SuperOver,WinningTeam,WonBy,Margin,Player_of_Match,Team1Players,Team2Players
0,335982,1,0,1,SC Ganguly,P Kumar,BB McCullum,0,1,1,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D..."
1,335982,1,0,2,BB McCullum,P Kumar,SC Ganguly,0,0,0,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D..."
2,335982,1,0,3,BB McCullum,P Kumar,SC Ganguly,0,1,1,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D..."
3,335982,1,0,4,BB McCullum,P Kumar,SC Ganguly,0,0,0,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D..."
4,335982,1,0,5,BB McCullum,P Kumar,SC Ganguly,0,0,0,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D..."


In [127]:
match_total_runs = merged_data.groupby(['ID', 'BattingTeam'])['BatsmanRun'].sum().to_frame(name="TeamTotalRuns")
merged_data = merged_data.merge(match_total_runs, on=['ID', 'BattingTeam'], how='left')
print(merged_data.columns)
venue_avg_runs = merged_data.groupby('Venue')['TeamTotalRuns'].mean().rename("VenueAvgRuns")
print(venue_avg_runs)
merged_data.head()

Index(['ID', 'Innings', 'Overs', 'BallNumber', 'Batter', 'Bowler',
       'NonStriker', 'BatsmanRun', 'ExtrasRun', 'TotalRun', 'NonBoundary',
       'IsWicketDelivery', 'PlayerOut', 'Kind', 'FieldersInvolved',
       'BattingTeam', 'Season', 'Team1', 'Team2', 'Venue', 'TossWinner',
       'TossDecision', 'SuperOver', 'WinningTeam', 'WonBy', 'Margin',
       'Player_of_Match', 'Team1Players', 'Team2Players', 'TeamTotalRuns'],
      dtype='object')
Venue
Arun Jaitley Stadium                                                     155.768474
Arun Jaitley Stadium, Delhi                                              181.742367
Barabati Stadium                                                         157.056637
Barsapara Cricket Stadium, Guwahati                                      163.633288
Bharat Ratna Shri Atal Bihari Vajpayee Ekana Cricket Stadium, Lucknow    151.274444
Brabourne Stadium                                                        163.487647
Brabourne Stadium, Mumbai              

Unnamed: 0,ID,Innings,Overs,BallNumber,Batter,Bowler,NonStriker,BatsmanRun,ExtrasRun,TotalRun,NonBoundary,IsWicketDelivery,PlayerOut,Kind,FieldersInvolved,BattingTeam,Season,Team1,Team2,Venue,TossWinner,TossDecision,SuperOver,WinningTeam,WonBy,Margin,Player_of_Match,Team1Players,Team2Players,TeamTotalRuns
0,335982,1,0,1,SC Ganguly,P Kumar,BB McCullum,0,1,1,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",205
1,335982,1,0,2,BB McCullum,P Kumar,SC Ganguly,0,0,0,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",205
2,335982,1,0,3,BB McCullum,P Kumar,SC Ganguly,0,1,1,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",205
3,335982,1,0,4,BB McCullum,P Kumar,SC Ganguly,0,0,0,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",205
4,335982,1,0,5,BB McCullum,P Kumar,SC Ganguly,0,0,0,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",205


In [129]:
merged_data.head()

Unnamed: 0,ID,Innings,Overs,BallNumber,Batter,Bowler,NonStriker,BatsmanRun,ExtrasRun,TotalRun,NonBoundary,IsWicketDelivery,PlayerOut,Kind,FieldersInvolved,BattingTeam,Season,Team1,Team2,Venue,TossWinner,TossDecision,SuperOver,WinningTeam,WonBy,Margin,Player_of_Match,Team1Players,Team2Players,TeamTotalRuns
0,335982,1,0,1,SC Ganguly,P Kumar,BB McCullum,0,1,1,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",205
1,335982,1,0,2,BB McCullum,P Kumar,SC Ganguly,0,0,0,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",205
2,335982,1,0,3,BB McCullum,P Kumar,SC Ganguly,0,1,1,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",205
3,335982,1,0,4,BB McCullum,P Kumar,SC Ganguly,0,0,0,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",205
4,335982,1,0,5,BB McCullum,P Kumar,SC Ganguly,0,0,0,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",205


In [130]:
def calculate_win_percentage(df):
    total_wins = df['WinningTeam'].value_counts()
    total_played = df['Team1'].value_counts().add(df['Team2'].value_counts(), fill_value=0)
    win_percentage = (total_wins / total_played).fillna(0) * 100
    return win_percentage.rename("WinPercentage")

def calculate_toss_impact(df):
    df['TossImpact'] = (df['TossWinner'] == df['WinningTeam']).astype(int)
    return df

win_percentage = calculate_win_percentage(merged_data)

merged_data = merged_data.merge(win_percentage.rename("Team1WinPercentage"), left_on='Team1', right_index=True, how='left')
merged_data = merged_data.merge(win_percentage.rename("Team2WinPercentage"), left_on='Team2', right_index=True, how='left')

merged_data = merged_data.merge(venue_avg_runs, on='Venue', how='left')

merged_data = calculate_toss_impact(merged_data)

merged_data['IsSuperOver'] = (merged_data['SuperOver'] == 'Y').astype(int)

merged_data.head()

Unnamed: 0,ID,Innings,Overs,BallNumber,Batter,Bowler,NonStriker,BatsmanRun,ExtrasRun,TotalRun,NonBoundary,IsWicketDelivery,PlayerOut,Kind,FieldersInvolved,BattingTeam,Season,Team1,Team2,Venue,TossWinner,TossDecision,SuperOver,WinningTeam,WonBy,Margin,Player_of_Match,Team1Players,Team2Players,TeamTotalRuns,Team1WinPercentage,Team2WinPercentage,VenueAvgRuns,TossImpact,IsSuperOver
0,335982,1,0,1,SC Ganguly,P Kumar,BB McCullum,0,1,1,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",205,47.382565,51.656894,152.349366,0,0
1,335982,1,0,2,BB McCullum,P Kumar,SC Ganguly,0,0,0,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",205,47.382565,51.656894,152.349366,0,0
2,335982,1,0,3,BB McCullum,P Kumar,SC Ganguly,0,1,1,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",205,47.382565,51.656894,152.349366,0,0
3,335982,1,0,4,BB McCullum,P Kumar,SC Ganguly,0,0,0,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",205,47.382565,51.656894,152.349366,0,0
4,335982,1,0,5,BB McCullum,P Kumar,SC Ganguly,0,0,0,0,0,NotOut,,,Kolkata Knight Riders,1,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,Royal Challengers Bangalore,field,N,Kolkata Knight Riders,Runs,140,BB McCullum,"['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis...","['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D...",205,47.382565,51.656894,152.349366,0,0
