# Loading Data:

In [2]:
import numpy as np
import pandas as pd
import import_ipynb
from datetime import datetime, timedelta

In [3]:
from P01_Pre_Processing import matches, deliveries, trimSpaceInValues, title, latest_teams

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1095 entries, 0 to 1094
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               1095 non-null   int64  
 1   season           1095 non-null   object 
 2   city             1044 non-null   object 
 3   date             1095 non-null   object 
 4   match_type       1095 non-null   object 
 5   player_of_match  1090 non-null   object 
 6   venue            1095 non-null   object 
 7   team1            1095 non-null   object 
 8   team2            1095 non-null   object 
 9   toss_winner      1095 non-null   object 
 10  toss_decision    1095 non-null   object 
 11  winner           1090 non-null   object 
 12  result           1095 non-null   object 
 13  result_margin    1076 non-null   float64
 14  target_runs      1092 non-null   float64
 15  target_overs     1092 non-null   float64
 16  super_over       1095 non-null   object 
 17  method        

In [4]:
all_matches = pd.read_csv('../data/raw/all_matches.csv')

In [5]:
all_matches = latest_teams(all_matches, ['Team 1', 'Team 2'])
all_matches = trimSpaceInValues(all_matches)
all_matches = title(all_matches)


In [6]:
all_matches.head()

Unnamed: 0,Match,Team 1,Team 2,Date,Time,Season
0,Match 1,Royal Challengers Bangalore,Kolkata Knight Riders,2008-04-18,8:00 pm,2008
1,Match 2,Punjab Kings,Chennai Super Kings,2008-04-19,4:00 pm,2008
2,Match 3,Delhi Capitals,Rajasthan Royals,2008-04-19,8:00 pm,2008
3,Match 4,Kolkata Knight Riders,Sunrisers Hyderabad,2008-04-20,4:00 pm,2008
4,Match 5,Mumbai Indians,Royal Challengers Bangalore,2008-04-20,8:00 pm,2008


In [7]:
matches.head()

Unnamed: 0,Id,Season,City,Date,Match_Type,Player_Of_Match,Venue,Team1,Team2,Toss_Winner,Toss_Decision,Winner,Result,Result_Margin,Target_Runs,Target_Overs,Super_Over,Method,Umpire1,Umpire2
0,335982,2008,Bangalore,2008-04-18,League,BB McCullum,M Chinnaswamy Stadium,Royal Challengers Bangalore,Kolkata Knight Riders,Royal Challengers Bangalore,Field,Kolkata Knight Riders,Runs,140,223,20,N,Normal,Asad Rauf,RE Koertzen
1,335983,2008,Chandigarh,2008-04-19,League,MEK Hussey,Punjab Cricket Association IS Bindra Stadium,Punjab Kings,Chennai Super Kings,Chennai Super Kings,Bat,Chennai Super Kings,Runs,33,241,20,N,Normal,MR Benson,SL Shastri
2,335984,2008,Delhi,2008-04-19,League,MF Maharoof,Arun Jaitley Stadium,Delhi Capitals,Rajasthan Royals,Rajasthan Royals,Bat,Delhi Capitals,Wickets,9,130,20,N,Normal,Aleem Dar,GA Pratapkumar
3,335985,2008,Mumbai,2008-04-20,League,MV Boucher,Wankhede Stadium,Mumbai Indians,Royal Challengers Bangalore,Mumbai Indians,Bat,Royal Challengers Bangalore,Wickets,5,166,20,N,Normal,SJ Davis,DJ Harper
4,335986,2008,Kolkata,2008-04-20,League,DJ Hussey,Eden Gardens,Kolkata Knight Riders,Sunrisers Hyderabad,Sunrisers Hyderabad,Bat,Kolkata Knight Riders,Wickets,5,111,20,N,Normal,BF Bowden,K Hariharan


# Processing Data:

In [8]:
all_matches['Time'].unique()

array(['8:00 pm', '4:00 pm', '6:30 pm', '2:30 pm', '7:00 pm', '7:30 pm',
       '07:30 PM', '03:30 PM', '3:30 pm'], dtype=object)

In [9]:
def clean_time(time):
    if pd.isna(time):
        return None
    time = str(time).strip().lower()
    time = time.replace('.', '')
    time = time.replace(' ', '')
    time = time.replace('am', ' am').replace('pm', ' pm')
    time = time.lstrip('0') 
    return time


In [10]:
all_matches['Time'] = all_matches['Time'].apply(clean_time)

In [11]:
# After careful examination, from espncricinfo.com and iplt20.com, we figured out the Matc 24 data was missing in iplt20.com data

In [12]:
# --- Step 1: Create the new match row ---
new_row = {
    'Match': 'Match 24',
    'Team 1': 'Chennai Super Kings',
    'Team 2': 'Pune Warriors India',
    'Date': pd.to_datetime('2012-04-19'),
    'Time': '8:00 pm',
    'Season': 2012
}

In [13]:
# --- Step 2: Insert at index 275 ---
before = all_matches.iloc[:275]
after = all_matches.iloc[275:]
all_matches = pd.concat([before, pd.DataFrame([new_row]), after], ignore_index=True)

In [14]:
# --- Step 3: Extract numeric match number safely ---
all_matches['Match_Num'] = (
    all_matches['Match']
    .astype(str)
    .str.extract(r'(\d+)')[0]
)
all_matches['Match_Num'] = pd.to_numeric(all_matches['Match_Num'], errors='coerce')

In [15]:
# --- Step 4: Increment match numbers for Season 2012, >= 24, but not the new row ---
new_row_index = 275
mask = (
    (all_matches['Season'] == 2012) &
    (all_matches['Match_Num'] >= 24) &
    (all_matches.index != new_row_index)
)
all_matches.loc[mask, 'Match_Num'] = all_matches.loc[mask, 'Match_Num'] + 1

In [16]:
# --- Step 5: Rebuild safely ---
all_matches['Match_Num'] = all_matches['Match_Num'].astype('Int64')
all_matches.loc[all_matches['Match_Num'].notna(), 'Match'] = 'Match ' + all_matches['Match_Num'].astype(str)
all_matches.drop(columns=['Match_Num'], inplace=True)

In [17]:
# Due to rain, reserve day was used. Matches uses the scheduled date

In [18]:
matches.loc[matches['Id'] == 734043, 'Date'] = pd.to_datetime('2014-05-28')

In [19]:
# Step 1: Convert to datetime safely
all_matches['Date'] = pd.to_datetime(all_matches['Date'], errors='coerce')

# Step 2:Keep only the date part (drop the time component)
all_matches['Date'] = all_matches['Date'].dt.date

In [20]:
display(all_matches.loc[270:280])

Unnamed: 0,Match,Team 1,Team 2,Date,Time,Season
270,Match 19,Mumbai Indians,Delhi Capitals,2012-04-16,8:00 pm,2012
271,Match 20,Rajasthan Royals,Sunrisers Hyderabad,2012-04-17,4:00 pm,2012
272,Match 21,Royal Challengers Bangalore,Pune Warriors India,2012-04-17,8:00 pm,2012
273,Match 22,Punjab Kings,Kolkata Knight Riders,2012-04-18,8:00 pm,2012
274,Match 23,Delhi Capitals,Sunrisers Hyderabad,2012-04-19,4:00 pm,2012
275,Match 24,Chennai Super Kings,Pune Warriors India,2012-04-19,8:00 pm,2012
276,Match 25,Punjab Kings,Royal Challengers Bangalore,2012-04-20,8:00 pm,2012
277,Match 26,Chennai Super Kings,Rajasthan Royals,2012-04-21,4:00 pm,2012
278,Match 27,Delhi Capitals,Pune Warriors India,2012-04-21,8:00 pm,2012
279,Match 28,Mumbai Indians,Punjab Kings,2012-04-22,4:00 pm,2012


# Concatenating:

In [21]:
all_matches['Date'] = pd.to_datetime(all_matches['Date'])
all_matches['match_key'] = all_matches.apply(lambda x: tuple(sorted([x['Team 1'], x['Team 2']])), axis=1)
all_matches.rename(columns={'Match': 'Match_No', 'Time': 'Time'}, inplace=True)

In [22]:
matches['Date'] = pd.to_datetime(matches['Date'])
matches['match_key'] = matches.apply(lambda x: tuple(sorted([x['Team1'], x['Team2']])), axis=1)

In [23]:
def merge_match_data(all_matches = all_matches, matches = matches):
    """Merge both datasets using Season, Date, and team match_key."""
    
    merged = pd.merge(
        matches,
        all_matches[['Season', 'Date', 'match_key', 'Time', 'Match_No']],
        on=['Season', 'Date', 'match_key'],
        how='left',
        indicator=True
    )
    
    return merged

In [24]:
def find_unmatched_rows(merged, all_matches = all_matches, matches = matches):
    """Find rows that didn't match in either dataset."""
    used_all = merged.dropna(subset=['Match_No'])[['Season', 'Match_No', 'Date', 'match_key']]
    
    # Unmatched from matches
    unmatched_matches = merged[merged['_merge'] == 'left_only']
    
    # Unmatched from all_matches
    all_matches_keys = all_matches[['Season', 'Match_No', 'Date', 'match_key']]
    used_keys = used_all[['Season', 'Match_No', 'Date', 'match_key']]
    unmatched_all_matches = all_matches_keys.merge(used_keys, on=['Season', 'Match_No', 'Date', 'match_key'], how='left', indicator=True)
    unmatched_all_matches = unmatched_all_matches[unmatched_all_matches['_merge'] == 'left_only']

    unmatched_matches = pd.DataFrame(unmatched_matches)
    unmatched_all_matches = pd.DataFrame(unmatched_all_matches)
    
    return unmatched_matches, unmatched_all_matches

In [25]:
matches = merge_match_data(all_matches, matches)

unmatched_matches, unmatched_all_matches = find_unmatched_rows(matches, all_matches, matches)

In [26]:
print("✅ Merged dataset shape:", matches.shape)
print("❌ Unmatched in matches:", len(unmatched_matches))
print("❌ Unmatched in all_matches:", len(unmatched_all_matches))

✅ Merged dataset shape: (1095, 24)
❌ Unmatched in matches: 0
❌ Unmatched in all_matches: 11


In [27]:
for i in range(2008, 2025):
    count = len(all_matches[all_matches['Season'] == i])
    print(f"{i}: {count} matches")

2008: 59 matches
2009: 59 matches
2010: 60 matches
2011: 74 matches
2012: 76 matches
2013: 76 matches
2014: 60 matches
2015: 60 matches
2016: 60 matches
2017: 60 matches
2018: 60 matches
2019: 60 matches
2020: 60 matches
2021: 60 matches
2022: 74 matches
2023: 74 matches
2024: 74 matches


In [28]:
unmatched_all_matches

Unnamed: 0,Season,Match_No,Date,match_key,_merge
46,2008,Match 47,2008-05-22,"(Delhi Capitals, Kolkata Knight Riders)",left_only
65,2009,Match 7,2009-04-21,"(Mumbai Indians, Rajasthan Royals)",left_only
71,2009,Match 13,2009-04-25,"(Chennai Super Kings, Kolkata Knight Riders)",left_only
197,2011,Match 20,2011-04-19,"(Rajasthan Royals, Royal Challengers Bangalore)",left_only
283,2012,Match 32,2012-04-24,"(Kolkata Knight Riders, Sunrisers Hyderabad)",left_only
285,2012,Match 34,2012-04-25,"(Chennai Super Kings, Royal Challengers Bangal...",left_only
488,2015,Match 25,2015-04-26,"(Kolkata Knight Riders, Rajasthan Royals)",left_only
612,2017,Match 29,2017-04-25,"(Royal Challengers Bangalore, Sunrisers Hydera...",left_only
1094,2024,Match 63,2024-05-13,"(Gujarat Titans, Kolkata Knight Riders)",left_only
1097,2024,Match 66,2024-05-16,"(Gujarat Titans, Sunrisers Hyderabad)",left_only


In [29]:
# After mannual checking, all these matches were found to be Abanondened matches with 0 balls bowled

# Local Time:

In [30]:
matches['City'].unique()

array(['Bangalore', 'Chandigarh', 'Delhi', 'Mumbai', 'Kolkata', 'Jaipur',
       'Hyderabad', 'Chennai', 'Cape Town', 'Port Elizabeth', 'Durban',
       'Centurion', 'East London', 'Johannesburg', 'Kimberley',
       'Bloemfontein', 'Ahmedabad', 'Cuttack', 'Nagpur', 'Dharamsala',
       'Kochi', 'Indore', 'Visakhapatnam', 'Pune', 'Raipur', 'Ranchi',
       'Abu Dhabi', 'Sharjah', 'Dubai', 'Rajkot', 'Kanpur', 'Bengaluru',
       'Navi Mumbai', 'Lucknow', 'Guwahati', 'Mohali'], dtype=object)

In [31]:
matches['Time'].unique()

array(['8:00 pm', '4:00 pm', '6:30 pm', '2:30 pm', '7:00 pm', '7:30 pm',
       '3:30 pm'], dtype=object)

In [32]:
city_to_offset = {
    # India (no offset)
    'Bangalore': 0, 'Chandigarh': 0, 'Delhi': 0, 'Mumbai': 0, 'Kolkata': 0,
    'Jaipur': 0, 'Hyderabad': 0, 'Chennai': 0, 'Ahmedabad': 0, 'Cuttack': 0,
    'Nagpur': 0, 'Dharamsala': 0, 'Kochi': 0, 'Indore': 0, 'Visakhapatnam': 0,
    'Pune': 0, 'Raipur': 0, 'Ranchi': 0, 'Rajkot': 0, 'Kanpur': 0,
    'Bengaluru': 0, 'Navi Mumbai': 0, 'Lucknow': 0, 'Guwahati': 0, 'Mohali': 0,

    # South Africa (2009)
    'Cape Town': -3.5, 'Port Elizabeth': -3.5, 'Durban': -3.5,
    'Centurion': -3.5, 'East London': -3.5, 'Johannesburg': -3.5,
    'Kimberley': -3.5, 'Bloemfontein': -3.5,

    # UAE (2014, 2020, 2021)
    'Abu Dhabi': -1.5, 'Dubai': -1.5, 'Sharjah': -1.5
}

In [33]:
def ist_to_local(time_str, city):
    if pd.isna(time_str):
        return None
    
    # normalize string
    time_str = str(time_str).strip().lower()
    offset = city_to_offset.get(city, 0)
    
    try:
        time = datetime.strptime(time_str, "%I:%M %p")
        local_t = time + timedelta(hours=offset)
        # Use %I (zero-padded), then strip leading zeros manually for Windows compatibility
        return local_t.strftime("%I:%M %p").lstrip("0").lower()
    except Exception as e:
        print(f"⚠️ Error parsing time '{time_str}' for city '{city}': {e}")
        return None

In [34]:
matches['Time'] = matches.apply(lambda x: ist_to_local(x['Time'], x['City']), axis=1)

In [35]:
matches = matches[['Id', 'Season', 'Match_No', 'City', 'Date', 'Time', 'Match_Type', 'Player_Of_Match',
                   'Venue', 'Team1', 'Team2', 'Toss_Winner', 'Toss_Decision', 'Winner',
                   'Result', 'Result_Margin', 'Target_Runs', 'Target_Overs', 'Super_Over',
                   'Method', 'Umpire1', 'Umpire2']]

In [36]:
matches

Unnamed: 0,Id,Season,Match_No,City,Date,Time,Match_Type,Player_Of_Match,Venue,Team1,...,Toss_Decision,Winner,Result,Result_Margin,Target_Runs,Target_Overs,Super_Over,Method,Umpire1,Umpire2
0,335982,2008,Match 1,Bangalore,2008-04-18,8:00 pm,League,BB McCullum,M Chinnaswamy Stadium,Royal Challengers Bangalore,...,Field,Kolkata Knight Riders,Runs,140,223,20,N,Normal,Asad Rauf,RE Koertzen
1,335983,2008,Match 2,Chandigarh,2008-04-19,4:00 pm,League,MEK Hussey,Punjab Cricket Association IS Bindra Stadium,Punjab Kings,...,Bat,Chennai Super Kings,Runs,33,241,20,N,Normal,MR Benson,SL Shastri
2,335984,2008,Match 3,Delhi,2008-04-19,8:00 pm,League,MF Maharoof,Arun Jaitley Stadium,Delhi Capitals,...,Bat,Delhi Capitals,Wickets,9,130,20,N,Normal,Aleem Dar,GA Pratapkumar
3,335985,2008,Match 5,Mumbai,2008-04-20,8:00 pm,League,MV Boucher,Wankhede Stadium,Mumbai Indians,...,Bat,Royal Challengers Bangalore,Wickets,5,166,20,N,Normal,SJ Davis,DJ Harper
4,335986,2008,Match 4,Kolkata,2008-04-20,4:00 pm,League,DJ Hussey,Eden Gardens,Kolkata Knight Riders,...,Bat,Kolkata Knight Riders,Wickets,5,111,20,N,Normal,BF Bowden,K Hariharan
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1090,1426307,2024,Match 69,Hyderabad,2024-05-19,3:30 pm,League,Abhishek Sharma,Rajiv Gandhi International Stadium,Punjab Kings,...,Bat,Sunrisers Hyderabad,Wickets,4,215,20,N,Normal,Nitin Menon,VK Sharma
1091,1426309,2024,Match 1,Ahmedabad,2024-05-21,7:30 pm,Qualifier 1,MA Starc,Narendra Modi Stadium,Sunrisers Hyderabad,...,Bat,Kolkata Knight Riders,Wickets,8,160,20,N,Normal,AK Chaudhary,R Pandit
1092,1426310,2024,Eliminator,Ahmedabad,2024-05-22,7:30 pm,Eliminator,R Ashwin,Narendra Modi Stadium,Royal Challengers Bangalore,...,Field,Rajasthan Royals,Wickets,4,173,20,N,Normal,KN Ananthapadmanabhan,MV Saidharshan Kumar
1093,1426311,2024,Match 2,Chennai,2024-05-24,7:30 pm,Qualifier 2,Shahbaz Ahmed,MA Chidambaram Stadium,Sunrisers Hyderabad,...,Field,Sunrisers Hyderabad,Runs,36,176,20,N,Normal,Nitin Menon,VK Sharma


# Mannual Imputation

In [37]:
unmatched_all_matches

Unnamed: 0,Season,Match_No,Date,match_key,_merge
46,2008,Match 47,2008-05-22,"(Delhi Capitals, Kolkata Knight Riders)",left_only
65,2009,Match 7,2009-04-21,"(Mumbai Indians, Rajasthan Royals)",left_only
71,2009,Match 13,2009-04-25,"(Chennai Super Kings, Kolkata Knight Riders)",left_only
197,2011,Match 20,2011-04-19,"(Rajasthan Royals, Royal Challengers Bangalore)",left_only
283,2012,Match 32,2012-04-24,"(Kolkata Knight Riders, Sunrisers Hyderabad)",left_only
285,2012,Match 34,2012-04-25,"(Chennai Super Kings, Royal Challengers Bangal...",left_only
488,2015,Match 25,2015-04-26,"(Kolkata Knight Riders, Rajasthan Royals)",left_only
612,2017,Match 29,2017-04-25,"(Royal Challengers Bangalore, Sunrisers Hydera...",left_only
1094,2024,Match 63,2024-05-13,"(Gujarat Titans, Kolkata Knight Riders)",left_only
1097,2024,Match 66,2024-05-16,"(Gujarat Titans, Sunrisers Hyderabad)",left_only


In [38]:
matches[matches['Result'] == 'No Result']

Unnamed: 0,Id,Season,Match_No,City,Date,Time,Match_Type,Player_Of_Match,Venue,Team1,...,Toss_Decision,Winner,Result,Result_Margin,Target_Runs,Target_Overs,Super_Over,Method,Umpire1,Umpire2
241,501265,2011,Match 68,Delhi,2011-05-21,8:00 pm,League,No Result,Arun Jaitley Stadium,Delhi Capitals,...,Bat,No Result,No Result,No Result,No Result,No Result,N,Normal,SS Hazare,RJ Tucker
485,829763,2015,Match 29,Bangalore,2015-04-29,8:00 pm,League,No Result,M Chinnaswamy Stadium,Royal Challengers Bangalore,...,Field,No Result,No Result,No Result,No Result,No Result,N,Normal,JD Cloete,PG Pathak
511,829813,2015,Match 55,Bangalore,2015-05-17,4:00 pm,League,No Result,M Chinnaswamy Stadium,Royal Challengers Bangalore,...,Field,No Result,No Result,No Result,188,20,N,Normal,HDPK Dharmasena,K Srinivasan
744,1178424,2019,Match 49,Bengaluru,2019-04-30,8:00 pm,League,No Result,M Chinnaswamy Stadium,Royal Challengers Bangalore,...,Field,No Result,No Result,No Result,63,5,N,Normal,NJ Llong,UV Gandhe
994,1359519,2023,Match 45,Lucknow,2023-05-03,3:30 pm,League,No Result,Bharat Ratna Shri Atal Bihari Vajpayee Ekana C...,Lucknow Super Giants,...,Field,No Result,No Result,No Result,No Result,No Result,N,Normal,AK Chaudhary,NA Patwardhan


In [39]:
# Since its only 11 matches and all are abandoned without a ball being bowled, we can easily impute mannually

In [40]:
Id = 1426308
Season = 2024
Match_No = 'Match 70'
City = 'Guwahati'
Date = '19 May 2024'
Time = '7:30 pm'
Match_Type = 'League'
Player_Of_Match = 'Match Abandoned'
Venue = 'Barsapara Cricket Stadium'
Team1 = 'Rajasthan Royals'
Team2 = 'Kolkata Knight Riders'
Toss_Winner = 'Kolkata Knight Riders'
Toss_Decision = 'Field'
Result_Margin = 'Match Abandoned'
Target_Runs = 'Match Abandoned'
Target_Overs = 'Match Abandoned'
Super_Over = 'N'
Method = 'Normal'
Umpire1 = 'R Pandit'
Umpire2 = 'MV Saidharshan Kumar'

In [41]:
Id = 1426304
Season = 2024
Match_No = 'Match 66'
City = 'Hyderabad'
Date = '16 May 2024'
Time = '7:30 pm'
Match_Type = 'League'
Player_Of_Match = 'Match Abandoned'
Venue = 'Rajiv Gandhi International Stadium'
Team1 = 'Sunrisers Hyderabad'
Team2 = 'Gujarat Titans'
Toss_Winner = 'No Toss'
Toss_Decision = 'No Toss'
Result_Margin = 'Match Abandoned'
Target_Runs = 'Match Abandoned'
Target_Overs = 'Match Abandoned'
Super_Over = 'N'
Method = 'Normal'
Umpire1 = 'A Nand Kishore'
Umpire2 = 'VK Sharma'

In [42]:
Id = 1426301
Season = 2024
Match_No = 'Match 63'
City = 'Ahmedabad'
Date = '13 May 2024'
Time = '7:30 pm'
Match_Type = 'League'
Player_Of_Match = 'Match Abandoned'
Venue = 'Narendra Modi Stadium'
Team1 = 'Gujarat Titans'
Team2 = 'Kolkata Knight Riders'
Toss_Winner = 'No Toss'
Toss_Decision = 'No Toss'
Result_Margin = 'Match Abandoned'
Target_Runs = 'Match Abandoned'
Target_Overs = 'Match Abandoned'
Super_Over = 'N'
Method = 'Normal'
Umpire1 = 'Navdeep Singh'
Umpire2 = 'NA Patwardhan'

In [43]:
Id = 1082619
Season = 2017
Match_No = 'Match 29'
City = 'Bangalore'
Date = '25 April 2017'
Time = '8:00 pm'
Match_Type = 'League'
Player_Of_Match = 'Match Abandoned'
Venue = 'M Chinnaswamy Stadium'
Team1 = 'Royal Challengers Bangalore'
Team2 = 'Sunrisers Hyderabad'
Toss_Winner = 'No Toss'
Toss_Decision = 'No Toss'
Result_Margin = 'Match Abandoned'
Target_Runs = 'Match Abandoned'
Target_Overs = 'Match Abandoned'
Super_Over = 'N'
Method = 'Normal'
Umpire1 = 'C Shamshuddin'
Umpire2 = 'M Erasmus'

In [44]:
Id = 829752
Season = 2015
Match_No = 'Match 25'
City = 'Kolkata'
Date = '26 April 2015'
Time = '4:00 pm'
Match_Type = 'League'
Player_Of_Match = 'Match Abandoned'
Venue = 'Eden Gardens'
Team1 = 'Kolkata Knight Riders'
Team2 = 'Rajasthan Royals'
Toss_Winner = 'No Toss'
Toss_Decision = 'No Toss'
Result_Margin = 'Match Abandoned'
Target_Runs = 'Match Abandoned'
Target_Overs = 'Match Abandoned'
Super_Over = 'N'
Method = 'Normal'
Umpire1 = 'RM Deshpande'
Umpire2 = 'RK Illingworth'

In [45]:
Id = 548340
Season = 2012
Match_No = 'Match 34'
City = 'Bangalore'
Date = '25 April 2012'
Time = '8:00 pm'
Match_Type = 'League'
Player_Of_Match = 'Match Abandoned'
Venue = 'M Chinnaswamy Stadium'
Team1 = 'Royal Challengers Bangalore'
Team2 = 'Chennai Super Kings'
Toss_Winner = 'No Toss'
Toss_Decision = 'No Toss'
Result_Margin = 'Match Abandoned'
Target_Runs = 'Match Abandoned'
Target_Overs = 'Match Abandoned'
Super_Over = 'N'
Method = 'Normal'
Umpire1 = 'BR Doctrove'
Umpire2 = 'S Asnani'

In [46]:
Id = 548338
Season = 2012
Match_No = 'Match 32'
City = 'Kolkata'
Date = '24 April 2012'
Time = '8:00 pm'
Match_Type = 'League'
Player_Of_Match = 'Match Abandoned'
Venue = 'Eden Gardens'
Team1 = 'Kolkata Knight Riders'
Team2 = 'Sunrisers Hyderabad'
Toss_Winner = 'No Toss'
Toss_Decision = 'No Toss'
Result_Margin = 'Match Abandoned'
Target_Runs = 'Match Abandoned'
Target_Overs = 'Match Abandoned'
Super_Over = 'N'
Method = 'Normal'
Umpire1 = 'BF Bowden'
Umpire2 = 'VA Kulkarni'

In [47]:
Id = 501217
Season = 2011
Match_No = 'Match 20'
City = 'Bangalore'
Date = '19 April 2011'
Time = '8:00 pm'
Match_Type = 'League'
Player_Of_Match = 'Match Abandoned'
Venue = 'M Chinnaswamy Stadium'
Team1 = 'Royal Challengers Bangalore'
Team2 = 'Rajasthan Royals'
Toss_Winner = 'No Toss'
Toss_Decision = 'No Toss'
Result_Margin = 'Match Abandoned'
Target_Runs = 'Match Abandoned'
Target_Overs = 'Match Abandoned'
Super_Over = 'N'
Method = 'Normal'
Umpire1 = 'BR Doctrove'
Umpire2 = 'RE Koertzen'

In [48]:
Id = 392193
Season = 2009
Match_No = 'Match 13'
City = 'Cape Town'
Date = '25 April 2009'
Time = '4:30 pm'
Match_Type = 'League'
Player_Of_Match = 'Match Abandoned'
Venue = 'Newlands'
Team1 = 'Chennai Super Kings'
Team2 = 'Kolkata Knight Riders'
Toss_Winner = 'No Toss'
Toss_Decision = 'No Toss'
Result_Margin = 'Match Abandoned'
Target_Runs = 'Match Abandoned'
Target_Overs = 'Match Abandoned'
Super_Over = 'N'
Method = 'Normal'
Umpire1 = 'AM Saheba'
Umpire2 = 'MR Benson'

In [49]:
Id = 392187
Season = 2009
Match_No = 'Match 7'
City = 'Durban'
Date = '21 April 2009'
Time = '4:30 pm'
Match_Type = 'League'
Player_Of_Match = 'Match Abandoned'
Venue = 'Kingsmead'
Team1 = 'Mumbai Indians'
Team2 = 'Rajasthan Royals'
Toss_Winner = 'No Toss'
Toss_Decision = 'No Toss'
Result_Margin = 'Match Abandoned'
Target_Runs = 'Match Abandoned'
Target_Overs = 'Match Abandoned'
Super_Over = 'N'
Method = 'Normal'
Umpire1 = 'DJ Harper'
Umpire2 = 'TH Wijewardene'

In [50]:
Id = 336030
Season = 2008
Match_No = 'Match 47'
City = 'Delhi'
Date = '22 May 2008'
Time = '8:00 pm'
Match_Type = 'League'
Player_Of_Match = 'Match Abandoned'
Venue = 'Arun Jaitley Stadium'
Team1 = 'Delhi Capitals'
Team2 = 'Kolkata Knight Riders'
Toss_Winner = 'No Toss'
Toss_Decision = 'No Toss'
Result_Margin = 'Match Abandoned'
Target_Runs = 'Match Abandoned'
Target_Overs = 'Match Abandoned'
Super_Over = 'N'
Method = 'Normal'
Umpire1 = 'AV Jayaprakash'
Umpire2 = 'BG Jerling'

In [51]:
missing_matches = [
    {
        'Id': 1426308, 'Season': 2024, 'Match_No': 'Match 70', 'City': 'Guwahati', 
        'Date': '2024-05-19', 'Time': '7:30 pm', 'Match_Type': 'League', 
        'Player_Of_Match': 'Match Abandoned', 'Venue': 'Barsapara Cricket Stadium', 
        'Team1': 'Rajasthan Royals', 'Team2': 'Kolkata Knight Riders', 
        'Toss_Winner': 'Kolkata Knight Riders', 'Toss_Decision': 'Field', 
        'Winner': 'No Result', 'Result': 'No Result', 
        'Result_Margin': 'Match Abandoned', 'Target_Runs': 'Match Abandoned', 
        'Target_Overs': 'Match Abandoned', 'Super_Over': 'N', 'Method': 'Normal', 
        'Umpire1': 'R Pandit', 'Umpire2': 'MV Saidharshan Kumar'
    },
    {
        'Id': 1426304, 'Season': 2024, 'Match_No': 'Match 66', 'City': 'Hyderabad', 
        'Date': '2024-05-16', 'Time': '7:30 pm', 'Match_Type': 'League', 
        'Player_Of_Match': 'Match Abandoned', 'Venue': 'Rajiv Gandhi International Stadium', 
        'Team1': 'Sunrisers Hyderabad', 'Team2': 'Gujarat Titans', 
        'Toss_Winner': 'No Toss', 'Toss_Decision': 'No Toss', 
        'Winner': 'No Result', 'Result': 'No Result', 
        'Result_Margin': 'Match Abandoned', 'Target_Runs': 'Match Abandoned', 
        'Target_Overs': 'Match Abandoned', 'Super_Over': 'N', 'Method': 'Normal', 
        'Umpire1': 'A Nand Kishore', 'Umpire2': 'VK Sharma'
    },
    {
        'Id': 1426301, 'Season': 2024, 'Match_No': 'Match 63', 'City': 'Ahmedabad', 
        'Date': '2024-05-13', 'Time': '7:30 pm', 'Match_Type': 'League', 
        'Player_Of_Match': 'Match Abandoned', 'Venue': 'Narendra Modi Stadium', 
        'Team1': 'Gujarat Titans', 'Team2': 'Kolkata Knight Riders', 
        'Toss_Winner': 'No Toss', 'Toss_Decision': 'No Toss', 
        'Winner': 'No Result', 'Result': 'No Result', 
        'Result_Margin': 'Match Abandoned', 'Target_Runs': 'Match Abandoned', 
        'Target_Overs': 'Match Abandoned', 'Super_Over': 'N', 'Method': 'Normal', 
        'Umpire1': 'Navdeep Singh', 'Umpire2': 'NA Patwardhan'
    },
    {
        'Id': 1082619, 'Season': 2017, 'Match_No': 'Match 29', 'City': 'Bangalore', 
        'Date': '2017-04-25', 'Time': '8:00 pm', 'Match_Type': 'League', 
        'Player_Of_Match': 'Match Abandoned', 'Venue': 'M Chinnaswamy Stadium', 
        'Team1': 'Royal Challengers Bangalore', 'Team2': 'Sunrisers Hyderabad', 
        'Toss_Winner': 'No Toss', 'Toss_Decision': 'No Toss', 
        'Winner': 'No Result', 'Result': 'No Result', 
        'Result_Margin': 'Match Abandoned', 'Target_Runs': 'Match Abandoned', 
        'Target_Overs': 'Match Abandoned', 'Super_Over': 'N', 'Method': 'Normal', 
        'Umpire1': 'C Shamshuddin', 'Umpire2': 'M Erasmus'
    },
    {
        'Id': 829752, 'Season': 2015, 'Match_No': 'Match 25', 'City': 'Kolkata', 
        'Date': '2015-04-26', 'Time': '4:00 pm', 'Match_Type': 'League', 
        'Player_Of_Match': 'Match Abandoned', 'Venue': 'Eden Gardens', 
        'Team1': 'Kolkata Knight Riders', 'Team2': 'Rajasthan Royals', 
        'Toss_Winner': 'No Toss', 'Toss_Decision': 'No Toss', 
        'Winner': 'No Result', 'Result': 'No Result', 
        'Result_Margin': 'Match Abandoned', 'Target_Runs': 'Match Abandoned', 
        'Target_Overs': 'Match Abandoned', 'Super_Over': 'N', 'Method': 'Normal', 
        'Umpire1': 'RM Deshpande', 'Umpire2': 'RK Illingworth'
    },
    {
        'Id': 548340, 'Season': 2012, 'Match_No': 'Match 34', 'City': 'Bangalore', 
        'Date': '2012-04-25', 'Time': '8:00 pm', 'Match_Type': 'League', 
        'Player_Of_Match': 'Match Abandoned', 'Venue': 'M Chinnaswamy Stadium', 
        'Team1': 'Royal Challengers Bangalore', 'Team2': 'Chennai Super Kings', 
        'Toss_Winner': 'No Toss', 'Toss_Decision': 'No Toss', 
        'Winner': 'No Result', 'Result': 'No Result', 
        'Result_Margin': 'Match Abandoned', 'Target_Runs': 'Match Abandoned', 
        'Target_Overs': 'Match Abandoned', 'Super_Over': 'N', 'Method': 'Normal', 
        'Umpire1': 'BR Doctrove', 'Umpire2': 'S Asnani'
    },
    {
        'Id': 548338, 'Season': 2012, 'Match_No': 'Match 32', 'City': 'Kolkata', 
        'Date': '2012-04-24', 'Time': '8:00 pm', 'Match_Type': 'League', 
        'Player_Of_Match': 'Match Abandoned', 'Venue': 'Eden Gardens', 
        'Team1': 'Kolkata Knight Riders', 'Team2': 'Sunrisers Hyderabad', 
        'Toss_Winner': 'No Toss', 'Toss_Decision': 'No Toss', 
        'Winner': 'No Result', 'Result': 'No Result', 
        'Result_Margin': 'Match Abandoned', 'Target_Runs': 'Match Abandoned', 
        'Target_Overs': 'Match Abandoned', 'Super_Over': 'N', 'Method': 'Normal', 
        'Umpire1': 'BF Bowden', 'Umpire2': 'VA Kulkarni'
    },
    {
        'Id': 501217, 'Season': 2011, 'Match_No': 'Match 20', 'City': 'Bangalore', 
        'Date': '2011-04-19', 'Time': '8:00 pm', 'Match_Type': 'League', 
        'Player_Of_Match': 'Match Abandoned', 'Venue': 'M Chinnaswamy Stadium', 
        'Team1': 'Royal Challengers Bangalore', 'Team2': 'Rajasthan Royals', 
        'Toss_Winner': 'No Toss', 'Toss_Decision': 'No Toss', 
        'Winner': 'No Result', 'Result': 'No Result', 
        'Result_Margin': 'Match Abandoned', 'Target_Runs': 'Match Abandoned', 
        'Target_Overs': 'Match Abandoned', 'Super_Over': 'N', 'Method': 'Normal', 
        'Umpire1': 'BR Doctrove', 'Umpire2': 'RE Koertzen'
    },
    {
        'Id': 392193, 'Season': 2009, 'Match_No': 'Match 13', 'City': 'Cape Town', 
        'Date': '2009-04-25', 'Time': '4:30 pm', 'Match_Type': 'League', 
        'Player_Of_Match': 'Match Abandoned', 'Venue': 'Newlands', 
        'Team1': 'Chennai Super Kings', 'Team2': 'Kolkata Knight Riders', 
        'Toss_Winner': 'No Toss', 'Toss_Decision': 'No Toss', 
        'Winner': 'No Result', 'Result': 'No Result', 
        'Result_Margin': 'Match Abandoned', 'Target_Runs': 'Match Abandoned', 
        'Target_Overs': 'Match Abandoned', 'Super_Over': 'N', 'Method': 'Normal', 
        'Umpire1': 'AM Saheba', 'Umpire2': 'MR Benson'
    },
    {
        'Id': 392187, 'Season': 2009, 'Match_No': 'Match 7', 'City': 'Durban', 
        'Date': '2009-04-21', 'Time': '4:30 pm', 'Match_Type': 'League', 
        'Player_Of_Match': 'Match Abandoned', 'Venue': 'Kingsmead', 
        'Team1': 'Mumbai Indians', 'Team2': 'Rajasthan Royals', 
        'Toss_Winner': 'No Toss', 'Toss_Decision': 'No Toss', 
        'Winner': 'No Result', 'Result': 'No Result', 
        'Result_Margin': 'Match Abandoned', 'Target_Runs': 'Match Abandoned', 
        'Target_Overs': 'Match Abandoned', 'Super_Over': 'N', 'Method': 'Normal', 
        'Umpire1': 'DJ Harper', 'Umpire2': 'TH Wijewardene'
    },
    {
        'Id': 336030, 'Season': 2008, 'Match_No': 'Match 47', 'City': 'Delhi', 
        'Date': '2008-05-22', 'Time': '8:00 pm', 'Match_Type': 'League', 
        'Player_Of_Match': 'Match Abandoned', 'Venue': 'Arun Jaitley Stadium', 
        'Team1': 'Delhi Capitals', 'Team2': 'Kolkata Knight Riders', 
        'Toss_Winner': 'No Toss', 'Toss_Decision': 'No Toss', 
        'Winner': 'No Result', 'Result': 'No Result', 
        'Result_Margin': 'Match Abandoned', 'Target_Runs': 'Match Abandoned', 
        'Target_Overs': 'Match Abandoned', 'Super_Over': 'N', 'Method': 'Normal', 
        'Umpire1': 'AV Jayaprakash', 'Umpire2': 'BG Jerling'
    }
]


missing_matches = pd.DataFrame(missing_matches)
matches = pd.concat([matches, missing_matches], ignore_index=True)
matches['Date'] = pd.to_datetime(matches['Date'])

matches = matches.sort_values('Id').reset_index(drop=True)


# Finally downloading the cleaned and imputed Matches

In [52]:
#matches.to_csv('../data/cleaned/matches.csv', index=False)

In [53]:
matches.columns

Index(['Id', 'Season', 'Match_No', 'City', 'Date', 'Time', 'Match_Type',
       'Player_Of_Match', 'Venue', 'Team1', 'Team2', 'Toss_Winner',
       'Toss_Decision', 'Winner', 'Result', 'Result_Margin', 'Target_Runs',
       'Target_Overs', 'Super_Over', 'Method', 'Umpire1', 'Umpire2'],
      dtype='object')

In [54]:
deliveries.columns

Index(['Match_Id', 'Inning', 'Batting_Team', 'Bowling_Team', 'Over', 'Ball',
       'Batter', 'Bowler', 'Non_Striker', 'Batsman_Runs', 'Extra_Runs',
       'Total_Runs', 'Extras_Type', 'Is_Wicket', 'Player_Dismissed',
       'Dismissal_Kind', 'Fielder'],
      dtype='object')

In [55]:
print('matches.csv')
for i in matches.columns:
    print(i)
    print(matches[i].unique())
print('deliveries.csv')
for i in deliveries.columns:
    print(i)
    print(deliveries[i].unique())

matches.csv
Id
[ 335982  335983  335984 ... 1426310 1426311 1426312]
Season
[2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021
 2022 2023 2024]
Match_No
['Match 1' 'Match 2' 'Match 3' 'Match 5' 'Match 4' 'Match 6' 'Match 7'
 'Match 8' 'Match 9' 'Match 10' 'Match 12' 'Match 11' 'Match 14'
 'Match 13' 'Match 15' 'Match 16' 'Match 17' 'Match 19' 'Match 18'
 'Match 20' 'Match 51' 'Match 22' 'Match 23' 'Match 24' 'Match 25'
 'Match 26' 'Match 27' 'Match 28' 'Match 29' 'Match 30' 'Match 55'
 'Match 31' 'Match 32' 'Match 33' 'Match 34' 'Match 35' 'Match 36'
 'Match 56' 'Match 37' 'Match 38' 'Match 40' 'Match 39' 'Match 42'
 'Match 41' 'Match 43' 'Match 44' 'Match 45' 'Match 46' 'Match 47'
 'Match 48' 'Match 50' 'Match 49' 'Match 21' 'Match 52' 'Match 53'
 'Match 54' 'Match 57' 'Match 58' 'Match 59' 'Match 60' 'Match 61'
 'Match 62' 'Match 63' 'Match 64' 'Match 65' 'Match 66' 'Match 67'
 'Match 68' 'Match 69' 'Match 70' 'Match 71' 'Match 72' 'Match 73'
 'Match 74' 'Match 75