In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# %load_ext cudf.pandas

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import optuna
from catboost import CatBoostRegressor, CatBoostClassifier, Pool
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import GradientBoostingClassifier, BaggingClassifier, VotingClassifier
import lightgbm as lgb
import xgboost as xgb

# Set display options for pandas
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# Initialize tqdm for pandas
tqdm.pandas()

In [None]:
test_data = pd.read_csv('6644a1e287df6_test_data_with_samplefeatures.csv')

In [None]:
batsman_lvl_data = pd.read_csv('663e2b548c98c_batsman_level_scorecard.csv')

In [None]:
# batsman_lvl_data was already read in the previous cell; re-reading the same
# CSV here only repeated the I/O, so it is not loaded a second time.
bowler_lvl_data = pd.read_csv('663e2b2c60743_bowler_level_scorecard.csv')
match_lvl_data = pd.read_csv('664389efa0868_match_level_scorecard.csv')
train_data = pd.read_csv('663e2b6d54457_train_data_with_samplefeatures.csv')

### Data Augmentation through Swapping teams:

We doubled the data given to us by swapping teams, i.e. the Team1 columns are replaced with the Team2 columns and vice versa. This potentially addresses the bias in our model.

In [None]:
# df_swapped = train_data.copy()

# # Swap team1 and team2 columns
# df_swapped['team1'] = train_data['team2']
# df_swapped['team2'] = train_data['team1']

# # Swap team1_id and team2_id
# df_swapped['team1_id'] = train_data['team2_id']
# df_swapped['team2_id'] = train_data['team1_id']

# # Swap team1_roster_ids and team2_roster_ids
# df_swapped['team1_roster_ids'] = train_data['team2_roster_ids']
# df_swapped['team2_roster_ids'] = train_data['team1_roster_ids']

# # Combine the original DataFrame and the swapped DataFrame
# train_data = pd.concat([train_data, df_swapped], ignore_index=True)

### Data imputation

We imputed players' missing strike-rate values with the average over their past matches. The same was done for sixes and fours.

In [None]:
def fill_na_strike_rate_with_mean(row, df):
    """Return the batsman's mean strike rate over matches earlier than ``row``'s.

    Args:
        row: Record providing ``match_dt`` and ``batsman_id``.
        df: Frame with ``match_dt``, ``batsman_id`` and ``strike_rate`` columns.

    Returns:
        Mean ``strike_rate`` of the rows in ``df`` that belong to the same
        batsman and have a strictly smaller ``match_dt``; ``0`` when there is
        no such row or when every earlier value is itself missing.
    """
    is_earlier = df['match_dt'] < row['match_dt']
    same_batsman = df['batsman_id'] == row['batsman_id']
    past_mean = df.loc[is_earlier & same_batsman, 'strike_rate'].mean()
    # mean() of an empty or all-NaN selection is NaN; fall back to 0 so the
    # caller never writes a NaN back as the "imputed" value.
    return 0 if pd.isna(past_mean) else past_mean

# Keep existing strike rates; only rows where the value is NaN are replaced by
# the batsman's mean over earlier matches.
# NOTE(review): `match_dt` has not been passed through pd.to_datetime at this
# point in the notebook, so the `<` comparison inside the helper runs on the
# values exactly as read from the CSV — confirm they order chronologically.
batsman_lvl_data['strike_rate'] = batsman_lvl_data.progress_apply(
    lambda row: fill_na_strike_rate_with_mean(row, batsman_lvl_data) if pd.isna(row['strike_rate']) else row['strike_rate'],
    axis=1
)

100%|████████████████████████████████████████████████████████████████████████| 24483/24483 [00:00<00:00, 140995.90it/s]


In [None]:
def fill_na_Sixes_with_mean(row, df):
    """Return the batsman's mean ``Sixes`` over matches earlier than ``row``'s.

    Args:
        row: Record providing ``match_dt`` and ``batsman_id``.
        df: Frame with ``match_dt``, ``batsman_id`` and ``Sixes`` columns.

    Returns:
        Mean ``Sixes`` of the rows in ``df`` that belong to the same batsman
        and have a strictly smaller ``match_dt``; ``0`` when there is no such
        row or when every earlier value is itself missing.
    """
    is_earlier = df['match_dt'] < row['match_dt']
    same_batsman = df['batsman_id'] == row['batsman_id']
    past_mean = df.loc[is_earlier & same_batsman, 'Sixes'].mean()
    # mean() of an empty or all-NaN selection is NaN; fall back to 0 so the
    # caller never writes a NaN back as the "imputed" value.
    return 0 if pd.isna(past_mean) else past_mean

batsman_lvl_data['Sixes'] = batsman_lvl_data.progress_apply(
    lambda row: fill_na_Sixes_with_mean(row, batsman_lvl_data) if pd.isna(row['Sixes']) else row['Sixes'],
    axis=1
)

100%|██████████████████████████████████████████████████████████████████████████| 24483/24483 [00:16<00:00, 1459.37it/s]


In [None]:
def fill_na_Fours_with_mean(row, df):
    """Return the batsman's mean ``Fours`` over matches earlier than ``row``'s.

    Args:
        row: Record providing ``match_dt`` and ``batsman_id``.
        df: Frame with ``match_dt``, ``batsman_id`` and ``Fours`` columns.

    Returns:
        Mean ``Fours`` of the rows in ``df`` that belong to the same batsman
        and have a strictly smaller ``match_dt``; ``0`` when there is no such
        row or when every earlier value is itself missing.
    """
    is_earlier = df['match_dt'] < row['match_dt']
    same_batsman = df['batsman_id'] == row['batsman_id']
    past_mean = df.loc[is_earlier & same_batsman, 'Fours'].mean()
    # mean() of an empty or all-NaN selection is NaN; fall back to 0 so the
    # caller never writes a NaN back as the "imputed" value.
    return 0 if pd.isna(past_mean) else past_mean

batsman_lvl_data['Fours'] = batsman_lvl_data.progress_apply(
    lambda row: fill_na_Fours_with_mean(row, batsman_lvl_data) if pd.isna(row['Fours']) else row['Fours'],
    axis=1
)

100%|██████████████████████████████████████████████████████████████████████████| 24483/24483 [00:11<00:00, 2078.25it/s]


In [None]:
# Binary target: 0 when team1 is the winner, 1 otherwise. Computed column-wise
# with np.where instead of a Python-level lambda applied to every row.
train_data['winner_01'] = np.where(train_data['team1'] == train_data['winner'], 0, 1)

In [None]:
# 1 when the toss winner equals team2, 0 in every other case.
train_data['toss_winner_01'] = np.where(train_data['toss winner']== train_data['team2'], 1, 0)
test_data['toss_winner_01'] = np.where(test_data['toss winner']== test_data['team2'], 1, 0)

In [None]:
# 1 when the recorded toss decision is 'bat', 0 for any other value.
train_data['toss_decision_01'] = np.where(train_data['toss decision']=='bat', 1, 0)
test_data['toss_decision_01'] = np.where(test_data['toss decision']=='bat', 1, 0)

In [None]:
columns_remove = ['umpire1','umpire2','player_of_the_match_id','city','venue','batsman_details','bowler_details']

In [None]:
# Parse every match date to a Timestamp so the "before this match" filters used
# by the feature functions compare like with like. test_data is parsed as well
# so that its rows are handled exactly like the training rows.
train_data['match_dt'] = pd.to_datetime(train_data['match_dt'])
test_data['match_dt'] = pd.to_datetime(test_data['match_dt'])
batsman_lvl_data['match_dt'] = pd.to_datetime(batsman_lvl_data['match_dt'])
bowler_lvl_data['match_dt'] = pd.to_datetime(bowler_lvl_data['match_dt'])
match_lvl_data['match_dt'] = pd.to_datetime(match_lvl_data['match_dt'])

# train_data = train_data.drop(columns = columns_remove, errors = 'ignore')
# batsman_lvl_data = batsman_lvl_data.drop(columns = columns_remove, errors = 'ignore')
# bowler_lvl_data = bowler_lvl_data.drop(columns = columns_remove, errors = 'ignore')
# match_lvl_data = match_lvl_data.drop(columns = columns_remove, errors = 'ignore')

In [None]:
def giveLastNgamesPlayer(player_id, date, n, bat_or_bowl):
    """Return a player's most recent ``n`` scorecard rows dated before ``date``.

    Args:
        player_id: Player identifier; converted with ``float()`` before it is
            compared with the id column.
        date: Only rows whose ``match_dt`` is strictly earlier are kept.
        n: Maximum number of rows to return.
        bat_or_bowl: ``'bat'`` reads ``batsman_lvl_data`` (id column
            ``batsman_id``); any other value reads ``bowler_lvl_data`` (id
            column ``bowler_id``).

    Returns:
        DataFrame of at most ``n`` rows, newest ``match_dt`` first.
    """
    batting = bat_or_bowl == 'bat'
    scorecard = batsman_lvl_data if batting else bowler_lvl_data
    id_column = 'batsman_id' if batting else 'bowler_id'

    before_match = scorecard['match_dt'] < date
    is_player = scorecard[id_column] == float(player_id)
    history = scorecard[before_match & is_player]
    return history.sort_values(by='match_dt', ascending=False).head(n)

# Feature Engineering



### Batting Performance (BP), Adjusted Combined bowling Rate (ACBR), Team_Strength and Current Form

These features are taken from [this research paper](https://journals.sagepub.com/doi/10.1177/0258042X20912603?icid=int.sj-abstract.similar-articles.1).

In [None]:
def calculate_BP_for_player(player_id, date, n):
    """Mean Batting Performance (BP) of a player over their last ``n`` batting rows.

    For every row returned by ``giveLastNgamesPlayer(..., 'bat')`` the score is
    ``runs * sqrt(strike_rate / MSR)`` where ``strike_rate`` is recomputed as
    ``runs / balls_faced * 100`` and ``MSR`` is the same ratio for the whole
    match (both innings' runs over both innings' balls, from ``match_lvl_data``).

    Args:
        player_id: Player identifier forwarded to ``giveLastNgamesPlayer``.
        date: Only matches before this date are considered.
        n: Number of most recent rows to use.

    Returns:
        Mean BP over the rows for which it could be computed; ``0`` when the
        player has no earlier rows or no row yields a usable value.
    """
    player_data = giveLastNgamesPlayer(player_id, date, n, 'bat')
    if player_data.empty:
        return 0

    # Work on private copies: new columns are added below and must never end
    # up on a frame this function does not own.
    player_data = player_data.copy()
    player_data['strike_rate'] = (player_data['runs'] / player_data['balls_faced']) * 100

    match_cols = ['match id', 'inning1_runs', 'inning2_runs', 'inning1_balls', 'inning2_balls']
    in_history = match_lvl_data['match id'].isin(player_data['match id'])
    match_runs = match_lvl_data.loc[in_history, match_cols].copy()
    match_runs['total_runs'] = match_runs['inning1_runs'] + match_runs['inning2_runs']
    match_runs['total_balls'] = match_runs['inning1_balls'] + match_runs['inning2_balls']
    match_runs['MSR'] = (match_runs['total_runs'] / match_runs['total_balls']) * 100

    player_data = player_data.merge(match_runs[['match id', 'MSR']], on='match id')
    player_data['BP'] = (player_data['runs']) * ((player_data['strike_rate'] / player_data['MSR']) ** 0.5)

    mean_bp = player_data['BP'].mean()
    # mean() is NaN when the merge left no rows or every BP is NaN (e.g. zero
    # balls faced). A NaN here would turn the whole team average into NaN, so
    # use the same 0 fallback as the "no history" case.
    return 0 if pd.isna(mean_bp) else mean_bp

In [None]:
def calculate_ACBR_for_player(player_id, date, n):
    """Adjusted Combined Bowling Rate (ACBR) of a player over their last ``n`` bowling rows.

    With ``Ri`` the sum of adjusted runs (``runs * sqrt(RPB / RPBM)``, where
    ``RPB`` is the bowler's runs per ball and ``RPBM`` the match's runs per
    ball), ``Wi`` the sum of ``wicket_count`` and ``Bi`` the sum of
    ``balls_bowled``, the value is ``3*Ri / (Wi + Bi/6 + Wi*Ri/Bi)``.

    Args:
        player_id: Player identifier forwarded to ``giveLastNgamesPlayer``.
        date: Only matches before this date are considered.
        n: Number of most recent rows to use.

    Returns:
        The ACBR; ``0`` when the player has no earlier bowling rows, bowled no
        balls in them, or the formula does not produce a finite number.
    """
    player_data = giveLastNgamesPlayer(player_id, date, n, 'bowl')
    if player_data.empty:
        return 0

    # Private copies: columns are added below.
    player_data = player_data.copy()
    player_data['RPB'] = (player_data['runs']) / player_data['balls_bowled']

    match_cols = ['match id', 'inning1_runs', 'inning2_runs', 'inning1_balls', 'inning2_balls']
    in_history = match_lvl_data['match id'].isin(player_data['match id'])
    match_runs = match_lvl_data.loc[in_history, match_cols].copy()
    match_runs['total_runs'] = match_runs['inning1_runs'] + match_runs['inning2_runs']
    match_runs['total_balls'] = match_runs['inning1_balls'] + match_runs['inning2_balls']
    match_runs['RPBM'] = (match_runs['total_runs'] / match_runs['total_balls'])
    player_data = player_data.merge(match_runs[['match id', 'RPBM']], on='match id')

    player_data['RA'] = player_data['runs'] * ((player_data['RPB'] / player_data['RPBM']) ** 0.5)
    player_data['Wi_star'] = player_data['wicket_count']  # Placeholder for actual weighted calculation
    Ri = np.nansum(player_data['RA'])
    Wi = np.nansum(player_data['Wi_star'])
    Bi = np.nansum(player_data['balls_bowled'])

    # Without any balls bowled both Ri/Bi and the full denominator are 0/0.
    # NOTE(review): 0 is the same sentinel the "no history" branch uses; a low
    # ACBR is presumably the "good" direction, so confirm 0 is the intended
    # value for players with no bowling data.
    if Bi == 0:
        return 0
    ACBR = (3 * Ri) / (Wi + (Bi / 6) + (Wi * (Ri / Bi)))
    return ACBR if np.isfinite(ACBR) else 0

In [None]:
def calculate_team_metrics(row, n=5):
    """Average BP and ACBR over each team's roster for one match row.

    Args:
        row: Match record with ``match_dt`` and the ':'-separated
            ``team1_roster_ids`` / ``team2_roster_ids`` strings.
        n: Number of each player's most recent games passed to the per-player
            metric functions.

    Returns:
        Tuple ``(team1_bp, team2_bp, team1_acbr, team2_acbr)`` where each entry
        is the plain mean of the per-player values of that roster.
    """
    date = row['match_dt']
    team1_players = row['team1_roster_ids'].split(':')
    team2_players = row['team2_roster_ids'].split(':')

    def roster_mean(players, player_metric):
        # Mean of one per-player metric over a roster; 0 for an empty roster.
        values = [player_metric(player_id, date, n) for player_id in players]
        return sum(values) / len(values) if values else 0

    team1_bp = roster_mean(team1_players, calculate_BP_for_player)
    team2_bp = roster_mean(team2_players, calculate_BP_for_player)
    team1_acbr = roster_mean(team1_players, calculate_ACBR_for_player)
    team2_acbr = roster_mean(team2_players, calculate_ACBR_for_player)

    return (team1_bp, team2_bp, team1_acbr, team2_acbr)

In [None]:
results = train_data.progress_apply(lambda row: calculate_team_metrics(row), axis=1)
train_data[['team1_bp', 'team2_bp', 'team1_acbr', 'team2_acbr']] = pd.DataFrame(results.tolist(), index=train_data.index)

100%|████████████████████████████████████████████████████████████████████████████████| 948/948 [02:03<00:00,  7.69it/s]


In [None]:
results = test_data.progress_apply(lambda row: calculate_team_metrics(row), axis=1)
test_data[['team1_bp', 'team2_bp', 'team1_acbr', 'team2_acbr']] = pd.DataFrame(results.tolist(), index=test_data.index)

100%|████████████████████████████████████████████████████████████████████████████████| 271/271 [00:37<00:00,  7.24it/s]


In [None]:
# Pool the team1 and team2 values of every training match and take the overall
# min/max of BP and ACBR; these bounds drive the min-max normalisation.
team_bps = np.concatenate([train_data['team1_bp'].values, train_data['team2_bp'].values])
team_acbrs = np.concatenate([train_data['team1_acbr'].values, train_data['team2_acbr'].values])

# NaN-aware reductions: the builtin max()/min() give an order-dependent result
# as soon as a single NaN is present in the array.
max_bp = np.nanmax(team_bps)
min_bp = np.nanmin(team_bps)
max_acbr = np.nanmax(team_acbrs)
min_acbr = np.nanmin(team_acbrs)

In [None]:
def normalized_metrics(row):
    """Min-max normalise the BP and ACBR values of both teams in ``row``.

    Reads the module-level ``min_bp``, ``max_bp``, ``min_acbr`` and
    ``max_acbr``. BP is scaled as ``(value - min) / (max - min)``; ACBR is
    scaled the other way round, ``(max - value) / (max - min)``.

    Returns:
        Tuple ``(bp_team1, bp_team2, acbr_team1, acbr_team2)`` of scaled values.
    """
    def scale_bp(value):
        return (value - min_bp) / (max_bp - min_bp)

    def scale_acbr(value):
        return (max_acbr - value) / (max_acbr - min_acbr)

    return (
        scale_bp(row['team1_bp']),
        scale_bp(row['team2_bp']),
        scale_acbr(row['team1_acbr']),
        scale_acbr(row['team2_acbr']),
    )

In [None]:
# Normalise the training BP/ACBR values row by row and collect them in a frame.
# NOTE(review): the second statement rebinds the name `normalized_metrics` from
# the function to a DataFrame. Re-running this cell without first re-running
# the function definition therefore calls a DataFrame and fails.
results = train_data.progress_apply(lambda row: normalized_metrics(row), axis=1)
normalized_metrics = pd.DataFrame()
normalized_metrics[['normalized_bp_team1','normalized_bp_team2','normalized_acbr_team1','normalized_acbr_team2']] = pd.DataFrame(results.tolist(), index=train_data.index)

100%|█████████████████████████████████████████████████████████████████████████████| 948/948 [00:00<00:00, 79784.10it/s]


In [None]:
# Variance of the normalised BP (resp. ACBR) values pooled over both team
# columns, ignoring NaNs.
var_bp = np.nanvar(np.concatenate(normalized_metrics.loc[:,['normalized_bp_team1','normalized_bp_team2']].values))
var_acbr = np.nanvar(np.concatenate(normalized_metrics.loc[:,['normalized_acbr_team1','normalized_acbr_team2']].values))
# Each weight is inversely proportional to the metric's standard deviation;
# C rescales them so that w_bp + w_acbr == 1.
C = 1/((1/np.sqrt(var_bp)) + (1/np.sqrt(var_acbr)))
w_bp = C/np.sqrt(var_bp)
w_acbr = C/np.sqrt(var_acbr)

In [None]:
def calculate_team_strength(row):
    """Weighted strength score of both teams in ``row``.

    Each team's BP and ACBR are min-max normalised with the module-level
    ``min_bp``/``max_bp``/``min_acbr``/``max_acbr`` (ACBR inverted, so a lower
    raw value gives a higher score) and combined as
    ``w_bp * bp_score + w_acbr * acbr_score``.

    Returns:
        Tuple ``(strength_team1, strength_team2)``.
    """
    def strength(bp, acbr):
        bp_score = (bp - min_bp) / (max_bp - min_bp)
        acbr_score = (max_acbr - acbr) / (max_acbr - min_acbr)
        return w_bp * bp_score + w_acbr * acbr_score

    first = strength(row['team1_bp'], row['team1_acbr'])
    second = strength(row['team2_bp'], row['team2_acbr'])
    return (first, second)

In [None]:
results = train_data.progress_apply(lambda row: calculate_team_strength(row), axis=1)
train_data[['strength_team1', 'strength_team2']] = pd.DataFrame(results.tolist(), index=train_data.index)

100%|█████████████████████████████████████████████████████████████████████████████| 948/948 [00:00<00:00, 84216.55it/s]


In [None]:
# Overall min/max of the per-team BP and ACBR values, taken over the test matches.
# NOTE(review): this rebinds the module-level min_bp/max_bp/min_acbr/max_acbr
# that calculate_team_strength reads. Test rows are therefore scaled with
# test-set statistics while w_bp/w_acbr still come from the training data, and
# re-running the training strength cell after this one would silently use the
# test bounds — confirm this is intended.
team_bps = np.concatenate([test_data['team1_bp'].values, test_data['team2_bp'].values])
team_acbrs = np.concatenate([test_data['team1_acbr'].values, test_data['team2_acbr'].values])

# NaN-aware reductions: the builtin max()/min() give an order-dependent result
# as soon as a single NaN is present in the array.
max_bp = np.nanmax(team_bps)
min_bp = np.nanmin(team_bps)
max_acbr = np.nanmax(team_acbrs)
min_acbr = np.nanmin(team_acbrs)

In [None]:
results = test_data.progress_apply(lambda row: calculate_team_strength(row), axis=1)
test_data[['strength_team1', 'strength_team2']] = pd.DataFrame(results.tolist(), index=test_data.index)

100%|█████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 63954.11it/s]


In [None]:
def get_cf(team_id, match_date, team_strength, n_last_matches):
    """Current-form score of a team from its most recent results before ``match_date``.

    The team's latest matches (at most ``min(n_last_matches, 5)``) are taken
    from ``match_lvl_data``, newest first. The i-th most recent match
    (i = 1, 2, ...) gets weight ``(1 - team_strength) ** i`` and the score is
    the weighted share of wins, ``sum(weight * win) / sum(weight)``.

    Args:
        team_id: Team whose form is computed.
        match_date: Only matches strictly before this date count.
        team_strength: Strength score used as the base of the weights.
        n_last_matches: Requested look-back window; capped at 5.

    Returns:
        The weighted win share; ``0`` when the team has no earlier match or
        all weights are zero (``team_strength == 1``).
    """
    window = min(n_last_matches, 5)
    involved = (match_lvl_data['team1_id'] == team_id) | (match_lvl_data['team2_id'] == team_id)
    earlier = match_lvl_data['match_dt'] < match_date
    last_matches = match_lvl_data[involved & earlier].sort_values(by='match_dt', ascending=False).head(window)

    if last_matches.empty:
        return 0

    wins = (last_matches['winner_id'] == team_id).astype(int).to_numpy()
    weights = np.array([(1 - team_strength) ** (i + 1) for i in range(len(wins))])
    total_weight = weights.sum()
    # A strength of exactly 1 makes every weight 0; avoid returning 0/0 = NaN.
    if total_weight == 0:
        return 0
    return (weights * wins).sum() / total_weight

def calculate_current_form(row):
    """Current-form scores of both teams for one match row.

    Counts how many matches each team played before the row's ``match_dt`` in
    ``match_lvl_data`` and passes the smaller count as the look-back window to
    ``get_cf`` for both teams, so both scores use the same window size.

    Returns:
        Tuple ``(team1_cf, team2_cf)``.
    """
    match_date = row['match_dt']
    first_id, second_id = row['team1_id'], row['team2_id']
    first_strength, second_strength = row['strength_team1'], row['strength_team2']

    def matches_before(team_id):
        # Number of earlier matches in which the team took part on either side.
        took_part = (match_lvl_data['team1_id'] == team_id) | (match_lvl_data['team2_id'] == team_id)
        return len(match_lvl_data[took_part & (match_lvl_data['match_dt'] < match_date)])

    shared_window = min(matches_before(first_id), matches_before(second_id))

    first_cf = get_cf(first_id, match_date, first_strength, shared_window)
    second_cf = get_cf(second_id, match_date, second_strength, shared_window)
    return (first_cf, second_cf)

In [None]:
results = train_data.progress_apply(lambda row: calculate_current_form(row), axis=1)
train_data[['current_form_team1', 'current_form_team2']] = pd.DataFrame(results.tolist(), index=train_data.index)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:03<00:00, 282.51it/s]


In [None]:
results = test_data.progress_apply(lambda row: calculate_current_form(row), axis=1)
test_data[['current_form_team1', 'current_form_team2']] = pd.DataFrame(results.tolist(), index=test_data.index)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 275.20it/s]




---



### Number of 50s in last N Games

In [None]:
def no50sLastn(player_list, date, n):
    '''
    Total number of 50+ scores made by a team's roster in each player's last n games.

    Input-
    1. player_list: ':' separated list of player ids in the roster of a team.
    2. date: match date of the game to calculate this feature for.
    3. n: Number of games to look back per player.

    Output-None

    Returns- sum over all roster players of the games (among their last n
    before date) in which they scored 50 runs or more.
    '''
    roster = str(player_list).split(':')  # ':' separated ids -> list of ids
    fifties_per_player = []
    for player in roster:
        # batting rows of this player's last n games before the match
        innings = giveLastNgamesPlayer(player_id=player, date=date, n=n, bat_or_bowl='bat')
        # 1 for every innings with runs >= 50, 0 otherwise; summed per player
        reached_fifty = np.where(innings['runs'] >= 50, 1, 0)
        fifties_per_player.append(np.nansum(reached_fifty))
    # total over the whole roster
    return np.nansum(fifties_per_player)

In [None]:
# Number of 50+ scores by team1's roster for the train dataset. The call uses
# n=5 (each player's last 5 games) even though the column name says "last15".
train_data['team1_count_50runs_last15'] = train_data.progress_apply(lambda x: \
            no50sLastn(player_list=x['team1_roster_ids'], date=x['match_dt'], n=5), axis=1)
# Same feature for team2's roster, again with n=5.
train_data['team2_count_50runs_last15'] = train_data.progress_apply(lambda x: \
            no50sLastn(player_list=x['team2_roster_ids'], date=x['match_dt'], n=5), axis=1)

100%|████████████████████████████████████████████████████████████████████████████████| 948/948 [00:09<00:00, 95.27it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 948/948 [00:09<00:00, 98.13it/s]


In [None]:
test_data['team1_count_50runs_last15'] = test_data.progress_apply(lambda x: \
            no50sLastn(player_list=x['team1_roster_ids'], date=x['match_dt'], n=5), axis=1)
test_data['team2_count_50runs_last15'] = test_data.progress_apply(lambda x: \
            no50sLastn(player_list=x['team2_roster_ids'], date=x['match_dt'], n=5), axis=1)

100%|████████████████████████████████████████████████████████████████████████████████| 271/271 [00:02<00:00, 91.67it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 271/271 [00:02<00:00, 92.94it/s]


Unnamed: 0,match id,team1,team1_id,team1_roster_ids,team2,team2_id,team2_roster_ids,toss winner,toss decision,venue,city,match_dt,lighting,series_name,season,ground_id,team_count_50runs_last15,team_winp_last5,team1only_avg_runs_last15,team1_winp_team2_last15,ground_avg_runs_last15,toss_winner_01,toss_decision_01,team1_bp,team2_bp,team1_acbr,team2_acbr,strength_team1,strength_team2,current_form_team1,current_form_team2,team1_count_50runs_last15,team2_count_50runs_last15
0,9250275,Jy,28594,7438955.0:8271969.0:8369661.0:3685247.0:2590251.0:7785497.0:7785553.0:3296264.0:4613622.0:2904796.0:3685289.0,Ud Ss of Aa,90,2436944.0:8117500.0:6703528.0:3010748.0:1613870.0:8934764.0:2721480.0:1632819.0:2789604.0:6489846.0:1565507.0,Ud Ss of Aa,field,Bo Ac Cb,Bulawayo,2022-07-11,day match,Ud Ss of Aa tr of Ze,2022,3226,0.0,0.019608,,0.0,,1,0,0.908,13.432074,0.0,6.028728,0.556415,0.630573,0.0,0.0,0,2
1,9262189,Ga An Ws,36084,3715690.0:6818622.0:4069666.0:393014.0:4690188.0:4739552.0:4690328.0:5744850.0:4739580.0:9266102.0:284339.0,Ja Ts,36098,4690258.0:3761246.0:8464385.0:5742470.0:3150720.0:4690104.0:181404.0:1594319.0:5509524.0:4239794.0:2036649.0,Ja Ts,field,Pe Sm Ga,Providence,2022-09-21,night match,Cn Pr Le,2022,13915,0.615385,0.344262,151.285714,66.67,153.5,1,0,14.555116,16.745207,11.15597,12.357269,0.53032,0.53986,0.119725,0.332058,3,4


### Win Percent of teams in last n

In [None]:
def winpLastn(team_id, date, n):
    '''
    Win % of a team over its last n games before date, e.g. 3 wins out of 5 games gives 60.0.
    Returns 0 when the team has no win (or no game) in that window.
    '''
    # games played strictly before the current game's date ...
    played_before = match_lvl_data['match_dt'] < date
    # ... in which the team appears as team1 or team2
    took_part = (match_lvl_data['team1_id'] == team_id) | (match_lvl_data['team2_id'] == team_id)
    # newest first, keep the top n rows (games)
    recent = match_lvl_data[played_before & took_part].sort_values(by='match_dt', ascending=False).head(n)

    wins = int((recent['winner_id'] == team_id).sum())  # games won by the input team
    if wins == 0:
        return 0
    return round(wins * 100 / recent.shape[0], 2)  # win% rounded to two decimals

In [None]:
# Compute team1's win% in its last 5 games (n=5)
train_data['team1_winp_last5'] = train_data.progress_apply(lambda x: \
                                  winpLastn(x['team1_id'], x['match_dt'], 5), axis=1)
# Compute team2's win% in its last 5 games (n=5)
train_data['team2_winp_last5'] = train_data.progress_apply(lambda x: \
                                  winpLastn(x['team2_id'], x['match_dt'], 5), axis=1)

100%|██████████████████████████████████████████████████████████████████████████████| 948/948 [00:00<00:00, 1000.90it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 948/948 [00:00<00:00, 1145.95it/s]


In [None]:
test_data['team1_winp_last5'] = test_data.progress_apply(lambda x: \
            winpLastn(team_id=x['team1_id'], date=x['match_dt'], n=5), axis=1)
test_data['team2_winp_last5'] = test_data.progress_apply(lambda x: \
            winpLastn(team_id=x['team2_id'], date=x['match_dt'], n=5), axis=1)

100%|██████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 1108.99it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 1125.29it/s]


Unnamed: 0,match id,team1,team1_id,team1_roster_ids,team2,team2_id,team2_roster_ids,toss winner,toss decision,venue,city,match_dt,lighting,series_name,season,ground_id,team_count_50runs_last15,team_winp_last5,team1only_avg_runs_last15,team1_winp_team2_last15,ground_avg_runs_last15,toss_winner_01,toss_decision_01,team1_bp,team2_bp,team1_acbr,team2_acbr,strength_team1,strength_team2,current_form_team1,current_form_team2,team1_count_50runs_last15,team2_count_50runs_last15,team1_winp_last5,team2_winp_last5
0,9250275,Jy,28594,7438955.0:8271969.0:8369661.0:3685247.0:2590251.0:7785497.0:7785553.0:3296264.0:4613622.0:2904796.0:3685289.0,Ud Ss of Aa,90,2436944.0:8117500.0:6703528.0:3010748.0:1613870.0:8934764.0:2721480.0:1632819.0:2789604.0:6489846.0:1565507.0,Ud Ss of Aa,field,Bo Ac Cb,Bulawayo,2022-07-11,day match,Ud Ss of Aa tr of Ze,2022,3226,0.0,0.019608,,0.0,,1,0,0.908,13.432074,0.0,6.028728,0.556415,0.630573,0.0,0.0,0,2,0.0,50.0
1,9262189,Ga An Ws,36084,3715690.0:6818622.0:4069666.0:393014.0:4690188.0:4739552.0:4690328.0:5744850.0:4739580.0:9266102.0:284339.0,Ja Ts,36098,4690258.0:3761246.0:8464385.0:5742470.0:3150720.0:4690104.0:181404.0:1594319.0:5509524.0:4239794.0:2036649.0,Ja Ts,field,Pe Sm Ga,Providence,2022-09-21,night match,Cn Pr Le,2022,13915,0.615385,0.344262,151.285714,66.67,153.5,1,0,14.555116,16.745207,11.15597,12.357269,0.53032,0.53986,0.119725,0.332058,3,4,20.0,60.0


In [None]:
# Innings (1 or 2) in which each team batted, derived from the toss:
# team1 gets 1 when it won the toss and chose 'bat', or team2 won and chose
# 'field'; every other combination (including unmatched values) gets 2.
match_lvl_data['team1_bat_inning'] = np.where( ((match_lvl_data['team1']==match_lvl_data['toss winner'])&(match_lvl_data['toss decision']=='bat'))|\
                                               ((match_lvl_data['team2']==match_lvl_data['toss winner'])&(match_lvl_data['toss decision']=='field')) , 1, 2)
# Mirror image for team2: 1 when team1 won and chose 'field', or team2 won and
# chose 'bat'; otherwise 2.
match_lvl_data['team2_bat_inning'] = np.where( ((match_lvl_data['team1']==match_lvl_data['toss winner'])&(match_lvl_data['toss decision']=='field'))|\
                                               ((match_lvl_data['team2']==match_lvl_data['toss winner'])&(match_lvl_data['toss decision']=='bat')) , 1, 2)

### Team Average Runs Last n games

In [None]:
import pandas as pd

def teamAvgRunsLastn(team_id, date, n):
    '''
    Function to calculate a team's average runs in their last n games.

    Input-
    1. team_id: ID of the team to calculate average runs.
    2. date: match date of the current game for which the feature is calculated.
    3. n: look-back window of games for the team.

    Reads the module-level match_lvl_data, including its team1_bat_inning /
    team2_bat_inning columns, to know in which innings the team batted.

    Output- None

    Return- Float value denoting average of runs scored by the team in their
    last n games (0.0 when the team has no earlier game; missing run values
    are ignored).
    '''
    # Games before the input date in which the team appears as team1 or team2,
    # newest first, limited to n rows.
    df_rel = match_lvl_data[(match_lvl_data['match_dt'] < date) &
                            ((match_lvl_data['team1_id'] == team_id) | (match_lvl_data['team2_id'] == team_id))] \
                            .sort_values(by='match_dt', ascending=False).head(n)

    if df_rel.empty:
        return 0.0

    # Being listed as team1 does not mean batting first: pick the innings the
    # team actually batted in, then read that innings' runs.
    is_team1 = df_rel['team1_id'] == team_id
    bat_inning = np.where(is_team1, df_rel['team1_bat_inning'], df_rel['team2_bat_inning'])
    team_runs = np.where(bat_inning == 1, df_rel['inning1_runs'], df_rel['inning2_runs'])

    # Series.mean() skips NaN run values.
    return pd.Series(team_runs, dtype='float64').mean()

In [None]:
# Compute average runs scored by team1 for train data. The call uses n=5 (last
# 5 games) even though the column name says "last15".
train_data['team1only_avg_runs_last15'] = train_data.progress_apply(lambda x: \
                                  teamAvgRunsLastn(x['team1_id'], x['match_dt'], 5), axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:02<00:00, 447.92it/s]


In [None]:
train_data['team2only_avg_runs_last15'] = train_data.progress_apply(lambda x: \
                                  teamAvgRunsLastn(x['team2_id'], x['match_dt'], 5), axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:02<00:00, 449.21it/s]


In [None]:
test_data['team1only_avg_runs_last15'] = test_data.progress_apply(lambda x: \
            teamAvgRunsLastn(x['team1_id'], x['match_dt'], 5), axis=1)
test_data['team2only_avg_runs_last15'] = test_data.progress_apply(lambda x: \
                                  teamAvgRunsLastn(x['team2_id'], x['match_dt'], 5), axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 442.51it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 441.65it/s]


Unnamed: 0,match id,team1,team1_id,team1_roster_ids,team2,team2_id,team2_roster_ids,toss winner,toss decision,venue,city,match_dt,lighting,series_name,season,ground_id,team_count_50runs_last15,team_winp_last5,team1only_avg_runs_last15,team1_winp_team2_last15,ground_avg_runs_last15,toss_winner_01,toss_decision_01,team1_bp,team2_bp,team1_acbr,team2_acbr,strength_team1,strength_team2,current_form_team1,current_form_team2,team1_count_50runs_last15,team2_count_50runs_last15,team1_winp_last5,team2_winp_last5,team2only_avg_runs_last15
0,9250275,Jy,28594,7438955.0:8271969.0:8369661.0:3685247.0:2590251.0:7785497.0:7785553.0:3296264.0:4613622.0:2904796.0:3685289.0,Ud Ss of Aa,90,2436944.0:8117500.0:6703528.0:3010748.0:1613870.0:8934764.0:2721480.0:1632819.0:2789604.0:6489846.0:1565507.0,Ud Ss of Aa,field,Bo Ac Cb,Bulawayo,2022-07-11,day match,Ud Ss of Aa tr of Ze,2022,3226,0.0,0.019608,0.0,0.0,,1,0,0.908,13.432074,0.0,6.028728,0.556415,0.630573,0.0,0.0,0,2,0.0,50.0,164.5
1,9262189,Ga An Ws,36084,3715690.0:6818622.0:4069666.0:393014.0:4690188.0:4739552.0:4690328.0:5744850.0:4739580.0:9266102.0:284339.0,Ja Ts,36098,4690258.0:3761246.0:8464385.0:5742470.0:3150720.0:4690104.0:181404.0:1594319.0:5509524.0:4239794.0:2036649.0,Ja Ts,field,Pe Sm Ga,Providence,2022-09-21,night match,Cn Pr Le,2022,13915,0.615385,0.344262,137.4,66.67,153.5,1,0,14.555116,16.745207,11.15597,12.357269,0.53032,0.53986,0.119725,0.332058,3,4,20.0,60.0,159.6


### Team Win percentage against each other last n games

In [None]:
import pandas as pd

def winpCrossLastn(team1_id, team2_id, date, n, match_lvl_data):
    '''
    Win% of team1 against team2 over their most recent n head-to-head games before date.

    Input-
    1. team1_id: ID of the team whose win% is computed.
    2. team2_id: ID of the opponent.
    3. date: match date of the current game; only earlier games count.
    4. n: maximum number of head-to-head games to look back.
    5. match_lvl_data: DataFrame containing match level data.

    Output- None

    Returns- Float win% of team1 rounded to two decimals; 0.0 when the two
    teams have no earlier game against each other.
    '''
    first, second = match_lvl_data['team1_id'], match_lvl_data['team2_id']
    # either ordering of the two teams counts as a head-to-head game
    head_to_head = ((first == team1_id) & (second == team2_id)) | ((first == team2_id) & (second == team1_id))
    earlier = match_lvl_data['match_dt'] < date
    encounters = match_lvl_data[earlier & head_to_head].sort_values(by='match_dt', ascending=False).head(n)

    total = encounters.shape[0]
    if total == 0:
        return 0.0

    team1_wins = int((encounters['winner_id'] == team1_id).sum())
    return round((team1_wins / total) * 100, 2)

In [None]:
# Compute team1 win% against team2 for train data. The call uses n=5 (their
# past 5 encounters) even though the column name says "last15".
train_data['team1_winp_team2_last15'] = train_data.progress_apply(lambda x: \
                                  winpCrossLastn(x['team1_id'], x['team2_id'], x['match_dt'], 5, match_lvl_data), axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:01<00:00, 927.74it/s]


In [None]:
# Similarly for test data.
test_data['team1_winp_team2_last15'] = test_data.progress_apply(lambda x: \
                                  winpCrossLastn(x['team1_id'], x['team2_id'], x['match_dt'], 5, match_lvl_data), axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 896.71it/s]


### Average runs made by a team in last n games on a ground

In [None]:
import numpy as np
import pandas as pd

def avgRunsGround(team_id, ground_id, date, n, match_lvl_data):
    '''
    Function to calculate average runs scored by a team at a ground/venue.

    The last n games hosted at ground_id before date are selected first; of
    those, only the games involving team_id (with both innings' runs present)
    contribute. For each such game the team's own innings is picked from the
    toss: the team batted first when it won the toss and chose 'bat', or lost
    the toss and the decision was 'field'.

    Input-
    1. team_id: ID of the team.
    2. ground_id: ID of the ground.
    3. date: match date of the current game; only earlier games count.
    4. n: number of most recent games at the ground to look at.
    5. match_lvl_data: DataFrame containing match level data ('toss winner'
       holds a team name, matched against the 'team1'/'team2' name columns).

    Returns- Average runs scored by the team in those games, 0 if there are none.
    '''
    # Last n games at the ground before the input date, newest first.
    at_ground = match_lvl_data[(match_lvl_data['match_dt'] < date) & (match_lvl_data['ground_id'] == ground_id)]
    df_rel = at_ground.sort_values(by='match_dt', ascending=False).head(n)

    # Keep the games the team played in and that have both run totals.
    df_rel = df_rel[(df_rel['team1_id'] == team_id) | (df_rel['team2_id'] == team_id)]
    df_rel = df_rel.dropna(subset=['inning1_runs', 'inning2_runs'])
    if df_rel.empty:
        return 0

    # 'toss winner' is a team name, so it has to be compared with the team's
    # name in that game rather than with the numeric team_id.
    team_name = np.where(df_rel['team1_id'] == team_id, df_rel['team1'], df_rel['team2'])
    won_toss = (df_rel['toss winner'] == team_name).to_numpy()
    decision = df_rel['toss decision'].to_numpy()
    batted_first = (won_toss & (decision == 'bat')) | (~won_toss & (decision == 'field'))

    team_runs = np.where(batted_first, df_rel['inning1_runs'], df_rel['inning2_runs'])
    return team_runs.mean()

In [None]:
## Average runs of each team at the match's ground for train data. The calls use
## n=5 (last 5 games hosted at that venue) even though the column names say "last15".
train_data['ground_avg_runs_team1last15'] = train_data.progress_apply(lambda x: \
                                  avgRunsGround(x['team1_id'],x['ground_id'], x['match_dt'], 5, match_lvl_data), axis=1)

train_data['ground_avg_runs_team2last15'] = train_data.progress_apply(lambda x: \
                                  avgRunsGround(x['team2_id'], x['ground_id'], x['match_dt'], 5, match_lvl_data), axis=1)

## Same two features for test data, again with n=5.
test_data['ground_avg_runs_team1last15'] = test_data.progress_apply(lambda x: \
                                  avgRunsGround(x['team1_id'],x['ground_id'], x['match_dt'], 5, match_lvl_data), axis=1)

test_data['ground_avg_runs_team2last15'] = test_data.progress_apply(lambda x: \
                                  avgRunsGround(x['team2_id'], x['ground_id'], x['match_dt'], 5, match_lvl_data), axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:01<00:00, 608.01it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:01<00:00, 630.60it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 624.58it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 618.37it/s]


### Combined average Strike rate of Teams in last n Games

In [None]:
def avg_strike_rate_last_10(player_list,date,n=5):
    """
    Average, over the roster's known batsmen, of each player's mean strike rate in recent games.

    Parameters:
    - player_list: ':' separated string of player ids in a team's roster.
    - date: match date forwarded to giveLastNgamesPlayer.
    - n: number of games requested per player (defaults to 5 despite the "_last_10" name).

    Returns:
    - np.nanmean of the per-player values; a player with no rows contributes 0.
      NaN when no roster id is found in batsman_lvl_data['batsman_id'].
    """
    roster_ids = pd.Series(str(player_list).split(':')).apply(float)
    known_batsmen = roster_ids[roster_ids.isin(batsman_lvl_data['batsman_id'])]

    per_player = []
    for pid in known_batsmen:
        recent = giveLastNgamesPlayer(player_id=pid, date=date, n=n, bat_or_bowl='bat')
        per_player.append(recent['strike_rate'].mean() if recent.size != 0 else 0)

    return np.nanmean(np.array(per_player, dtype=float))

In [None]:
# Roster-level batting strike rate for both teams of every training match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'total_strike_rate_team_1_last10'),
    ('team2_roster_ids', 'total_strike_rate_team_2_last10'),
):
    train_data[feature_col] = train_data.progress_apply(
        lambda row, roster_col=roster_col: avg_strike_rate_last_10(row[roster_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:07<00:00, 127.23it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:07<00:00, 123.96it/s]


In [None]:
# Roster-level batting strike rate for both teams of every test match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'total_strike_rate_team_1_last10'),
    ('team2_roster_ids', 'total_strike_rate_team_2_last10'),
):
    test_data[feature_col] = test_data.progress_apply(
        lambda row, roster_col=roster_col: avg_strike_rate_last_10(row[roster_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:02<00:00, 122.76it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:02<00:00, 124.07it/s]


### Combined average Fours scored by Teams in last n Games

In [None]:
def avg_Fours_last_10(player_list,date,n=5):
    """
    Average, over the roster's known batsmen, of each player's mean 'Fours' in recent games.

    Parameters:
    - player_list: ':' separated string of player ids in a team's roster.
    - date: match date forwarded to giveLastNgamesPlayer.
    - n: number of games requested per player (defaults to 5 despite the "_last_10" name).

    Returns:
    - np.nanmean of the per-player values; a player with no rows contributes 0.
    """
    roster_ids = pd.Series(str(player_list).split(':')).apply(float)
    known_batsmen = roster_ids[roster_ids.isin(batsman_lvl_data['batsman_id'])]

    per_player = []
    for pid in known_batsmen:
        recent = giveLastNgamesPlayer(player_id=pid, date=date, n=n, bat_or_bowl='bat')
        per_player.append(recent['Fours'].mean() if recent.size != 0 else 0)

    return np.nanmean(np.array(per_player, dtype=float))

### Combined average Sixes scored by Teams in last n Games

In [None]:
def avg_sixes_last_10(player_list,date,n=5):
    """
    Average, over the roster's known batsmen, of each player's TOTAL 'Sixes' in recent games.

    Parameters:
    - player_list: ':' separated string of player ids in a team's roster.
    - date: match date forwarded to giveLastNgamesPlayer.
    - n: number of games requested per player (defaults to 5 despite the "_last_10" name).

    Returns:
    - np.nanmean of the per-player totals; a player with no rows contributes 0.
    """
    roster_ids = pd.Series(str(player_list).split(':')).apply(float)
    known_batsmen = roster_ids[roster_ids.isin(batsman_lvl_data['batsman_id'])]

    per_player = []
    for pid in known_batsmen:
        recent = giveLastNgamesPlayer(player_id=pid, date=date, n=n, bat_or_bowl='bat')
        # NOTE(review): this sums per player, whereas avg_Fours_last_10 takes the per-player mean —
        # confirm the difference is intentional.
        per_player.append(recent['Sixes'].sum() if recent.size != 0 else 0)

    return np.nanmean(np.array(per_player, dtype=float))

In [None]:
# Roster-level fours and sixes features for both teams of every training match.
for feature_fn, roster_col, feature_col in (
    (avg_Fours_last_10, 'team1_roster_ids', 'total_fours_avg_team1_last10'),
    (avg_Fours_last_10, 'team2_roster_ids', 'total_fours_avg_team2_last10'),
    (avg_sixes_last_10, 'team1_roster_ids', 'total_sixes_avg_team1_last10'),
    (avg_sixes_last_10, 'team2_roster_ids', 'total_sixes_avg_team2_last10'),
):
    train_data[feature_col] = train_data.progress_apply(
        lambda row, feature_fn=feature_fn, roster_col=roster_col: feature_fn(row[roster_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:07<00:00, 126.66it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:07<00:00, 122.51it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:07<00:00, 128.00it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:07<00:00, 122.23it/s]


In [None]:
# Roster-level fours and sixes features for both teams of every test match.
for feature_fn, roster_col, feature_col in (
    (avg_Fours_last_10, 'team1_roster_ids', 'total_fours_avg_team1_last10'),
    (avg_Fours_last_10, 'team2_roster_ids', 'total_fours_avg_team2_last10'),
    (avg_sixes_last_10, 'team1_roster_ids', 'total_sixes_avg_team1_last10'),
    (avg_sixes_last_10, 'team2_roster_ids', 'total_sixes_avg_team2_last10'),
):
    test_data[feature_col] = test_data.progress_apply(
        lambda row, feature_fn=feature_fn, roster_col=roster_col: feature_fn(row[roster_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:02<00:00, 120.55it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:02<00:00, 123.41it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:02<00:00, 124.06it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:02<00:00, 123.75it/s]


### Combined average Wickets taken by Teams in last n Games

In [None]:
def total_avg_wickets_last_10(player_list,date,n=5):
    """
    Average, over the roster's known bowlers, of each player's mean 'wicket_count' in recent games.

    Parameters:
    - player_list: ':' separated string of player ids in a team's roster.
    - date: match date forwarded to giveLastNgamesPlayer.
    - n: number of games requested per player (defaults to 5 despite the "_last_10" name).

    Returns:
    - np.nanmean of the per-player values; a player with no rows contributes 0.
    """
    roster_ids = pd.Series(str(player_list).split(':')).apply(float)
    known_bowlers = roster_ids[roster_ids.isin(bowler_lvl_data['bowler_id'])]

    per_player = []
    for pid in known_bowlers:
        recent = giveLastNgamesPlayer(player_id=pid, date=date, n=n, bat_or_bowl='bowl')
        per_player.append(recent['wicket_count'].mean() if recent.size != 0 else 0)

    return np.nanmean(np.array(per_player, dtype=float))

In [None]:
# Roster-level average wickets for both teams of every training match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'total_avg_wicket_team1_last10'),
    ('team2_roster_ids', 'total_avg_wicket_team2_last10'),
):
    train_data[feature_col] = train_data.progress_apply(
        lambda row, roster_col=roster_col: total_avg_wickets_last_10(row[roster_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:04<00:00, 189.67it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:05<00:00, 181.41it/s]


In [None]:
# Roster-level average wickets for both teams of every test match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'total_avg_wicket_team1_last10'),
    ('team2_roster_ids', 'total_avg_wicket_team2_last10'),
):
    test_data[feature_col] = test_data.progress_apply(
        lambda row, roster_col=roster_col: total_avg_wickets_last_10(row[roster_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:01<00:00, 185.31it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:01<00:00, 185.33it/s]


### Combined Average Run rate of teams based on last n games

In [None]:
def total_avg_run_rate_last_10(player_list,date,n=5):
    """
    Average, over the roster's known batsmen, of each player's mean runs-per-ball in recent games.

    Parameters:
    - player_list: ':' separated string of player ids in a team's roster.
    - date: match date forwarded to giveLastNgamesPlayer.
    - n: number of games requested per player (defaults to 5 despite the "_last_10" name).

    Returns:
    - np.nanmean of the per-player values; a player with no rows contributes 0.
    """
    roster_ids = pd.Series(str(player_list).split(':')).apply(float)
    known_batsmen = roster_ids[roster_ids.isin(batsman_lvl_data['batsman_id'])]

    per_player = []
    for pid in known_batsmen:
        recent = giveLastNgamesPlayer(player_id=pid, date=date, n=n, bat_or_bowl='bat')
        if recent.size != 0:
            # Per-game ratio first, then averaged (not total runs / total balls).
            runs_per_ball = recent['runs'] / recent['balls_faced']
            per_player.append(runs_per_ball.mean())
        else:
            per_player.append(0)

    return np.nanmean(np.array(per_player, dtype=float))

In [None]:
# Roster-level runs-per-ball for both teams of every training match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'total_run_rate_team_1_last10'),
    ('team2_roster_ids', 'total_run_rate_team_2_last10'),
):
    train_data[feature_col] = train_data.progress_apply(
        lambda row, roster_col=roster_col: total_avg_run_rate_last_10(row[roster_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:09<00:00, 104.80it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:08<00:00, 109.23it/s]


In [None]:
# Roster-level runs-per-ball for both teams of every test match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'total_run_rate_team_1_last10'),
    ('team2_roster_ids', 'total_run_rate_team_2_last10'),
):
    test_data[feature_col] = test_data.progress_apply(
        lambda row, roster_col=roster_col: total_avg_run_rate_last_10(row[roster_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:02<00:00, 106.12it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:02<00:00, 105.87it/s]


### Number of all rounders in teams

In [None]:
def create_all_rounders_df(batsmen_level,bowler_level):
  """
  Build a one-column frame ('player_id') of ids present in both scorecards.

  Side effect: the 'batsman_id' / 'bowler_id' columns of the frames passed in are
  converted to float in place.

  Parameters:
  - batsmen_level: frame with a 'batsman_id' column.
  - bowler_level: frame with a 'bowler_id' column.

  Returns:
  - DataFrame with a single 'player_id' column (row order is that of the underlying set).
  """
  for frame, id_col in ((batsmen_level, 'batsman_id'), (bowler_level, 'bowler_id')):
    frame[id_col] = frame[id_col].apply(float)

  batting_ids = set(batsmen_level['batsman_id'].unique())
  bowling_ids = set(bowler_level['bowler_id'].unique())
  return pd.DataFrame(batting_ids.intersection(bowling_ids), columns = ['player_id'])

In [None]:
# Ids found in both the batting and bowling scorecards. The call also converts the
# 'batsman_id' / 'bowler_id' columns of the two global frames to float in place.
all_rounders = create_all_rounders_df(batsman_lvl_data,bowler_lvl_data)

In [None]:
def num_all_rounders(player_list):
  """
  Count the roster entries whose id appears in the module-level all_rounders['player_id'].

  Parameters:
  - player_list: ':' separated string of player ids in a team's roster.

  Returns:
  - int number of matching roster entries.
  """
  roster_ids = pd.Series(str(player_list).split(':')).apply(float)
  is_all_rounder = roster_ids.isin(all_rounders['player_id'])
  return int(is_all_rounder.sum())

In [None]:
# Number of all-rounders in each roster of every training match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'num_all_rounders_team_1'),
    ('team2_roster_ids', 'num_all_rounders_team_2'),
):
    train_data[feature_col] = train_data.progress_apply(
        lambda row, roster_col=roster_col: num_all_rounders(row[roster_col]), axis=1)

100%|██████████████████████████████████████████████████████████████████████████████| 948/948 [00:00<00:00, 4979.76it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 948/948 [00:00<00:00, 4732.99it/s]


In [None]:
# Number of all-rounders in each roster of every test match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'num_all_rounders_team_1'),
    ('team2_roster_ids', 'num_all_rounders_team_2'),
):
    test_data[feature_col] = test_data.progress_apply(
        lambda row, roster_col=roster_col: num_all_rounders(row[roster_col]), axis=1)

100%|██████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 5017.00it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 4539.58it/s]


### Combined Bowling Average based on team's last n matches




In [None]:
def total_bowling_avg_last_10(player_list,date,n=5):
    """
    Average, over the roster's known bowlers, of each player's mean runs / (wickets + 1) in recent games.

    Parameters:
    - player_list: ':' separated string of player ids in a team's roster.
    - date: match date forwarded to giveLastNgamesPlayer.
    - n: number of games requested per player (defaults to 5 despite the "_last_10" name).

    Returns:
    - np.nanmean of the per-player values; a player with no rows contributes 0.
    """
    roster_ids = pd.Series(str(player_list).split(':')).apply(float)
    known_bowlers = roster_ids[roster_ids.isin(bowler_lvl_data['bowler_id'])]

    per_player = []
    for pid in known_bowlers:
        recent = giveLastNgamesPlayer(player_id=pid, date=date, n=n, bat_or_bowl='bowl')
        if recent.size != 0:
            # The +1 keeps wicketless spells from dividing by zero.
            per_player.append((recent['runs'] / (recent['wicket_count']+1)).mean())
        else:
            per_player.append(0)

    return np.nanmean(np.array(per_player, dtype=float))

In [None]:
# Roster-level bowling average for both teams of every training match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'total_bowling_avg_team1_last_10'),
    ('team2_roster_ids', 'total_bowling_avg_team2_last_10'),
):
    train_data[feature_col] = train_data.progress_apply(
        lambda row, roster_col=roster_col: total_bowling_avg_last_10(row[roster_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:06<00:00, 141.64it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:06<00:00, 150.38it/s]


In [None]:
# Roster-level bowling average for both teams of every test match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'total_bowling_avg_team1_last_10'),
    ('team2_roster_ids', 'total_bowling_avg_team2_last_10'),
):
    test_data[feature_col] = test_data.progress_apply(
        lambda row, roster_col=roster_col: total_bowling_avg_last_10(row[roster_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:01<00:00, 156.16it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:01<00:00, 152.48it/s]


### Combined average Bowling Strike rate based on team's last n matches


In [None]:
def total_bowling_SR_last_10(player_list,date,n=5):
    """
    Average, over the roster's known bowlers, of each player's mean balls_bowled / (wickets + 1) in recent games.

    Parameters:
    - player_list: ':' separated string of player ids in a team's roster.
    - date: match date forwarded to giveLastNgamesPlayer.
    - n: number of games requested per player (defaults to 5 despite the "_last_10" name).

    Returns:
    - np.nanmean of the per-player values; a player with no rows contributes 0.
    """
    roster_ids = pd.Series(str(player_list).split(':')).apply(float)
    known_bowlers = roster_ids[roster_ids.isin(bowler_lvl_data['bowler_id'])]

    per_player = []
    for pid in known_bowlers:
        recent = giveLastNgamesPlayer(player_id=pid, date=date, n=n, bat_or_bowl='bowl')
        if recent.size != 0:
            # The +1 keeps wicketless spells from dividing by zero.
            per_player.append(((recent['balls_bowled']) / (recent['wicket_count']+1)).mean())
        else:
            per_player.append(0)

    return np.nanmean(np.array(per_player, dtype=float))

In [None]:
# Roster-level bowling strike rate for both teams of every training match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'total_bowling_SR_team1_last_10'),
    ('team2_roster_ids', 'total_bowling_SR_team2_last_10'),
):
    train_data[feature_col] = train_data.progress_apply(
        lambda row, roster_col=roster_col: total_bowling_SR_last_10(row[roster_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:06<00:00, 150.26it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:06<00:00, 153.14it/s]


In [None]:
# Roster-level bowling strike rate for both teams of every test match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'total_bowling_SR_team1_last_10'),
    ('team2_roster_ids', 'total_bowling_SR_team2_last_10'),
):
    test_data[feature_col] = test_data.progress_apply(
        lambda row, roster_col=roster_col: total_bowling_SR_last_10(row[roster_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:01<00:00, 153.96it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:01<00:00, 152.51it/s]


### Combined average Economy based on team's last n matches


In [None]:
def total_avg_economy_last_10(player_list,date,n=5):
    """
    Average, over the roster's known bowlers, of each player's mean 'economy' in recent games.

    Parameters:
    - player_list: ':' separated string of player ids in a team's roster.
    - date: match date forwarded to giveLastNgamesPlayer.
    - n: number of games requested per player (defaults to 5 despite the "_last_10" name).

    Returns:
    - np.nanmean of the per-player values; a player with no rows contributes 0.
    """
    roster_ids = pd.Series(str(player_list).split(':')).apply(float)
    known_bowlers = roster_ids[roster_ids.isin(bowler_lvl_data['bowler_id'])]

    per_player = []
    for pid in known_bowlers:
        recent = giveLastNgamesPlayer(player_id=pid, date=date, n=n, bat_or_bowl='bowl')
        per_player.append(recent['economy'].mean() if recent.size !=0 else 0)

    return np.nanmean(np.array(per_player, dtype=float))

In [None]:
# Roster-level average economy for both teams of every training match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'total_avg_economy_team1_last_10'),
    ('team2_roster_ids', 'total_avg_economy_team2_last_10'),
):
    train_data[feature_col] = train_data.progress_apply(
        lambda row, roster_col=roster_col: total_avg_economy_last_10(row[roster_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:05<00:00, 177.78it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:05<00:00, 184.56it/s]


In [None]:
# Roster-level average economy for both teams of every test match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'total_avg_economy_team1_last_10'),
    ('team2_roster_ids', 'total_avg_economy_team2_last_10'),
):
    test_data[feature_col] = test_data.progress_apply(
        lambda row, roster_col=roster_col: total_avg_economy_last_10(row[roster_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:01<00:00, 177.58it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:01<00:00, 179.34it/s]


### Encoding of Series_type which the match belongs to
0 - Domestic


---


1 - IPL


---


2 - International

In [None]:
# Distinct (series_name, series_type) pairs seen in the match-level data
series_mapping = match_lvl_data[['series_name', 'series_type']].drop_duplicates()

# series_name -> series_type lookup (if a name maps to several types, the last pair wins)
series_dict = dict(zip(series_mapping['series_name'], series_mapping['series_type']))

# Attach the series type to each training match, then integer-encode it and the season.
# pd.factorize numbers categories in order of first appearance; missing values become -1.
# NOTE(review): the resulting codes therefore depend on row order — confirm they match
# the 0/1/2 legend given in the markdown above.
train_data['series_type'] = pd.factorize(train_data['series_name'].map(series_dict))[0]
train_data['season'] = pd.factorize(train_data['season'])[0]

In [None]:
# Encode series_type with the SAME integer codes that train_data received.
# Factorizing test_data on its own numbers categories by order of first appearance in
# test_data, so the same series type could end up with a different code than in train.
# The train category order is rebuilt from train_data['series_name'] (pd.factorize returns
# the uniques in first-appearance order); types unseen in train, or missing, become -1.
train_series_types = pd.factorize(train_data['series_name'].map(series_dict))[1]
test_data['series_type'] = pd.Index(train_series_types).get_indexer(
    test_data['series_name'].map(series_dict))

# NOTE(review): 'season' has the same train/test code-alignment problem, but the original
# season labels of train_data are already overwritten with codes by the time this cell runs,
# so it cannot be aligned from here — keep the label->code mapping when encoding train.
test_data['season'] = pd.factorize(test_data['season'])[0]

### Combined Batting Strength based on Team players last n performance

In [None]:
def batStrLastn(player_list, date, n=5):
    """
    Function to get the batting strength based on the sum of a weighted score of runs and strike rate
    for players in the roster of a team in the last n games.

    Per game the score is 0.4 * runs + 0.6 * strike_rate (non-numeric / missing values count as 0);
    each player contributes the mean of that score over the returned games, or 0 with no games.

    Parameters:
    - player_list: ':' separated list of player ids in the roster of a team.
    - date: match date of the game to calculate this feature.
    - n: Number of games to look back and create this feature.

    Returns:
    - float value denoting sum of batting strength scores of all players in the roster.
    """

    res_list = []

    for player in str(player_list).split(':'): # loop over each player_id in roster
        df_rel = giveLastNgamesPlayer(player_id=player, date=date, n=n, bat_or_bowl='bat') # batting stats from last n games for each player.

        if df_rel.size == 0:
            res_list.append(0)
            continue

        # Work on local Series instead of `df_rel[col].fillna(0, inplace=True)`: that chained
        # in-place call is a no-op under pandas Copy-on-Write and also writes into the frame
        # handed back by the helper.
        runs = pd.to_numeric(df_rel['runs'], errors='coerce').fillna(0)
        strike_rate = pd.to_numeric(df_rel['strike_rate'], errors='coerce').fillna(0)

        bat_strength = 0.4 * runs + 0.6 * strike_rate

        # Append the mean batting strength for the player over the last n games
        res_list.append(bat_strength.mean())

    # Return the sum of all players' batting strengths
    return np.nansum(res_list)

In [None]:
# Combined batting strength for both rosters of every training match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'batStrLastn_team1'),
    ('team2_roster_ids', 'batStrLastn_team2'),
):
    train_data[feature_col] = train_data.progress_apply(
        lambda row, roster_col=roster_col: batStrLastn(row[roster_col], row['match_dt']),
        axis=1)

100%|████████████████████████████████████████████████████████████████████████████████| 948/948 [00:14<00:00, 66.82it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 948/948 [00:13<00:00, 68.03it/s]


In [None]:
# Combined batting strength for both rosters of every test match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'batStrLastn_team1'),
    ('team2_roster_ids', 'batStrLastn_team2'),
):
    test_data[feature_col] = test_data.progress_apply(
        lambda row, roster_col=roster_col: batStrLastn(row[roster_col], row['match_dt']),
        axis=1)

100%|████████████████████████████████████████████████████████████████████████████████| 271/271 [00:04<00:00, 65.05it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 271/271 [00:04<00:00, 65.24it/s]


### Combined Bowling Strength based on team's players' last n performance

In [None]:
def bowlStrLastn(player_list, date, n=5):
    """
    Sum, over a roster, of each player's mean per-game bowling-strength score in the last n games.

    Per game (non-numeric / missing inputs count as 0):
        bowl_avg         = runs / (wicket_count + 1)
        bowl_sr          = wicket_count * 6 / balls_bowled
        bowl_strength    = 0.7 * bowl_sr + 0.3 * bowl_avg
        bowl_eff         = 0.2 * dots + 0.3 * maiden + 0.5 * wicket_count
        discipline_fctor = 0.1 * wides + 0.1 * noballs + 0.3 * Sixes + 0.5 * Fours
        bowling_str      = 0.3 * bowl_strength + 0.3 * bowl_eff - 0.4 * discipline_fctor

    Parameters:
    - player_list: ':' separated list of player ids in the roster of a team.
    - date: match date forwarded to giveLastNgamesPlayer.
    - n: Number of games to look back.

    Returns:
    - float sum of the per-player mean scores; players without usable games add nothing.
    """
    numeric_cols = ['runs', 'dots', 'maiden', 'wicket_count', 'balls_bowled',
                    'wides', 'noballs', 'Sixes', 'Fours']
    res_list = []

    for player in str(player_list).split(':'): # loop over each player_id in roster
        df_rel = giveLastNgamesPlayer(player_id=player, date=date, n=n, bat_or_bowl='bowl') # bowling stats from last n games for each player.

        if df_rel.size == 0:
            # No games: the mean would be NaN, which np.nansum ignores anyway.
            continue

        # Coerce into a fresh frame rather than `df_rel[col].fillna(0, inplace=True)`: that
        # chained in-place call is a no-op under pandas Copy-on-Write and also writes into
        # the frame handed back by the helper.
        stats = df_rel[numeric_cols].apply(pd.to_numeric, errors='coerce').fillna(0)

        bowl_avg = stats['runs'] / (stats['wicket_count'] + 1)
        bowl_sr = (stats['wicket_count'] * 6) / stats['balls_bowled']
        # balls_bowled == 0 gives NaN (0/0, skipped by mean) or inf (wickets/0); an inf would
        # make the whole team feature infinite, so treat it like the NaN case.
        bowl_sr = bowl_sr.replace([np.inf, -np.inf], np.nan)

        bowl_strength = 0.7 * bowl_sr + 0.3 * bowl_avg
        bowl_eff = 0.2 * stats['dots'] + 0.3 * stats['maiden'] + 0.5 * stats['wicket_count']
        discipline_fctor = (0.1 * stats['wides'] + 0.1 * stats['noballs']
                            + 0.3 * stats['Sixes'] + 0.5 * stats['Fours'])

        bowling_str = 0.3 * bowl_strength + 0.3 * bowl_eff - 0.4 * discipline_fctor
        # Append the mean bowling strength for the player over the last n games
        res_list.append(bowling_str.mean())

    # Return the sum of all players' bowling strengths
    return np.nansum(res_list)

In [None]:
# Combined bowling strength for both rosters of every training match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'bowlStrLastn_team1'),
    ('team2_roster_ids', 'bowlStrLastn_team2'),
):
    train_data[feature_col] = train_data.progress_apply(
        lambda row, roster_col=roster_col: bowlStrLastn(row[roster_col], row['match_dt']),
        axis=1)

100%|████████████████████████████████████████████████████████████████████████████████| 948/948 [00:46<00:00, 20.58it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 948/948 [00:45<00:00, 20.93it/s]


In [None]:
# Combined bowling strength for both rosters of every test match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'bowlStrLastn_team1'),
    ('team2_roster_ids', 'bowlStrLastn_team2'),
):
    test_data[feature_col] = test_data.progress_apply(
        lambda row, roster_col=roster_col: bowlStrLastn(row[roster_col], row['match_dt']),
        axis=1)

100%|████████████████████████████████████████████████████████████████████████████████| 271/271 [00:12<00:00, 20.95it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 271/271 [00:12<00:00, 21.19it/s]


### Batting score given to teams on the basis of their players' last n performances
Here we have followed ICC's rule of points. [REFER HERE](https://cricclubs.com/rankCalculator.do)

In [None]:
import pandas as pd
import numpy as np

def batScores(player_list, date, n=5):
    """
    Sum, over a roster, of each player's mean points-based batting score in the last n games.

    Per game: runs + strike_rate + 2 * Sixes + Fours,
    -10 if strike_rate < 50, +10 for each strike-rate threshold exceeded (100, 125, 150, 175, 200),
    +10 for each runs threshold exceeded (10, 20, 30, 40), plus runs again when runs > 50,
    and -10 * (number of zero-run games in the window).
    The last term is the window total applied to every game, so after averaging each
    zero-run game costs the player a full 10 points.

    Parameters:
    - player_list: ':' separated list of player ids in the roster of a team.
    - date: match date forwarded to giveLastNgamesPlayer.
    - n: Number of games to look back.

    Returns:
    - float sum of the per-player mean scores; a player with no games contributes 0.
    """
    res_list = []

    for player in str(player_list).split(':'): # loop over each player_id in roster
        df_rel = giveLastNgamesPlayer(player_id=player, date=date, n=n, bat_or_bowl='bat') # batting stats from last n games for each player.

        if df_rel.empty:
            res_list.append(0)
            continue

        # Coerce into local Series; nothing is written back onto the helper's frame.
        runs = pd.to_numeric(df_rel['runs'], errors='coerce').fillna(0)
        strike_rate = pd.to_numeric(df_rel['strike_rate'], errors='coerce').fillna(0)
        fours = pd.to_numeric(df_rel['Fours'], errors='coerce').fillna(0)
        sixes = pd.to_numeric(df_rel['Sixes'], errors='coerce').fillna(0)

        # Zero-run games in the window (scalar; includes values coerced to 0 above)
        ducks = (runs == 0).sum()

        # Terms are accumulated in the same order as the original single expression.
        score = runs + strike_rate + 2 * sixes + fours
        score = score + np.where(strike_rate < 50, -10, 0)
        for threshold in (100, 125, 150, 175, 200):
            score = score + np.where(strike_rate > threshold, 10, 0)
        for threshold in (10, 20, 30, 40):
            score = score + np.where(runs > threshold, 10, 0)
        score = score + np.where(runs > 50, runs, 0)
        score = score + (-10 * ducks)

        # Append the mean batting score for the player over the last n games
        res_list.append(score.mean())

    # Return the sum of all players' batting scores
    return np.nansum(res_list)

In [None]:
# Points-based batting score for both rosters of every training match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'batScores_team1'),
    ('team2_roster_ids', 'batScores_team2'),
):
    train_data[feature_col] = train_data.progress_apply(
        lambda row, roster_col=roster_col: batScores(row[roster_col], row['match_dt']),
        axis=1)

100%|████████████████████████████████████████████████████████████████████████████████| 948/948 [00:29<00:00, 32.25it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 948/948 [00:29<00:00, 32.54it/s]


In [None]:
# Points-based batting score for both rosters of every test match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'batScores_team1'),
    ('team2_roster_ids', 'batScores_team2'),
):
    test_data[feature_col] = test_data.progress_apply(
        lambda row, roster_col=roster_col: batScores(row[roster_col], row['match_dt']),
        axis=1)

100%|████████████████████████████████████████████████████████████████████████████████| 271/271 [00:08<00:00, 32.39it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 271/271 [00:08<00:00, 33.21it/s]


### Bowling score given to teams on the basis of their players' last n performances
Here we have followed ICC's rule of points. [REFER HERE](https://cricclubs.com/rankCalculator.do)

In [None]:
# Break the ':' separated 'bowler_details' string into one column per field and name them.
# Exactly four fields are expected; the fourth is labelled 'blank'.
bowler_details_split = (
    bowler_lvl_data['bowler_details']
    .str.split(':', expand=True)
    .set_axis(['country', 'batting_hand', 'bowling_style','blank'], axis=1)
)

In [None]:
# Append the split-out detail columns, then discard the raw 'bowler_details' string
# and the 'blank' column.
bowler_lvl_data = (
    pd.concat([bowler_lvl_data, bowler_details_split], axis=1)
    .drop(columns=['bowler_details','blank'])
)

In [None]:
def economy_points(economy):
    """
    Points awarded for a bowling economy rate.

    Bands are half-open so that every non-negative value falls in exactly one of them
    (the previous closed bands such as 0.00-1.99 / 2.00-3.99 left gaps, so a value like
    5.995 dropped through to the worst score):
        [0, 2) -> 30, [2, 4) -> 20, [4, 6) -> 10, [6, 8) -> 0,
        [8, 10) -> -10, [10, 12) -> -20, 12 and above -> -30.

    Parameters:
    - economy: economy rate (number).

    Returns:
    - int points; negative or NaN input returns -30, as before.
    """
    # `not (economy >= 0)` is True for negatives and for NaN.
    if not (economy >= 0):
        return -30

    for upper_bound, points in ((2, 30), (4, 20), (6, 10), (8, 0), (10, -10), (12, -20)):
        if economy < upper_bound:
            return points
    return -30

In [None]:
def wicket_points(wickets):
    """
    Points awarded for the number of wickets taken in a game.

    1 -> 10, 2 -> 20, 3 -> 40, 4 -> 60, 5 or more -> 80; anything else
    (0, NaN, non-integer values) -> 0.
    """
    if wickets >= 5:
        return 80
    points_by_wickets = {1: 10, 2: 20, 3: 40, 4: 60}
    return points_by_wickets.get(wickets, 0)

In [None]:
# Per-spell bowling points: rewards for maidens, economy band, wickets and dot balls,
# minus penalties for boundaries conceded and extras.
bowler_lvl_data['total_points'] = (
    60 * bowler_lvl_data['maiden']                            # 60 per maiden over
    + bowler_lvl_data['economy'].apply(economy_points)        # economy band points
    + bowler_lvl_data['wicket_count'].apply(wicket_points)    # wicket haul points
    + bowler_lvl_data['dots']                                 # 1 per dot ball
    - 2 * bowler_lvl_data['Fours']                            # -2 per four conceded
    - 3 * bowler_lvl_data['Sixes']                            # -3 per six conceded
    - bowler_lvl_data['wides']                                # -1 per wide
    - bowler_lvl_data['noballs']                              # -1 per no-ball
)

In [None]:
# Rank every scorecard row by total points (highest = rank 1; ties share the lowest rank)
bowler_lvl_data['rank'] = bowler_lvl_data['total_points'].rank(method='min', ascending=False)

# Reorder the frame so the best-ranked rows come first
bowler_lvl_data = bowler_lvl_data.sort_values(by='rank')

In [None]:
def bowling_scores_last_10(player_list, date, n=10):
    """
    Average, over the roster's known bowlers, of each player's mean 'total_points'
    in their n most recent bowler_lvl_data rows dated before `date`.

    Parameters:
    - player_list: ':' separated string of player ids in a team's roster.
    - date: only rows with match_dt < date are used.
    - n: number of most recent rows per player (default 10).

    Returns:
    - np.nanmean of the per-player values; a player with no earlier rows contributes 0.
    """
    roster_ids = pd.Series(str(player_list).split(':')).apply(float)
    known_bowlers = roster_ids[roster_ids.isin(bowler_lvl_data['bowler_id'])]

    per_player = []
    for pid in known_bowlers:
        earlier_spells = bowler_lvl_data[
            (bowler_lvl_data['bowler_id'] == pid) & (bowler_lvl_data['match_dt'] < date)
        ]
        recent = earlier_spells.sort_values(by='match_dt', ascending=False).head(n)
        per_player.append(0 if recent.empty else recent['total_points'].mean())

    return np.nanmean(per_player)

In [None]:
# Points-based bowling score for both rosters of every training match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'bowling_scores_team1'),
    ('team2_roster_ids', 'bowling_scores_team2'),
):
    train_data[feature_col] = train_data.progress_apply(
        lambda row, roster_col=roster_col: bowling_scores_last_10(row[roster_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:06<00:00, 145.41it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:06<00:00, 150.35it/s]


In [None]:
# Points-based bowling score for both rosters of every test match.
for roster_col, feature_col in (
    ('team1_roster_ids', 'bowling_scores_team1'),
    ('team2_roster_ids', 'bowling_scores_team2'),
):
    test_data[feature_col] = test_data.progress_apply(
        lambda row, roster_col=roster_col: bowling_scores_last_10(row[roster_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:01<00:00, 152.79it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:01<00:00, 153.76it/s]


### Win ratio over a team's last n games in which it won / lost the toss

In [None]:
def _toss_winner_id(row):
    """Return the id of whichever team's name equals 'toss winner', or None if neither does."""
    if row['toss winner'] == row['team1']:
        return row['team1_id']
    if row['toss winner'] == row['team2']:
        return row['team2_id']
    return None

# Resolve the toss winner's name to a team id in both the match-level and the training data.
match_lvl_data['toss_winner_id'] = match_lvl_data.apply(_toss_winner_id, axis=1)

train_data['toss_winner_id'] = train_data.apply(_toss_winner_id, axis=1)

In [None]:
def winsWhenTossLost(team_id, date, n=10):
    """
    Win ratio of a team over its most recent games (up to n, before `date`) in which
    it was not the toss winner.

    Parameters:
    - team_id: id matched against team1_id / team2_id / toss_winner_id / winner_id.
    - date: only games with match_dt < date are considered.
    - n: maximum number of most recent qualifying games (default 10).

    Returns:
    - wins / games as a float, or 0 when there is no qualifying game.
    """
    earlier = match_lvl_data['match_dt'] < date
    played = (match_lvl_data['team1_id'] == team_id) | (match_lvl_data['team2_id'] == team_id)
    # Rows whose toss_winner_id is missing also compare unequal and are counted here.
    toss_lost = match_lvl_data['toss_winner_id'] != team_id

    recent = (match_lvl_data[earlier & played & toss_lost]
              .sort_values(by='match_dt', ascending=False)
              .head(n))

    games = len(recent)
    if games == 0:
        return 0

    wins = int((recent['winner_id'] == team_id).sum())
    return wins / games

In [None]:
# Win ratio after losing the toss, for both teams of every training match.
for team_col, feature_col in (
    ('team1_id', 'wins_when_toss_lost_avg10_team1'),
    ('team2_id', 'wins_when_toss_lost_avg10_team2'),
):
    train_data[feature_col] = train_data.progress_apply(
        lambda row, team_col=team_col: winsWhenTossLost(row[team_col], row['match_dt']),
        axis=1)


100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:01<00:00, 886.97it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:01<00:00, 895.48it/s]


In [None]:
# Win ratio after losing the toss, for both teams of every test match.
for team_col, feature_col in (
    ('team1_id', 'wins_when_toss_lost_avg10_team1'),
    ('team2_id', 'wins_when_toss_lost_avg10_team2'),
):
    test_data[feature_col] = test_data.progress_apply(
        lambda row, team_col=team_col: winsWhenTossLost(row[team_col], row['match_dt']),
        axis=1)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 856.37it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 894.24it/s]


In [None]:
def winsWhenTossWon(team_id, date, n=10):
    """Win ratio of a team over its most recent games in which it won the toss.

    Reads the notebook-level `match_lvl_data` frame and keeps games that
    * have `match_dt` strictly before `date`,
    * list `team_id` as `team1_id` or `team2_id`, and
    * have `toss_winner_id` equal to `team_id`.
    The `n` most recent of those games are used.

    Returns wins / games for that window, or 0 when the window is empty.
    """
    earlier = match_lvl_data['match_dt'] < date
    played = (match_lvl_data['team1_id'] == team_id) | (match_lvl_data['team2_id'] == team_id)
    toss_won = match_lvl_data['toss_winner_id'] == team_id

    recent = (match_lvl_data[earlier & played & toss_won]
              .sort_values(by='match_dt', ascending=False)
              .head(n))

    games = len(recent)
    if games == 0:
        # No qualifying history: report 0 rather than dividing by zero.
        return 0

    wins = len(recent[recent['winner_id'] == team_id])
    return wins / games

In [None]:
# Toss-won win ratio for each side of every training match.
train_data['wins_when_toss_won_avg10_team1'] = train_data.progress_apply(
    lambda match: winsWhenTossWon(team_id=match['team1_id'], date=match['match_dt']),
    axis=1,
)
train_data['wins_when_toss_won_avg10_team2'] = train_data.progress_apply(
    lambda match: winsWhenTossWon(team_id=match['team2_id'], date=match['match_dt']),
    axis=1,
)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:01<00:00, 893.58it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:01<00:00, 900.00it/s]


In [None]:
# Same toss-won win ratio, computed for the test matches.
test_data['wins_when_toss_won_avg10_team1'] = test_data.progress_apply(
    lambda match: winsWhenTossWon(team_id=match['team1_id'], date=match['match_dt']),
    axis=1,
)
test_data['wins_when_toss_won_avg10_team2'] = test_data.progress_apply(
    lambda match: winsWhenTossWon(team_id=match['team2_id'], date=match['match_dt']),
    axis=1,
)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 876.89it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 892.84it/s]


### Margin of victory for teams in their last n games

In [None]:
def margin_Of_Victory_Last_10(team_id, date, n=10):
    """Average `win amount` over the wins among a team's last `n` games before `date`.

    The window is the `n` most recent rows of the notebook-level `match_lvl_data`
    with `match_dt` strictly before `date` in which `team_id` is `team1_id` or
    `team2_id`. Only the games in that window won by `team_id` contribute.

    A win decided `by` 'runs' or 'wickets' contributes its `win amount`; any
    other `by` value contributes 0. Run margins and wicket margins are averaged
    together without rescaling.

    Returns the mean rounded to 2 decimals, or 0 if the window holds no wins.
    """
    involved = (match_lvl_data['team1_id'] == team_id) | (match_lvl_data['team2_id'] == team_id)
    last_games = (match_lvl_data[(match_lvl_data['match_dt'] < date) & involved]
                  .sort_values(by='match_dt', ascending=False)
                  .head(n))
    won = last_games[last_games['winner_id'] == team_id]

    margins = [
        amount if (kind == 'runs' or kind == 'wickets') else 0
        for kind, amount in zip(won['by'], won['win amount'])
    ]

    if not margins:
        return 0
    return round(sum(margins) / len(margins), 2)

In [None]:
# Average winning margin over the last 10 games, for each side of every training match.
train_data['Margin_of_victory_avg_last10_team1'] = train_data.progress_apply(
    lambda match: margin_Of_Victory_Last_10(team_id=match['team1_id'], date=match['match_dt']),
    axis=1,
)
train_data['Margin_of_victory_avg_last10_team2'] = train_data.progress_apply(
    lambda match: margin_Of_Victory_Last_10(team_id=match['team2_id'], date=match['match_dt']),
    axis=1,
)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:01<00:00, 705.46it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:01<00:00, 725.32it/s]


In [None]:
# Same average winning margin, computed for the test matches.
test_data['Margin_of_victory_avg_last10_team1'] = test_data.progress_apply(
    lambda match: margin_Of_Victory_Last_10(team_id=match['team1_id'], date=match['match_dt']),
    axis=1,
)
test_data['Margin_of_victory_avg_last10_team2'] = test_data.progress_apply(
    lambda match: margin_Of_Victory_Last_10(team_id=match['team2_id'], date=match['match_dt']),
    axis=1,
)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 686.21it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 663.08it/s]


### Batting scores (from above) for teams in last n games on that ground



In [None]:
def meanBatScoresOnGround(player_list, ground_id, match_dt, n=5):
    """Mean batting-strength score of a roster at one ground, using earlier innings only.

    Parameters
    ----------
    player_list : ':'-separated string of batsman ids (each part is parsed with float()).
    ground_id   : ground to restrict the history to.
    match_dt    : only innings with `match_dt` strictly before this value are used.
    n           : accepted for signature compatibility; not used, so ALL earlier
                  innings at the ground are taken into account.

    Returns
    -------
    The nan-ignoring mean over the roster of each player's mean per-innings
    strength. A player with no earlier innings at the ground contributes 0.

    Reads the notebook-level `batsman_lvl_data` and `match_lvl_data` frames.
    The per-innings metrics are computed as local Series rather than being
    written back onto the filtered frame, which avoids pandas' chained-assignment
    (SettingWithCopy) pattern; the resulting values are unchanged.
    """
    roster = str(player_list).split(':')

    # batsman_lvl_data has no ground column; attach it from the match table.
    with_ground = pd.merge(batsman_lvl_data, match_lvl_data[['match id', 'ground_id']],
                           on='match id', how='left')
    prior_on_ground = with_ground[(with_ground['ground_id'] == ground_id) &
                                  (with_ground['match_dt'] < match_dt)]

    per_player = []
    for player in roster:
        innings = prior_on_ground[prior_on_ground['batsman_id'] == float(player)]
        if innings.empty:
            per_player.append(0)
            continue

        # Coerce to numbers; unparsable or missing entries count as 0.
        runs = pd.to_numeric(innings['runs'], errors='coerce').fillna(0)
        strike_rate = pd.to_numeric(innings['strike_rate'], errors='coerce').fillna(0)
        fours = pd.to_numeric(innings['Fours'], errors='coerce').fillna(0)
        sixes = pd.to_numeric(innings['Sixes'], errors='coerce').fillna(0)

        # Total number of zero-run innings; the penalty below is applied to
        # every innings of the player, not only to the ducks themselves.
        ducks = (runs == 0).sum()

        strength = (
            runs +
            strike_rate +
            2 * sixes +
            fours +
            np.where(strike_rate < 50, -10, 0) +
            np.where(strike_rate > 100, 10, 0) +
            np.where(strike_rate > 125, 10, 0) +
            np.where(strike_rate > 150, 10, 0) +
            np.where(strike_rate > 175, 10, 0) +
            np.where(strike_rate > 200, 10, 0) +
            np.where(runs > 10, 10, 0) +
            np.where(runs > 20, 10, 0) +
            np.where(runs > 30, 10, 0) +
            np.where(runs > 40, 10, 0) +
            np.where(runs > 50, runs, 0) +  # innings above 50 count their runs twice
            (-10 * ducks)
        )
        per_player.append(strength.mean())

    return np.nanmean(per_player)

In [None]:
# Roster batting strength at the match ground, for each side of every training match.
train_data['meanBatScoresOnGround_team1'] = train_data.progress_apply(
    lambda match: meanBatScoresOnGround(player_list=match['team1_roster_ids'],
                                        ground_id=match['ground_id'],
                                        match_dt=match['match_dt']),
    axis=1,
)
train_data['meanBatScoresOnGround_team2'] = train_data.progress_apply(
    lambda match: meanBatScoresOnGround(player_list=match['team2_roster_ids'],
                                        ground_id=match['ground_id'],
                                        match_dt=match['match_dt']),
    axis=1,
)

100%|████████████████████████████████████████████████████████████████████████████████| 948/948 [00:20<00:00, 46.96it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 948/948 [00:21<00:00, 44.90it/s]


In [None]:
# Same roster batting strength at the ground, computed for the test matches.
test_data['meanBatScoresOnGround_team1'] = test_data.progress_apply(
    lambda match: meanBatScoresOnGround(player_list=match['team1_roster_ids'],
                                        ground_id=match['ground_id'],
                                        match_dt=match['match_dt']),
    axis=1,
)
test_data['meanBatScoresOnGround_team2'] = test_data.progress_apply(
    lambda match: meanBatScoresOnGround(player_list=match['team2_roster_ids'],
                                        ground_id=match['ground_id'],
                                        match_dt=match['match_dt']),
    axis=1,
)

100%|████████████████████████████████████████████████████████████████████████████████| 271/271 [00:05<00:00, 46.81it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 271/271 [00:05<00:00, 45.41it/s]


### Bowling scores (from above) for teams in last n games on that ground


In [None]:
def mean_bowlScores_On_Ground(player_list, ground_id, match_dt, n=5):
    """Mean `total_points` of a roster's bowlers at one ground, using earlier games only.

    Parameters
    ----------
    player_list : ':'-separated string of player ids (each part is parsed with float()).
    ground_id   : ground to restrict the history to.
    match_dt    : only rows with `match_dt` strictly before this value are used.
    n           : per bowler, how many of the most recent qualifying rows to average.

    Returns
    -------
    The nan-ignoring mean of the per-bowler averages. Roster members that never
    appear as `bowler_id` anywhere in `bowler_lvl_data` are left out; a bowler
    with no earlier rows at this ground contributes 0. Returns 0 when no roster
    member has ever bowled (previously this produced NaN plus a RuntimeWarning
    from `np.nanmean([])`).

    Reads the notebook-level `bowler_lvl_data` and `match_lvl_data` frames.
    """
    roster = pd.Series(str(player_list).split(':')).apply(float)

    # bowler_lvl_data has no ground column; attach it from the match table.
    with_ground = pd.merge(bowler_lvl_data, match_lvl_data[['match id', 'ground_id']],
                           on='match id', how='left')

    # Filter the merged frame itself. Masking bowler_lvl_data with a mask built
    # from the merged frame is only valid while both happen to share one index.
    prior_on_ground = with_ground[(with_ground['ground_id'] == ground_id) &
                                  (with_ground['match_dt'] < match_dt)]

    scores = []
    for player in roster[roster.isin(with_ground['bowler_id'])]:
        spells = (prior_on_ground[prior_on_ground['bowler_id'] == player]
                  .sort_values(by='match_dt', ascending=False)
                  .head(n))
        scores.append(spells['total_points'].mean() if not spells.empty else 0)

    if not scores:
        return 0
    return np.nanmean(scores)

In [None]:
# Roster bowling points at the match ground, for each side of every training match.
train_data['mean_bowlScores_On_Ground_team1'] = train_data.progress_apply(
    lambda match: mean_bowlScores_On_Ground(player_list=match['team1_roster_ids'],
                                            ground_id=match['ground_id'],
                                            match_dt=match['match_dt']),
    axis=1,
)
train_data['mean_bowlScores_On_Ground_team2'] = train_data.progress_apply(
    lambda match: mean_bowlScores_On_Ground(player_list=match['team2_roster_ids'],
                                            ground_id=match['ground_id'],
                                            match_dt=match['match_dt']),
    axis=1,
)

100%|████████████████████████████████████████████████████████████████████████████████| 948/948 [00:12<00:00, 76.07it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 948/948 [00:11<00:00, 79.41it/s]


In [None]:
# Same roster bowling points at the ground, computed for the test matches.
test_data['mean_bowlScores_On_Ground_team1'] = test_data.progress_apply(
    lambda match: mean_bowlScores_On_Ground(player_list=match['team1_roster_ids'],
                                            ground_id=match['ground_id'],
                                            match_dt=match['match_dt']),
    axis=1,
)
test_data['mean_bowlScores_On_Ground_team2'] = test_data.progress_apply(
    lambda match: mean_bowlScores_On_Ground(player_list=match['team2_roster_ids'],
                                            ground_id=match['ground_id'],
                                            match_dt=match['match_dt']),
    axis=1,
)

100%|████████████████████████████████████████████████████████████████████████████████| 271/271 [00:03<00:00, 81.08it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 271/271 [00:03<00:00, 81.76it/s]


### Combined average strike rate of teams for last n games on that ground

In [None]:
def avg_strike_rate_on_ground(player_list, date, ground_id, n=5):
    """Roster-average batting strike rate at one ground, using earlier innings only.

    `player_list` is a ':'-separated string of batsman ids. For each id the mean
    `strike_rate` over that player's rows at `ground_id` with `match_dt` strictly
    before `date` is taken (0 if the player has none), and the nan-ignoring mean
    of those per-player values is returned.

    Returns 0 when there is no earlier batting row at the ground at all.
    `n` is accepted but not used. Reads the notebook-level `batsman_lvl_data`
    and `match_lvl_data` frames.
    """
    roster = pd.Series(str(player_list).split(':')).apply(float)

    # batsman_lvl_data has no ground column; attach it from the match table.
    with_ground = pd.merge(batsman_lvl_data, match_lvl_data[['match id', 'ground_id']],
                           on='match id', how='left')
    prior = with_ground[(with_ground['ground_id'] == ground_id) & (with_ground['match_dt'] < date)]
    if prior.empty:
        return 0

    per_player = []
    for player in roster:
        innings = prior[prior['batsman_id'] == player]
        per_player.append(0 if innings.empty else innings['strike_rate'].mean())
    return np.nanmean(per_player)

In [None]:
# Roster strike rate at the match ground, for each side of every training match.
train_data['avg_strike_rate_on_ground_team1'] = train_data.progress_apply(
    lambda match: avg_strike_rate_on_ground(player_list=match['team1_roster_ids'],
                                            date=match['match_dt'],
                                            ground_id=match['ground_id']),
    axis=1,
)
train_data['avg_strike_rate_on_ground_team2'] = train_data.progress_apply(
    lambda match: avg_strike_rate_on_ground(player_list=match['team2_roster_ids'],
                                            date=match['match_dt'],
                                            ground_id=match['ground_id']),
    axis=1,
)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:07<00:00, 129.82it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:07<00:00, 126.21it/s]


In [None]:
# Same roster strike rate at the ground, computed for the test matches.
test_data['avg_strike_rate_on_ground_team1'] = test_data.progress_apply(
    lambda match: avg_strike_rate_on_ground(player_list=match['team1_roster_ids'],
                                            date=match['match_dt'],
                                            ground_id=match['ground_id']),
    axis=1,
)
test_data['avg_strike_rate_on_ground_team2'] = test_data.progress_apply(
    lambda match: avg_strike_rate_on_ground(player_list=match['team2_roster_ids'],
                                            date=match['match_dt'],
                                            ground_id=match['ground_id']),
    axis=1,
)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:02<00:00, 127.17it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:02<00:00, 123.76it/s]


### Combined average Economy of teams for last n games on that ground

In [None]:
def avg_economy_on_ground(player_list, date, ground_id, n=5):
    """Roster-average bowling economy at one ground, using earlier games only.

    `player_list` is a ':'-separated string of player ids. For each id the mean
    `economy` over that player's bowling rows at `ground_id` with `match_dt`
    strictly before `date` is taken (0 if the player has none), and the
    nan-ignoring mean of those per-player values is returned.

    Returns 0 when there is no earlier bowling row at the ground at all.
    `n` is accepted but not used. Reads the notebook-level `bowler_lvl_data`
    and `match_lvl_data` frames.
    """
    roster = pd.Series(str(player_list).split(':')).apply(float)

    # bowler_lvl_data has no ground column; attach it from the match table.
    with_ground = pd.merge(bowler_lvl_data, match_lvl_data[['match id', 'ground_id']],
                           on='match id', how='left')
    prior = with_ground[(with_ground['ground_id'] == ground_id) & (with_ground['match_dt'] < date)]
    if prior.empty:
        return 0

    per_player = []
    for player in roster:
        spells = prior[prior['bowler_id'] == player]
        per_player.append(0 if spells.empty else spells['economy'].mean())
    return np.nanmean(per_player)

In [None]:
# Roster bowling economy at the match ground, for each side of every training match.
train_data['avg_economy_on_ground_team1'] = train_data.progress_apply(
    lambda match: avg_economy_on_ground(player_list=match['team1_roster_ids'],
                                        date=match['match_dt'],
                                        ground_id=match['ground_id']),
    axis=1,
)
train_data['avg_economy_on_ground_team2'] = train_data.progress_apply(
    lambda match: avg_economy_on_ground(player_list=match['team2_roster_ids'],
                                        date=match['match_dt'],
                                        ground_id=match['ground_id']),
    axis=1,
)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:07<00:00, 125.10it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:07<00:00, 122.84it/s]


In [None]:
# Same roster bowling economy at the ground, computed for the test matches.
test_data['avg_economy_on_ground_team1'] = test_data.progress_apply(
    lambda match: avg_economy_on_ground(player_list=match['team1_roster_ids'],
                                        date=match['match_dt'],
                                        ground_id=match['ground_id']),
    axis=1,
)
test_data['avg_economy_on_ground_team2'] = test_data.progress_apply(
    lambda match: avg_economy_on_ground(player_list=match['team2_roster_ids'],
                                        date=match['match_dt'],
                                        ground_id=match['ground_id']),
    axis=1,
)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:02<00:00, 116.68it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:02<00:00, 117.90it/s]


### Win percent of teams for last n games on that ground

In [None]:
def winp_on_ground(team_id, ground_id, date, match_lvl_data):
    """Fraction of a team's earlier games at a ground that it won.

    Considers every row of `match_lvl_data` with `match_dt` strictly before
    `date`, played at `ground_id`, in which `team_id` is `team1_id` or
    `team2_id`. Returns wins / games, or 0 if the team has no such game.
    """
    earlier_here = match_lvl_data[(match_lvl_data['match_dt'] < date) &
                                  (match_lvl_data['ground_id'] == ground_id)]
    earlier_here = earlier_here.sort_values(by='match_dt', ascending=False)

    team_games = earlier_here[(earlier_here['team1_id'] == team_id) |
                              (earlier_here['team2_id'] == team_id)]
    if team_games.empty:
        return 0

    wins = len(team_games[team_games['winner_id'] == team_id])
    return wins / len(team_games)

In [None]:
# Win share at the match ground, for each side of every training match.
train_data['winp_on_ground_team1'] = train_data.progress_apply(
    lambda match: winp_on_ground(team_id=match['team1_id'], ground_id=match['ground_id'],
                                 date=match['match_dt'], match_lvl_data=match_lvl_data),
    axis=1,
)
train_data['winp_on_ground_team2'] = train_data.progress_apply(
    lambda match: winp_on_ground(team_id=match['team2_id'], ground_id=match['ground_id'],
                                 date=match['match_dt'], match_lvl_data=match_lvl_data),
    axis=1,
)

100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:01<00:00, 694.11it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 948/948 [00:01<00:00, 668.83it/s]


In [None]:
# Same win share at the ground, computed for the test matches.
test_data['winp_on_ground_team1'] = test_data.progress_apply(
    lambda match: winp_on_ground(team_id=match['team1_id'], ground_id=match['ground_id'],
                                 date=match['match_dt'], match_lvl_data=match_lvl_data),
    axis=1,
)
test_data['winp_on_ground_team2'] = test_data.progress_apply(
    lambda match: winp_on_ground(team_id=match['team2_id'], ground_id=match['ground_id'],
                                 date=match['match_dt'], match_lvl_data=match_lvl_data),
    axis=1,
)

100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 653.96it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 271/271 [00:00<00:00, 689.28it/s]


In [None]:
# Replace every remaining NaN in train_data with 0, in place.
# NOTE(review): test_data is not filled in this cell — confirm it is handled elsewhere,
# otherwise train and test features treat missing history differently.
train_data.fillna(0, inplace=True)

Splitting the data based on series type for training different models for different series_type matches.

In [None]:
# One subset per series_type value (0, 1, 2) so a separate model can be trained for each.
train_0 = train_data[train_data['series_type']==0]
train_1 = train_data[train_data['series_type']==1]
train_2 = train_data[train_data['series_type']==2]
# The engineered features above are written to `test_data`, so split that frame
# (the bare name `test` is not the frame the features were added to).
test_0 = test_data[test_data['series_type']==0]
test_1 = test_data[test_data['series_type']==1]
test_2 = test_data[test_data['series_type']==2]

Defining the Variables

Please note: this is just one of the feature combinations we tried; the other combinations are listed in the table further below.

In [None]:
# Feature matrix: the 13 columns of this feature combination, taken from the training frame.
X = train_data.loc[:, [
    'total_strike_rate_team_1_last10', 'total_strike_rate_team_2_last10',
    'total_fours_avg_team1_last10', 'total_fours_avg_team2_last10',
    'total_sixes_avg_team1_last10', 'total_sixes_avg_team2_last10',
    'total_avg_wicket_team1_last10', 'total_avg_wicket_team2_last10',
    'total_bowling_SR_team1_last_10', 'total_bowling_SR_team2_last_10',
    'team_count_50runs_last15', 'team_winp_last5',
    'ground_avg_runs_last15',
]]
# Target column.
y = train_data['winner_01']

In [None]:
# NOTE(review): X_test is sliced from train_data with the same columns as X, so it
# holds the training rows — confirm whether test_data (or a hold-out split) was intended.
X_test = train_data.loc[:, [
    'total_strike_rate_team_1_last10', 'total_strike_rate_team_2_last10',
    'total_fours_avg_team1_last10', 'total_fours_avg_team2_last10',
    'total_sixes_avg_team1_last10', 'total_sixes_avg_team2_last10',
    'total_avg_wicket_team1_last10', 'total_avg_wicket_team2_last10',
    'total_bowling_SR_team1_last_10', 'total_bowling_SR_team2_last_10',
    'team_count_50runs_last15', 'team_winp_last5',
    'ground_avg_runs_last15',
]]

# Hyperparameter tuning of models using Optuna

### Hyperparameter Tuning of Catboost Model

In [None]:
def objective(trial):
    """Optuna objective for CatBoost: fit on (X, y), return accuracy on (X_test_2, y_test_2).

    Uses `trial.suggest_float(..., log=True)` / `trial.suggest_float(...)` in place of
    the deprecated `suggest_loguniform` / `suggest_uniform`; the sampled ranges and
    distributions are the same.

    NOTE(review): X_test_2 / y_test_2 are not defined in this part of the notebook —
    confirm they exist on a fresh kernel. Every trial is scored on that one set, so
    the best score is optimistically biased for it.
    """
    # Define the hyperparameters
    catboost_params = {
        'iterations': trial.suggest_int('iterations', 10, 400),
        'depth': trial.suggest_int('depth', 2, 8),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2, log=True),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 0.01, 1.5, log=True),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'random_strength': trial.suggest_float('random_strength', 0.0, 1.0),
        'border_count': trial.suggest_int('border_count', 1, 255),
        'od_type': trial.suggest_categorical('od_type', ['IncToDec', 'Iter']),
        'od_wait': trial.suggest_int('od_wait', 5, 50),
        'verbose': 0
    }
    model = CatBoostClassifier(**catboost_params, random_state=42)
    model.fit(X,y)
    score = model.score(X_test_2,y_test_2)

    return score

# Create the Optuna study and optimize the objective function
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=1000)

# Get the best hyperparameters
best_params_catboost = study.best_params
print("Best Hyperparameters: ", best_params_catboost)

##### K-Fold Cross Validating the model

In [None]:
# 10-fold stratified CV of CatBoost with the tuned hyperparameters; prints each
# fold's accuracy followed by the mean.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
accuracies = []
for train_index, test_index in skf.split(X, y):
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_valid, y_valid = X.iloc[test_index], y.iloc[test_index]

    train_pool = Pool(data=X_train, label=y_train)

    model = CatBoostClassifier(**best_params_catboost, random_state=42, verbose=0)
    model.fit(train_pool)

    preds = model.predict(X_valid)
    fold_accuracy = accuracy_score(y_valid, preds)
    print(fold_accuracy)
    accuracies.append(fold_accuracy)
print(f'Mean_accuracy :{np.mean(accuracies)}')

0.5157894736842106
0.5578947368421052
0.5052631578947369
0.5368421052631579
0.6105263157894737
0.5368421052631579
0.6
0.5684210526315789
0.5638297872340425
0.5425531914893617
Mean_accuracy :0.5537961926091826


In [None]:
# Final CatBoost classifier configured with the best hyperparameters found by the study.
cbc = CatBoostClassifier(**best_params_catboost, random_state=42)

In [None]:
# Fit on the full training features/target; verbose=0 is passed to keep the fit quiet.
cbc.fit(X,y, verbose=0)

<catboost.core.CatBoostClassifier at 0x21c55149f10>

In [None]:
# NOTE(review): y_test is not defined in this part of the notebook, and X_test is
# sliced from train_data — confirm which hold-out set this score refers to.
cbc.score(X_test,y_test)

0.66

### Hyperparameter Tuning of GBM Model

In [None]:
import optuna
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
def objective(trial):
    """Optuna objective for GradientBoostingClassifier.

    Fits on (X, y) and returns the accuracy on (X_test_2, y_test_2) — a single
    hold-out score, not a cross-validated average. Float parameters are sampled
    with `trial.suggest_float` (log-scaled for the learning rate), replacing the
    deprecated `suggest_loguniform` / `suggest_uniform` with the same ranges.

    NOTE(review): X_test_2 / y_test_2 are not defined in this part of the notebook —
    confirm they exist on a fresh kernel.
    """
    gb_params = {
        'n_estimators': trial.suggest_int('n_estimators', 15, 800),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2, log=True),
        'max_depth': trial.suggest_int('max_depth', 2, 7),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 100),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 100),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'max_features': trial.suggest_float('max_features', 0.5, 1.0),
        'verbose': 0
    }
    gbm = GradientBoostingClassifier(**gb_params, random_state = 42)

    # Train the model
    gbm.fit(X, y)

    # Accuracy on the hold-out set
    score = gbm.score(X_test_2,y_test_2)

    return score

# Create the study and optimize the objective function
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=1000)

# Best trial
best_trial_GBM = study.best_trial
print("Best trial: ", best_trial_GBM.values)
print("Best parameters: ", best_trial_GBM.params)

##### K-Fold Cross Validating the model

In [None]:
# 10-fold stratified CV of the gradient-boosting model; prints each fold's accuracy
# followed by the mean. The hyperparameters come from the best Optuna trial
# (`best_trial_GBM.params`); the name `gbm_params` is not defined by the tuning cell.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
accuracies = []
for train_index, test_index in skf.split(X, y):
    X_train, X_valid = X.iloc[train_index], X.iloc[test_index]
    y_train, y_valid = y.iloc[train_index], y.iloc[test_index]

    gbm = GradientBoostingClassifier(**best_trial_GBM.params, random_state = 42)
    gbm.fit(X_train, y_train)

    preds = gbm.predict(X_valid)
    accuracy = accuracy_score(y_valid, preds)
    print(accuracy)
    accuracies.append(accuracy)

print(f"Mean Accuracy: {np.mean(accuracies)}")

0.5263157894736842
0.6105263157894737
0.47368421052631576
0.4842105263157895
0.5894736842105263
0.5052631578947369
0.6
0.5789473684210527
0.5425531914893617
0.5638297872340425
Mean Accuracy: 0.5474804031354983


In [None]:
# Final gradient-boosting model built from the best Optuna trial's parameters
# (`gbm_params` is not defined by the tuning cell).
gbm = GradientBoostingClassifier(**best_trial_GBM.params, random_state = 42)

In [None]:
# Fit the gradient-boosting model on the full training features/target.
gbm.fit(X,y)

In [None]:
# NOTE(review): y_test is not defined in this part of the notebook, and X_test is
# sliced from train_data — confirm which hold-out set this score refers to.
gbm.score(X_test,y_test)

0.62

### Hyperparameter Tuning of XGBOOST Model

In [None]:
import optuna
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
def objective(trial):
    """Optuna objective for XGBClassifier.

    Fits on (X, y) and returns the accuracy on (X_test_2, y_test_2) — a single
    hold-out score, not a cross-validated average. Float parameters are sampled
    with `trial.suggest_float` (log-scaled where the original used a log-uniform
    range), replacing the deprecated `suggest_loguniform` / `suggest_uniform`.

    NOTE(review): X_test_2 / y_test_2 are not defined in this part of the notebook —
    confirm they exist on a fresh kernel.
    """
    xgboost_params = {
        'max_depth': trial.suggest_int('max_depth', 2, 8),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 20, 1000),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 1e-4, 1.0, log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 5, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.001, 5.0, log=True),
        'verbosity': 0
    }

    model = xgb.XGBClassifier(**xgboost_params, random_state = 42)

    model.fit(X, y)
    # Accuracy on the hold-out set
    accuracy = model.score(X_test_2,y_test_2)

    return accuracy

# Create the study and optimize the objective function
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=1000)

# Best trial
best_trial_XGB = study.best_trial
print("Best trial: ", best_trial_XGB.values)
print("Best parameters: ", best_trial_XGB.params)

##### K-Fold Cross Validating the model

In [None]:
# 10-fold stratified CV of the XGBoost model; prints each fold's accuracy followed
# by the mean. The hyperparameters come from the best Optuna trial
# (`best_trial_XGB.params`); the name `xgb_params` is not defined by the tuning cell.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
accuracies = []
for train_index, test_index in skf.split(X, y):
    X_train, X_valid = X.iloc[train_index], X.iloc[test_index]
    y_train, y_valid = y.iloc[train_index], y.iloc[test_index]

    model = xgb.XGBClassifier(**best_trial_XGB.params, random_state = 42)
    model.fit(X_train, y_train)

    preds = model.predict(X_valid)
    accuracy = accuracy_score(y_valid, preds)
    print(accuracy)
    accuracies.append(accuracy)

print(f"Mean Accuracy: {np.mean(accuracies)}")

0.6
0.5157894736842106
0.4842105263157895
0.5578947368421052
0.6526315789473685
0.5157894736842106
0.5473684210526316
0.6210526315789474
0.5425531914893617
0.5319148936170213
Mean Accuracy: 0.5569204927211647


In [None]:
# Final XGBoost model built from the best Optuna trial's parameters
# (`xgb_params` is not defined by the tuning cell).
model = xgb.XGBClassifier(**best_trial_XGB.params, random_state = 42)

In [None]:
# Fit the XGBoost model on the full training features/target.
model.fit(X,y)

### Hyperparameter Tuning of Light GBM Model

In [None]:
import optuna
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
def objective(trial):
    """Optuna objective for LGBMClassifier.

    Fits on (X, y) and returns the accuracy on (X_test_2, y_test_2) — a single
    hold-out score, not a cross-validated average. Float parameters are sampled
    with `trial.suggest_float` (log-scaled where the original used a log-uniform
    range), replacing the deprecated `suggest_loguniform` / `suggest_uniform`.

    NOTE(review): X_test_2 / y_test_2 are not defined in this part of the notebook —
    confirm they exist on a fresh kernel.
    """
    lightgbm_params = {
        'num_leaves': trial.suggest_int('num_leaves', 31, 500),
        'max_depth': trial.suggest_int('max_depth', 2, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 30, 1000),
        'min_child_weight': trial.suggest_float('min_child_weight', 0.01, 5, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'verbosity': -1
    }
    LGB = lgb.LGBMClassifier(**lightgbm_params, random_state = 42)

    # Train the model
    LGB.fit(X, y)

    # Accuracy on the hold-out set
    score = LGB.score(X_test_2,y_test_2)

    return score

# Create the study and optimize the objective function
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=1000)
# Best trial
best_trial_LGB = study.best_trial
print("Best trial: ", best_trial_LGB.values)
print("Best parameters: ", best_trial_LGB.params)

##### K-Fold Cross Validating the model

In [None]:
# 10-fold stratified CV of the LightGBM model; prints each fold's accuracy followed
# by the mean. The hyperparameters come from the best Optuna trial
# (`best_trial_LGB.params`); the name `lgb_params` is not defined by the tuning cell.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
accuracies = []
for train_index, test_index in skf.split(X, y):
    X_train, X_valid = X.iloc[train_index], X.iloc[test_index]
    y_train, y_valid = y.iloc[train_index], y.iloc[test_index]

    LGB = lgb.LGBMClassifier(**best_trial_LGB.params, random_state=42, verbosity = -1)
    LGB.fit(X_train, y_train)

    preds = LGB.predict(X_valid)
    accuracy = accuracy_score(y_valid, preds)
    print(accuracy)
    accuracies.append(accuracy)

print(f"Mean Accuracy: {np.mean(accuracies)}")

0.5684210526315789
0.5368421052631579
0.5263157894736842
0.631578947368421
0.5157894736842106
0.4842105263157895
0.5473684210526316
0.5368421052631579
0.5
0.5851063829787234
Mean Accuracy: 0.5432474804031355


In [None]:
# Final LightGBM model built from the best Optuna trial's parameters
# (`lgb_params` is not defined by the tuning cell).
LGB = lgb.LGBMClassifier(**best_trial_LGB.params, random_state=42, verbosity = -1)

In [None]:
# Fit the LightGBM model on the full training features/target.
LGB.fit(X,y)

### Exploring Various Feature Selection Methods:

In [None]:
feature_desc = {
    'team_count_50runs_last15': 'Ratio of number of 50s by players in team1 to number of 50s by players in team2 in last 15 games',
    'team_winp_last5': 'Ratio of team1\'s win % to team2\'s win % in last 5 games',
    'team1only_avg_runs_last15': 'Team1\'s average inning runs in last 15 games',
    'team1_winp_team2_last15': 'Team1\'s win percentage against Team2 in last 15 games',
    'ground_avg_runs_last15': 'Average runs scored in the ground in last 15 games',
    'winner_01': 'Binary indicator for the winning team (1 if team2 wins, else 0)',
    'toss_winner_01': 'Binary indicator for the toss winner (1 if team2 wins, else 0)',
    'toss_decision_01': 'Binary indicator for toss decision (1 if winner bats, 0 otherwise)',
    'team1_bp': 'Batting performance measure considering match situation and player strike rate over the last 5 matches for team1&#8203;:citation[oaicite:2]{index=2}&#8203;',
    'team2_bp': 'Batting performance measure considering match situation and player strike rate over the last 5 matches for team1 &#8203;:citation[oaicite:2]{index=2}&#8203;',
    'team1_acbr': 'Adjusted combined bowling rate for team1 over last 5 matches accounting for match situation and wicket weights, with lower values indicating better performance&#8203;:citation[oaicite:0]{index=0}&#8203;',
    'team2_acbr': 'Adjusted combined bowling rate for team2 over last 5 matches accounting for match situation and wicket weights, with lower values indicating better performance&#8203;:citation[oaicite:0]{index=0}&#8203;',
    'strength_team1': 'Overall strength of team1, combining batting performance (BP) and adjusted combined bowling rate (ACBR)&#8203;:citation[oaicite:2]{index=2}&#8203;',
    'strength_team2': 'Overall strength of team2, combining batting performance (BP) and adjusted combined bowling rate (ACBR)&#8203;:citation[oaicite:2]{index=2}&#8203;',
    'current_form_team1':'Weighted average of match outcomes using team strength as weights  over the last 5 games of  team1, reflecting recent performance &#8203;:citation[oaicite:1]{index=1}&#8203;',
    'current_form_team2':'Weighted average of match outcomes using team strength as weights  over the last 5 games of  team2 , reflecting recent performance &#8203;:citation[oaicite:1]{index=1}&#8203;',
    'team1_count_50runs_last15': 'Number of 50s scored by team1 in last 15 games',
    'team2_count_50runs_last15': 'Number of 50s scored by team2 in last 15 games',
    'team1_winp_last5': 'Team1\'s win percentage in last 5 games',
    'team2_winp_last5': 'Team2\'s win percentage in last 5 games',
    'team2only_avg_runs_last15': 'Team2\'s average inning runs in last 15 games',
    'total_strike_rate_team_1_last10': 'Total strike rate of team1 in last 10 games',
    'total_strike_rate_team_2_last10': 'Total strike rate of team2 in last 10 games',
    'total_fours_avg_team1_last10': 'Average number of fours hit by team1 in last 10 games',
    'total_fours_avg_team2_last10': 'Average number of fours hit by team2 in last 10 games',
    'total_sixes_avg_team1_last10': 'Average number of sixes hit by team1 in last 10 games',
    'total_sixes_avg_team2_last10': 'Average number of sixes hit by team2 in last 10 games',
    'total_avg_wicket_team1_last10': 'Average wickets taken by team1 in last 10 games',
    'total_avg_wicket_team2_last10': 'Average wickets taken by team2 in last 10 games',
    'total_run_rate_team_1_last10': 'Total run rate of team1 in last 10 games',
    'total_run_rate_team_2_last10': 'Total run rate of team2 in last 10 games',
    'num_all_rounders_team_1': 'Number of all-rounders in team1',
    'num_all_rounders_team_2': 'Number of all-rounders in team2',
    'total_bowling_avg_team1_last_10': 'Total bowling average of team1 in last 10 games',
    'total_bowling_avg_team2_last_10': 'Total bowling average of team2 in last 10 games',
    'total_bowling_SR_team1_last_10': 'Total bowling strike rate of team1 in last 10 games',
    'total_bowling_SR_team2_last_10': 'Total bowling strike rate of team2 in last 10 games',
    'total_avg_economy_team1_last_10': 'Avg economy rate of team1 in last 10 games',
    'total_avg_economy_team2_last_10': 'Avg economy rate of team2 in last 10 games',
    'series_type': 'Type of series the match belongs to',
    'batStrLastn_team1':  'Created custom weights to calculate average batsmen scores for team 1 over the last 10 matches, based on runs, sixes, fours, strike rate, and ducks',
    'batStrLastn_team2':  'Created custom weights to calculate average batsmen scores for team 1 over the last 10 matches, based on runs, sixes, fours, strike rate, and ducks',
    'bowlStrLastn_team1': 'Bowling strength of team1 in last 5 games',
    'bowlStrLastn_team2': 'Bowling strength of team2 in last 5 games',
    'batScores_team1':  'Followed ICC guidelines to calculate average batsmen scores for team 1 over the last 10 matches, based on runs, sixes, fours, strike rate, and ducks.',
    'batScores_team2':  'Followed ICC guidelines to calculate average batsmen scores for team 2 over the last 10 matches, based on runs, sixes, fours, strike rate, and ducks.',
    'bowling_scores_team1':'Mean of the bowlers\' performance of team 1 based on based on wickets, maiden overs, runs conceded, economy, and runs per over following ICC guidelines in the last 10 matches',
    'bowling_scores_team2':'Mean of the bowlers\' performance of team 2 based on based on wickets, maiden overs, runs conceded, economy, and runs per over following ICC guidelines in the last 10 matches',
    'toss_winner_id': 'Identifier for the toss-winning team',
    'wins_when_toss_lost_avg10_team1': 'Average wins for team1 when they lose the toss in last 10 games',
    'wins_when_toss_lost_avg10_team2': 'Average wins for team2 when they lose the toss in last 10 games',
    'wins_when_toss_won_avg10_team1': 'Average wins for team1 when they win the toss in last 10 games',
    'wins_when_toss_won_avg10_team2': 'Average wins for team2 when they win the toss in last 10 games',
    'Margin_of_victory_avg_last10_team1': 'Margin of victory (checks difference between the number of runs or the number of wickets between winning & losing team) of team1 in the last 10 games',
    'Margin_of_victory_avg_last10_team2': 'Margin of victory (checks difference between the number of runs or the number of wickets between winning & losing team) of team2 in the last 10 games',
    'ground_avg_runs_team1last15': 'Average runs scored by team1 at the ground in last 15 games',
    'ground_avg_runs_team2last15': 'Average runs scored by team2 at the ground in last 15 games',
    'meanBatScoresOnGround_team1': 'Mean batting scores of team1 at the ground  in previous games ',
    'meanBatScoresOnGround_team2': 'Mean batting scores of team2 at the ground in previous games',
    'avg_strike_rate_on_ground_team1': 'Average strike rate of team1 at the ground in previous games',
    'avg_strike_rate_on_ground_team2': 'Average strike rate of team2 at the ground in previous games',
    'avg_economy_on_ground_team1': 'Average economy rate of team1 at the ground in previous games',
    'avg_economy_on_ground_team2': 'Average economy rate of team2 at the ground in previous games',
    'winp_on_ground_team1': 'Win percentage of team1 at the ground in previous games',
    'winp_on_ground_team2': 'Win percentage of team2 at the ground in previous games',
    'mean_bowlScores_On_Ground_team1': 'Mean bowling scores of team1 at the ground in previous games',
    'mean_bowlScores_On_Ground_team2': 'Mean bowling scores of team2 at the ground in previous games.'
}

In [None]:
from functools import partial

from sklearn.feature_selection import SelectKBest, f_classif, chi2, mutual_info_classif, RFECV

# Compare several feature-selection strategies on the same (X, y) and print the
# feature names each strategy keeps. X, y and X_columns come from earlier cells;
# X_columns[i] is used below as the name of the i-th column of X.

# Tunables for this cell, kept in one place instead of repeated literals.
FS_SEED = 42      # seeds mutual_info_classif so its selection is reproducible
FS_ANOVA_K = 8    # features kept by the ANOVA filter
FS_TOP_K = 16     # features kept by chi2 / mutual information / embedded ranking

# SelectKBest does not accept k larger than the number of columns cleanly
# (error or warning depending on the scikit-learn version), so clamp k.
n_features = np.shape(X)[1]
anova_k = min(FS_ANOVA_K, n_features)
top_k = min(FS_TOP_K, n_features)

# Candidate estimators. Only model1 is used in this cell (RFECV and the
# embedded ranking); model0, model2 and model3 are just instantiated here.
model0 = GradientBoostingClassifier()
model1 = CatBoostClassifier(verbose=0)
model2 = lgb.LGBMClassifier(verbose=-1)
model3 = xgb.XGBClassifier()

# ANOVA F-value
anova_selector = SelectKBest(f_classif, k=anova_k)
anova_selector.fit(X, y)
anova_selected_features = [X_columns[i] for i in anova_selector.get_support(indices=True)]

# Chi-Square
# NOTE(review): chi2 only accepts non-negative feature values - confirm X has
# no negative columns, otherwise this fit raises.
chi2_selector = SelectKBest(chi2, k=top_k)
chi2_selector.fit(X, y)
chi2_selected_features = [X_columns[i] for i in chi2_selector.get_support(indices=True)]

# Mutual Information
# mutual_info_classif uses random numbers internally; without a fixed
# random_state the selected set can differ from one run to the next.
mi_selector = SelectKBest(partial(mutual_info_classif, random_state=FS_SEED), k=top_k)
mi_selector.fit(X, y)
mi_selected_features = [X_columns[i] for i in mi_selector.get_support(indices=True)]

# RFECV
# Recursive elimination, one feature per step, scored by 5-fold stratified CV
# accuracy. This refits CatBoost many times and is the slow part of the cell.
rfecv_selector = RFECV(estimator=model1, step=1, cv=StratifiedKFold(5), scoring='accuracy')
rfecv_selector.fit(X, y)
rfecv_selected_features = [X_columns[i] for i in rfecv_selector.get_support(indices=True)]

# Embedded Method (Feature Importance from CatBoost)
model1.fit(X, y)
importances = model1.feature_importances_
# argsort is ascending: take the last FS_TOP_K positions, then reverse so the
# most important feature comes first.
indices = importances.argsort()[-FS_TOP_K:][::-1]
embedded_selected_features = [X_columns[i] for i in indices]

# Combine all methods
selected_features_methods = {
    "ANOVA": anova_selected_features,
    "Chi-Square": chi2_selected_features,
    "Mutual Information": mi_selected_features,
    "RFECV": rfecv_selected_features,
    "Embedded": embedded_selected_features
}

for method, features in selected_features_methods.items():
    print(f"{method} selected features: {features}")


# Table summarizing our experimentation

(Scroll Right for all the columns)

| Features Selected                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       | Model Selected               | Hyperparameters                                                                                                                                                                                                                                                                        | Max CV Score                                                                                                                                                                                                                                                   | Score on Full Training Set 
|
| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------- 
|
| ['current_form_team1',<br>'current_form_team2', 'batScores_team1',<br>'batScores_team2', 'bowling_scores_team1', 'bowling_scores_team2'] | Gradient Boosting Classifier | {'n_estimators': 118, 'learning_rate': 0.01088769939677726, 'max_depth': 6,<br>'min_samples_split': 73, 'min_samples_leaf': 49, 'subsample': 0.6290729641656438, 'max_features': 0.7875620619898971} | 0.57 | 0.67 
|
| ['current_form_team1',<br>'current_form_team2', 'batScores_team1',<br>'batScores_team2', 'bowling_scores_team1', 'bowling_scores_team2'] | XGBOOST | {'max_depth': 3,<br>'learning_rate': 0.022190626172899443,<br>'n_estimators': 467,<br>'subsample': 0.5113710351999977,<br>'colsample_bytree': 0.7918243406660262,<br>'gamma': 0.00025408413886170075,<br>'reg_alpha': 2.4117368572982178e-06,<br>'reg_lambda': 0.014749899343479334} | 0.5694 | 0.76 
|
| [['team1_winp_team2_last15',<br>'ground_avg_runs_last15','team_count_50runs_last15', 'team_winp_last5','current_form_team1',<br>'current_form_team2', 'batScores_team1',<br>'batScores_team2', 'bowling_scores_team1', 'bowling_scores_team2']]                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | CATBOOST                     | {'iterations': 38, 'depth': 2, 'learning_rate': 0.08959177979631737, 'l2_leaf_reg': 0.3698120095335513, 'bagging_temperature': 0.8444889666117621, 'random_strength': 0.7438592936527025, 'border_count': 178, 'od_type': 'IncToDec', 'od_wait': 22}<br>                               | 0.5463                                                                                                                                                                                                                                                         | 0.5806                     
|
| [['team1_winp_team2_last15',<br>'ground_avg_runs_last15','team_count_50runs_last15', 'team_winp_last5','current_form_team1',<br>'current_form_team2', 'batScores_team1',<br>'batScores_team2', 'bowling_scores_team1', 'bowling_scores_team2']]                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | Gradient Boosting Classifier | {'n_estimators': 24, 'learning_rate': 0.1470085616690154, 'max_depth': 3, 'min_samples_split': 76, 'min_samples_leaf': 12, 'subsample': 0.8017508518929026, 'max_features': 0.6836653007949745}                                                                                        | 0.5548                                                                                                                                                                                                                                                         | 0.65                       
|
| [['team1_winp_team2_last15',<br>'ground_avg_runs_last15','team_count_50runs_last15', 'team_winp_last5','current_form_team1',<br>'current_form_team2', 'batScores_team1',<br>'batScores_team2', 'bowling_scores_team1', 'bowling_scores_team2']]                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | XGBOOST                      | {'max_depth': 2, 'learning_rate': 0.013414509436043323, 'n_estimators': 347, 'subsample': 0.7016693770327093, 'colsample_bytree': 0.964818140502706, 'gamma': 0.000738412674609453, 'reg_alpha': 4.908608416222712e-06, 'reg_lambda': 1.2283786327929913}<br>                          | 0.549                                                                                                                                                                                                                                                          | 0.6271                     
|
| [['team1_winp_team2_last15',<br>'ground_avg_runs_last15','team_count_50runs_last15', 'team_winp_last5','current_form_team1',<br>'current_form_team2', 'batScores_team1',<br>'batScores_team2', 'bowling_scores_team1', 'bowling_scores_team2']]                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | lightGBM                     | {'num_leaves': 98, 'max_depth': 2, 'learning_rate': 0.06956676108940352, 'n_estimators': 105, 'min_child_weight': 0.029531422816284174, 'subsample': 0.5354937713193931, 'colsample_bytree': 0.5000322602303595}<br>                                                                   | 0.5516                                                                                                                                                                                                                                                         | 0.6342                     
|
| ['team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','total_avg_economy_team1_last_10', 'total_avg_economy_team2_last_10',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             | CATBOOST                     | {'iterations': 141, 'depth': 6, 'learning_rate': 0.011328275163653322, 'l2_leaf_reg': 0.40404863984547756, 'bagging_temperature': 0.3861119365879258, 'random_strength': 0.4596218575839328, 'border_count': 106, 'od_type': 'Iter', 'od_wait': 16}                                    | 0.5769                                                                                                                                                                                                                                                         | 0.8111                     
|
| ['team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','total_avg_economy_team1_last_10', 'total_avg_economy_team2_last_10',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             | Gradient Boosting Classifier | {'n_estimators': 110, 'learning_rate': 0.017480500327965037, 'max_depth': 4, 'min_samples_split': 17, 'min_samples_leaf': 13, 'subsample': 0.6547327131247929, 'max_features': 0.6369285703564282}                                                                                     | 0.5674                                                                                                                                                                                                                                                         | 0.7805                     
|
| ['team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','total_avg_economy_team1_last_10', 'total_avg_economy_team2_last_10',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             | XGBOOST                      | {'max_depth': 4,<br>'learning_rate': 0.022302073831134226,<br>'n_estimators': 57,<br>'subsample': 0.8101212013676148,<br>'colsample_bytree': 0.6834600561116686,<br>'gamma': 0.005970643625181246,<br>'reg_alpha': 0.00029705917298457286,<br>'reg_lambda': 0.01076174841069285}       | 0.5854                                                                                                                                                                                                                                                         | 0.7964                     
|
| ['team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','total_avg_economy_team1_last_10', 'total_avg_economy_team2_last_10',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             | lightGBM                     | {'num_leaves': 130,<br>'max_depth': 9,<br>'learning_rate': 0.018021837539511493,<br>'n_estimators': 58,<br>'min_child_weight': 0.31604281076636703,<br>'subsample': 0.9507984492526929,<br>'colsample_bytree': 0.9720995168325565}                                                     | 0.5737                                                                                                                                                                                                                                                         | 0.8765                     
|
| ['team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','total_avg_economy_team1_last_10', 'total_avg_economy_team2_last_10',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15','current_form_team1',<br>'current_form_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               | CATBOOST                     | {'iterations': 236, 'depth': 5, 'learning_rate': 0.013398605765741296, 'l2_leaf_reg': 0.10370879073554895, 'bagging_temperature': 0.4413934139959304, 'random_strength': 0.6034045621971232, 'border_count': 107, 'od_type': 'Iter', 'od_wait': 22}                                    | 0.6526315789473685<br>0.5052631578947369<br>0.5789473684210527<br>0.6<br>0.6<br>0.5894736842105263<br>0.631578947368421<br>0.5684210526315789<br>0.48936170212765956<br>0.574468085106383<br>Mean_accuracy :0.5790145576707727<br>                             | 0.8628                     
|
| ['team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','total_avg_economy_team1_last_10', 'total_avg_economy_team2_last_10',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15','current_form_team1',<br>'current_form_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               | Gradient Boosting Classifier | {'n_estimators': 229,<br>'learning_rate': 0.0193036066172833,<br>'max_depth': 3,<br>'min_samples_split': 77,<br>'min_samples_leaf': 5,<br>'subsample': 0.5691167394593772,<br>'max_features': 0.8636088577796043}                                                                      | 0.4842105263157895<br>0.6<br>0.5052631578947369<br>0.5263157894736842<br>0.6210526315789474<br>0.5157894736842106<br>0.5789473684210527<br>0.6<br>0.574468085106383<br>0.5531914893617021<br>Mean Accuracy: 0.5559238521836506                                 | 0.7637                     
|
| ['team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','total_avg_economy_team1_last_10', 'total_avg_economy_team2_last_10',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15','current_form_team1',<br>'current_form_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               | XGBOOST                      | {'max_depth': 2,<br>'learning_rate': 0.10776555219559049,<br>'n_estimators': 65,<br>'subsample': 0.5003932824430685,<br>'colsample_bytree': 0.8927632683018213,<br>'gamma': 0.0005663368359376436,<br>'reg_alpha': 3.9464045267085924e-07,<br>'reg_lambda': 0.29802142954924804}       | 0.5894736842105263<br>0.5789473684210527<br>0.5368421052631579<br>0.4842105263157895<br>0.6<br>0.6<br>0.5894736842105263<br>0.631578947368421<br>0.5531914893617021<br>0.5851063829787234<br>Mean Accuracy: 0.5748824188129898                                 | 0.7078                     
|
| ['team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','total_avg_economy_team1_last_10', 'total_avg_economy_team2_last_10',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15','current_form_team1',<br>'current_form_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               | LightGBM                     | {'num_leaves': 64,<br>'max_depth': 5,<br>'learning_rate': 0.02863616348601301,<br>'n_estimators': 86,<br>'min_child_weight': 0.23215309745155618,<br>'subsample': 0.5054727982559248,<br>'colsample_bytree': 0.9483217428436346}                                                       | 0.6<br>0.5684210526315789<br>0.5052631578947369<br>0.5157894736842106<br>0.5578947368421052<br>0.5789473684210527<br>0.5157894736842106<br>0.6210526315789474<br>0.5638297872340425<br>0.6170212765957447<br>Mean Accuracy: 0.5644008958566629                 | 0.8533                     
|
| ['team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','total_avg_economy_team1_last_10', 'total_avg_economy_team2_last_10',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15','batScores_team1',<br>'batScores_team2', 'bowling_scores_team1', 'bowling_scores_team2', 'Margin_of_victory_avg_last10_team1','Margin_of_victory_avg_last10_team2'] | CATBOOST | {'iterations': 218,<br>'depth': 2,<br>'learning_rate': 0.047742448631327955,<br>'l2_leaf_reg': 0.019592731052610425,<br>'bagging_temperature': 0.9744479821846707,<br>'random_strength': 0.01706806120995335,<br>'border_count': 88,<br>'od_type': 'IncToDec',<br>'od_wait': 5} | 0.6105263157894737<br>0.5894736842105263<br>0.5578947368421052<br>0.5578947368421052<br>0.6736842105263158<br>0.5789473684210527<br>0.6<br>0.631578947368421<br>0.5<br>0.6170212765957447<br>Mean_accuracy :0.5917021276595744 | 0.7531
|
| ['team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','total_avg_economy_team1_last_10', 'total_avg_economy_team2_last_10',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15','batScores_team1',<br>'batScores_team2', 'bowling_scores_team1', 'bowling_scores_team2', 'Margin_of_victory_avg_last10_team1','Margin_of_victory_avg_last10_team2'] | Gradient Boosting Classifier | {'n_estimators': 327,<br>'learning_rate': 0.016547561335099343,<br>'max_depth': 2,<br>'min_samples_split': 67,<br>'min_samples_leaf': 73,<br>'subsample': 0.8501411187671424,<br>'max_features': 0.6043221696666015} | 0.5894736842105263<br>0.6105263157894737<br>0.5894736842105263<br>0.5157894736842106<br>0.6842105263157895<br>0.5578947368421052<br>0.5789473684210527<br>0.5473684210526316<br>0.5319148936170213<br>0.6170212765957447<br>Mean Accuracy: 0.5822620380739082 | 0.6983
|
| ['team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','total_avg_economy_team1_last_10', 'total_avg_economy_team2_last_10',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15','batScores_team1',<br>'batScores_team2', 'bowling_scores_team1', 'bowling_scores_team2', 'Margin_of_victory_avg_last10_team1','Margin_of_victory_avg_last10_team2'] | XGBOOST | {'max_depth': 2,<br>'learning_rate': 0.04464381106155335,<br>'n_estimators': 348,<br>'subsample': 0.7861400017786948,<br>'colsample_bytree': 0.6299232378833381,<br>'gamma': 0.0013932369188389606,<br>'reg_alpha': 0.0024778298107521047,<br>'reg_lambda': 0.9709137914163241} | 0.5789473684210527<br>0.6<br>0.5368421052631579<br>0.5684210526315789<br>0.6210526315789474<br>0.5368421052631579<br>0.6<br>0.6105263157894737<br>0.574468085106383<br>0.5531914893617021<br>Mean Accuracy: 0.5780291153415453 | 0.809
|
| ['team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','total_avg_economy_team1_last_10', 'total_avg_economy_team2_last_10',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15','batScores_team1',<br>'batScores_team2', 'bowling_scores_team1', 'bowling_scores_team2', 'Margin_of_victory_avg_last10_team1','Margin_of_victory_avg_last10_team2'] | LightGBM | {'num_leaves': 60,<br>'max_depth': 2,<br>'learning_rate': 0.010611557682801706,<br>'n_estimators': 158,<br>'min_child_weight': 1.3070708407437945,<br>'subsample': 0.9570843295743576,<br>'colsample_bytree': 0.5825967777746597} | 0.6210526315789474<br>0.6105263157894737<br>0.5368421052631579<br>0.5052631578947369<br>0.6631578947368421<br>0.6<br>0.6<br>0.5894736842105263<br>0.5106382978723404<br>0.5212765957446809<br>Mean Accuracy: 0.5758230683090705 | 0.6645
|
| ['team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','total_avg_economy_team1_last_10', 'total_avg_economy_team2_last_10',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15','batStrLastn_team1',<br>'batStrLastn_team2', 'bowlStrLastn_team1', 'bowlStrLastn_team2',<br>'Margin_of_victory_avg_last10_team1','Margin_of_victory_avg_last10_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       | CATBOOST                     | {'iterations': 380,<br>'depth': 3,<br>'learning_rate': 0.01263523683862344,<br>'l2_leaf_reg': 0.40343313069895204,<br>'bagging_temperature': 0.17058564970768988,<br>'random_strength': 0.502950657782192,<br>'border_count': 19,<br>'od_type': 'Iter',<br>'od_wait': 25}              | 0.6105263157894737<br>0.6<br>0.5368421052631579<br>0.5263157894736842<br>0.6736842105263158<br>0.5789473684210527<br>0.5894736842105263<br>0.5894736842105263<br>0.5531914893617021<br>0.6063829787234043<br>Mean_accuracy :0.5864837625979844                 | 0.7531                     
|
| ['team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','total_avg_economy_team1_last_10', 'total_avg_economy_team2_last_10',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15','batStrLastn_team1',<br>'batStrLastn_team2', 'bowlStrLastn_team1', 'bowlStrLastn_team2',<br>'Margin_of_victory_avg_last10_team1','Margin_of_victory_avg_last10_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       | Gradient Boosting Classifier | {'n_estimators': 297,<br>'learning_rate': 0.014367650216078841,<br>'max_depth': 2,<br>'min_samples_split': 39,<br>'min_samples_leaf': 19,<br>'subsample': 0.9998712024075692,<br>'max_features': 0.9660231417031876}                                                                   | 0.6421052631578947<br>0.5684210526315789<br>0.5578947368421052<br>0.4842105263157895<br>0.6210526315789474<br>0.5578947368421052<br>0.5894736842105263<br>0.5789473684210527<br>0.5531914893617021<br>0.6170212765957447<br>Mean Accuracy: 0.5770212765957446  | 0.713                      
|
| ['team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','total_avg_economy_team1_last_10', 'total_avg_economy_team2_last_10',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15','batStrLastn_team1',<br>'batStrLastn_team2', 'bowlStrLastn_team1', 'bowlStrLastn_team2',<br>'Margin_of_victory_avg_last10_team1','Margin_of_victory_avg_last10_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       | XGBOOST                      | {'max_depth': 5,<br>'learning_rate': 0.03751786871979571,<br>'n_estimators': 109,<br>'subsample': 0.9911004988148061,<br>'colsample_bytree': 0.712218746435513,<br>'gamma': 0.00371272754277988,<br>'reg_alpha': 2.2645526631846706e-08,<br>'reg_lambda': 0.0010334133085819733}       | 0.6<br>0.6842105263157895<br>0.5157894736842106<br>0.5263157894736842<br>0.6526315789473685<br>0.5473684210526316<br>0.5368421052631579<br>0.5578947368421052<br>0.5851063829787234<br>0.5851063829787234<br>Mean Accuracy: 0.5791265397536394                 | 0.9746                     
|
| ['team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','total_avg_economy_team1_last_10', 'total_avg_economy_team2_last_10',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15','batStrLastn_team1',<br>'batStrLastn_team2', 'bowlStrLastn_team1', 'bowlStrLastn_team2',<br>'Margin_of_victory_avg_last10_team1','Margin_of_victory_avg_last10_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       | LightGBM                     | {'num_leaves': 50,<br>'max_depth': 5,<br>'learning_rate': 0.010882483764261345,<br>'n_estimators': 46,<br>'min_child_weight': 0.25189562380714164,<br>'subsample': 0.8548811012105544,<br>'colsample_bytree': 0.8015411597434468}                                                      | 0.6736842105263158<br>0.6210526315789474<br>0.6<br>0.4105263157894737<br>0.6105263157894737<br>0.5578947368421052<br>0.5684210526315789<br>0.5578947368421052<br>0.5851063829787234<br>0.6170212765957447<br>Mean Accuracy: 0.5802127659574468                 | 0.7742                     
|
| ['team1_bp',<br>'team2_bp', 'team1_count_50runs_last15',<br>'team2_count_50runs_last15','batScores_team1',<br>'batScores_team2','Margin_of_victory_avg_last10_team1',<br>'Margin_of_victory_avg_last10_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          | CATBOOST                     | {'iterations': 269,<br>'depth': 6,<br>'learning_rate': 0.011537628573476452,<br>'l2_leaf_reg': 0.2623690042785055,<br>'bagging_temperature': 0.9050967381093898,<br>'random_strength': 0.0785360945625559,<br>'border_count': 10,<br>'od_type': 'Iter',<br>'od_wait': 15}              | 0.6526315789473685<br>0.6421052631578947<br>0.5684210526315789<br>0.5578947368421052<br>0.6421052631578947<br>0.6210526315789474<br>0.6842105263157895<br>0.6<br>0.5851063829787234<br>0.574468085106383<br>Mean_accuracy :0.6127995520716685                  | 0.8744                     
|
| ['team1_bp',<br>'team2_bp', 'team1_count_50runs_last15',<br>'team2_count_50runs_last15','batScores_team1',<br>'batScores_team2','Margin_of_victory_avg_last10_team1',<br>'Margin_of_victory_avg_last10_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          | Gradient Boosting Classifier | {'n_estimators': 148,<br>'learning_rate': 0.02730859871815362,<br>'max_depth': 2,<br>'min_samples_split': 32,<br>'min_samples_leaf': 16,<br>'subsample': 0.841019981287522,<br>'max_features': 0.983144268133517}                                                                      | 0.6<br>0.5157894736842106<br>0.5263157894736842<br>0.5263157894736842<br>0.6631578947368421<br>0.5157894736842106<br>0.6105263157894737<br>0.5684210526315789<br>0.6063829787234043<br>0.6276595744680851<br>Mean Accuracy: 0.5760358342665174                 | 0.6571                     
|
| ['team1_bp',<br>'team2_bp', 'team1_count_50runs_last15',<br>'team2_count_50runs_last15','batScores_team1',<br>'batScores_team2','Margin_of_victory_avg_last10_team1',<br>'Margin_of_victory_avg_last10_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          | XGBOOST                      | {'max_depth': 6,<br>'learning_rate': 0.02425042674478628,<br>'n_estimators': 41,<br>'subsample': 0.8819292913825366,<br>'colsample_bytree': 0.8880837990330908,<br>'gamma': 0.0006047078007835886,<br>'reg_alpha': 2.2414727844781963e-06,<br>'reg_lambda': 0.0016622199135050168}     | 0.5894736842105263<br>0.5578947368421052<br>0.6<br>0.5894736842105263<br>0.6210526315789474<br>0.6421052631578947<br>0.6736842105263158<br>0.5157894736842106<br>0.5425531914893617<br>0.5212765957446809<br>Mean Accuracy: 0.5853303471444569                 | 0.6526                     
|
| ['team1_bp',<br>'team2_bp', 'team1_count_50runs_last15',<br>'team2_count_50runs_last15','batScores_team1',<br>'batScores_team2','Margin_of_victory_avg_last10_team1',<br>'Margin_of_victory_avg_last10_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          | LightGBM                     | {'num_leaves': 106,<br>'max_depth': 7,<br>'learning_rate': 0.027660244644042344,<br>'n_estimators': 69,<br>'min_child_weight': 0.1524529727593385,<br>'subsample': 0.6748148763278281,<br>'colsample_bytree': 0.5750858946643675}                                                      | 0.5473684210526316<br>0.5473684210526316<br>0.5578947368421052<br>0.5473684210526316<br>0.6<br>0.5684210526315789<br>0.6421052631578947<br>0.5684210526315789<br>0.5531914893617021<br>0.574468085106383<br>Mean Accuracy: 0.5706606942889139                  | 0.8185                     
|
| ['team1_acbr',<br>'team2_acbr','bowling_scores_team1',<br>'bowling_scores_team2','Margin_of_victory_avg_last10_team1',<br>'Margin_of_victory_avg_last10_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | CATBOOST                     | {'iterations': 24,<br>'depth': 5,<br>'learning_rate': 0.015145002098882832,<br>'l2_leaf_reg': 0.5074412407738867,<br>'bagging_temperature': 0.4715091639462857,<br>'random_strength': 0.6985850890525345,<br>'border_count': 59,<br>'od_type': 'IncToDec',<br>'od_wait': 14}           | 0.5894736842105263<br>0.5473684210526316<br>0.5368421052631579<br>0.6105263157894737<br>0.6<br>0.5684210526315789<br>0.5263157894736842<br>0.5473684210526316<br>0.48936170212765956<br>0.6063829787234043<br>Mean_accuracy :0.5622060470324749                | 0.6371                     
|
| ['team1_acbr',<br>'team2_acbr','bowling_scores_team1',<br>'bowling_scores_team2','Margin_of_victory_avg_last10_team1',<br>'Margin_of_victory_avg_last10_team2'] | Gradient Boosting Classifier | {'n_estimators': 113,<br>'learning_rate': 0.011510754172410433,<br>'max_depth': 6,<br>'min_samples_split': 67,<br>'min_samples_leaf': 83,<br>'subsample': 0.5064749631444083,<br>'max_features': 0.967757555947919} | 0.5684210526315789<br>0.5263157894736842<br>0.5894736842105263<br>0.6421052631578947<br>0.6105263157894737<br>0.5789473684210527<br>0.5157894736842106<br>0.43157894736842106<br>0.5106382978723404<br>0.6595744680851063<br>Mean Accuracy: 0.5633370660694288 | 0.6086
|
| ['team1_acbr',<br>'team2_acbr','bowling_scores_team1',<br>'bowling_scores_team2','Margin_of_victory_avg_last10_team1',<br>'Margin_of_victory_avg_last10_team2'] | XGBOOST | {'max_depth': 3,<br>'learning_rate': 0.056416825773584235,<br>'n_estimators': 193,<br>'subsample': 0.6853860566220248,<br>'colsample_bytree': 0.5342191190924427,<br>'gamma': 0.0781078505159753,<br>'reg_alpha': 5.539948941678334e-05,<br>'reg_lambda': 0.003293717475622626} | 0.4842105263157895<br>0.6<br>0.49473684210526314<br>0.631578947368421<br>0.631578947368421<br>0.5052631578947369<br>0.49473684210526314<br>0.5578947368421052<br>0.5<br>0.5851063829787234<br>Mean Accuracy: 0.5485106382978723 | 0.8238
|
| ['team1_acbr',<br>'team2_acbr','bowling_scores_team1',<br>'bowling_scores_team2','Margin_of_victory_avg_last10_team1',<br>'Margin_of_victory_avg_last10_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | LightGBM                     | {'num_leaves': 143,<br>'max_depth': 5,<br>'learning_rate': 0.010001638160578072,<br>'n_estimators': 346,<br>'min_child_weight': 0.3030875656512072,<br>'subsample': 0.9905214678530757,<br>'colsample_bytree': 0.8356062734201423}                                                     | 0.5578947368421052<br>0.5473684210526316<br>0.5789473684210527<br>0.631578947368421<br>0.5368421052631579<br>0.4842105263157895<br>0.5684210526315789<br>0.5473684210526316<br>0.5106382978723404<br>0.5851063829787234<br>Mean Accuracy: 0.5548376259798432   | 0.8006                     
|
| ['batStrLastn_team1',<br>'batStrLastn_team2','bowlStrLastn_team1',<br>'bowlStrLastn_team2','team1_winp_team2_last15',<br>'Margin_of_victory_avg_last10_team1', 'current_form_team1','current_form_team2',<br>'Margin_of_victory_avg_last10_team2'] | CATBOOST                     | {'iterations': 10,<br>'depth': 2,<br>'learning_rate': 0.01848623669208408,<br>'l2_leaf_reg': 0.017988409908596244,<br>'bagging_temperature': 0.20967122365125102,<br>'random_strength': 0.8650738511101035,<br>'border_count': 25,<br>'od_type': 'IncToDec',<br>'od_wait': 19}         | 0.6736842105263158<br>0.5578947368421052<br>0.6<br>0.4631578947368421<br>0.5789473684210527<br>0.5052631578947369<br>0.5894736842105263<br>0.6105263157894737<br>0.5212765957446809<br>0.5425531914893617<br>Mean Accuracy: 0.5642777155655095                 | 0.5738                     
|
| ['batStrLastn_team1',<br>'batStrLastn_team2','bowlStrLastn_team1',<br>'bowlStrLastn_team2','team1_winp_team2_last15',<br>'Margin_of_victory_avg_last10_team1', 'current_form_team1','current_form_team2',<br>'Margin_of_victory_avg_last10_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      | Gradient Boosting Classifier | {'n_estimators': 94,<br>'learning_rate': 0.011892386246013546,<br>'max_depth': 3,<br>'min_samples_split': 15,<br>'min_samples_leaf': 36,<br>'subsample': 0.7223969685947926,<br>'max_features': 0.9015159921167769}<br>                                                                | 0.631578947368421<br>0.5684210526315789<br>0.5894736842105263<br>0.5052631578947369<br>0.6105263157894737<br>0.5894736842105263<br>0.5473684210526316<br>0.5473684210526316<br>0.5638297872340425<br>0.5106382978723404<br>Mean Accuracy: 0.5663941769316909   | 0.6656                     
|
| ['batStrLastn_team1',<br>'batStrLastn_team2','bowlStrLastn_team1',<br>'bowlStrLastn_team2','team1_winp_team2_last15',<br>'Margin_of_victory_avg_last10_team1', 'current_form_team1','current_form_team2',<br>'Margin_of_victory_avg_last10_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      | XGBOOST                      | {'max_depth': 2,<br>'learning_rate': 0.010002813522745704,<br>'n_estimators': 132,<br>'subsample': 0.5115307381300043,<br>'colsample_bytree': 0.9146007866988358,<br>'gamma': 0.0001723374651084822,<br>'reg_alpha': 0.00102281143442836,<br>'reg_lambda': 0.00311761648703954}        | 0.5894736842105263<br>0.5684210526315789<br>0.6105263157894737<br>0.49473684210526314<br>0.6526315789473685<br>0.5473684210526316<br>0.6<br>0.5368421052631579<br>0.5106382978723404<br>0.5319148936170213<br>Mean Accuracy: 0.5642553191489361                | 0.6508                     
|
| ['batStrLastn_team1',<br>'batStrLastn_team2','bowlStrLastn_team1',<br>'bowlStrLastn_team2','team1_winp_team2_last15',<br>'Margin_of_victory_avg_last10_team1', 'current_form_team1','current_form_team2',<br>'Margin_of_victory_avg_last10_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      | LightGBM                     | {'num_leaves': 256,<br>'max_depth': 2,<br>'learning_rate': 0.014661486242256024,<br>'n_estimators': 466,<br>'min_child_weight': 3.7487499435043143,<br>'subsample': 0.7051644835958302,<br>'colsample_bytree': 0.5218031910932098}                                                     | 0.5894736842105263<br>0.6<br>0.5789473684210527<br>0.5052631578947369<br>0.6<br>0.5578947368421052<br>0.5473684210526316<br>0.5789473684210527<br>0.574468085106383<br>0.5106382978723404<br>Mean Accuracy: 0.5643001119820827                                 | 0.6993                     
|
| ['team1_bp',<br>'team2_bp','team1_acbr',<br>'team2_acbr',<br>'Margin_of_victory_avg_last10_team1', 'current_form_team1','current_form_team2',<br>'Margin_of_victory_avg_last10_team2'] | CATBOOST                     | {'iterations': 48,<br>'depth': 2,<br>'learning_rate': 0.016116932452276217,<br>'l2_leaf_reg': 0.8536108680628662,<br>'bagging_temperature': 0.6146937723645557,<br>'random_strength': 0.5440479581479567,<br>'border_count': 168,<br>'od_type': 'IncToDec',<br>'od_wait': 18}          | 0.631578947368421<br>0.49473684210526314<br>0.5684210526315789<br>0.5052631578947369<br>0.631578947368421<br>0.49473684210526314<br>0.6<br>0.6526315789473685<br>0.574468085106383<br>0.5638297872340425<br>Mean Accuracy: 0.5717245240761478                  | 0.578                      
|
| ['team1_bp',<br>'team2_bp','team1_acbr',<br>'team2_acbr',<br>'Margin_of_victory_avg_last10_team1', 'current_form_team1','current_form_team2',<br>'Margin_of_victory_avg_last10_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  | Gradient Boosting Classifier | {'n_estimators': 37,<br>'learning_rate': 0.01073003333462701,<br>'max_depth': 2,<br>'min_samples_split': 52,<br>'min_samples_leaf': 19,<br>'subsample': 0.990773457892913,<br>'max_features': 0.909553850318634}                                                                       | 0.6736842105263158<br>0.5368421052631579<br>0.5578947368421052<br>0.5263157894736842<br>0.6105263157894737<br>0.5052631578947369<br>0.6<br>0.5894736842105263<br>0.574468085106383<br>0.574468085106383<br>Mean Accuracy: 0.5748936170212765                   | 0.5886                     
|
| ['team1_bp',<br>'team2_bp','team1_acbr',<br>'team2_acbr',<br>'Margin_of_victory_avg_last10_team1', 'current_form_team1','current_form_team2',<br>'Margin_of_victory_avg_last10_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  | XGBOOST                      | {'max_depth': 2,<br>'learning_rate': 0.010817700717149268,<br>'n_estimators': 21,<br>'subsample': 0.5338101642985355,<br>'colsample_bytree': 0.7720929571723418,<br>'gamma': 0.00013263917157462768,<br>'reg_alpha': 1.9167023907273996e-08,<br>'reg_lambda': 0.0015406063874367762}   | 0.6421052631578947<br>0.5684210526315789<br>0.5684210526315789<br>0.47368421052631576<br>0.6421052631578947<br>0.5157894736842106<br>0.5894736842105263<br>0.6421052631578947<br>0.574468085106383<br>0.5319148936170213<br>Mean Accuracy: 0.5748488241881299  | 0.6631                     
|
| ['team1_bp',<br>'team2_bp','team1_acbr',<br>'team2_acbr',<br>'Margin_of_victory_avg_last10_team1', 'current_form_team1','current_form_team2',<br>'Margin_of_victory_avg_last10_team2']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  | LightGBM                     | {'num_leaves': 72,<br>'max_depth': 3,<br>'learning_rate': 0.017907865127074877,<br>'n_estimators': 43,<br>'min_child_weight': 0.5436243536558142,<br>'subsample': 0.7824883509250852,<br>'colsample_bytree': 0.8804081196747745}                                                       | 0.6736842105263158<br>0.5473684210526316<br>0.5789473684210527<br>0.5157894736842106<br>0.6421052631578947<br>0.5263157894736842<br>0.6105263157894737<br>0.5894736842105263<br>0.5319148936170213<br>0.5425531914893617<br>Mean Accuracy: 0.5758678611422172  | 0.635                      
|
| ['batScores_team1',<br>'batScores_team2',<br>'Margin_of_victory_avg_last10_team1', 'current_form_team1','current_form_team2',<br>'Margin_of_victory_avg_last10_team2','team1_count_50runs_last15',<br>'team2_count_50runs_last15','team1_winp_last5','team2_winp_last5'] | CATBOOST                     | {'iterations': 331,<br>'depth': 5,<br>'learning_rate': 0.015278684429017642,<br>'l2_leaf_reg': 1.4921353153778472,<br>'bagging_temperature': 0.30126898277026604,<br>'random_strength': 0.1234137374661574,<br>'border_count': 86,<br>'od_type': 'IncToDec',<br>'od_wait': 17}         | 0.631578947368421<br>0.6<br>0.6105263157894737<br>0.5263157894736842<br>0.6736842105263158<br>0.6105263157894737<br>0.5789473684210527<br>0.6631578947368421<br>0.5851063829787234<br>0.5106382978723404<br>Mean Accuracy: 0.5990481522956326                  | 0.8881                     
|
| ['batScores_team1',<br>'batScores_team2',<br>'Margin_of_victory_avg_last10_team1', 'current_form_team1','current_form_team2',<br>'Margin_of_victory_avg_last10_team2','team1_count_50runs_last15',<br>'team2_count_50runs_last15','team1_winp_last5','team2_winp_last5'] | Gradient Boosting Classifier | {'n_estimators': 292,<br>'learning_rate': 0.015141942363215479,<br>'max_depth': 5,<br>'min_samples_split': 17,<br>'min_samples_leaf': 19,<br>'subsample': 0.9811973358000244,<br>'max_features': 0.9873542991276633}                                                                   | 0.6631578947368421<br>0.5789473684210527<br>0.5578947368421052<br>0.5473684210526316<br>0.6631578947368421<br>0.6<br>0.5894736842105263<br>0.6526315789473685<br>0.6382978723404256<br>0.4787234042553192<br>Mean Accuracy: 0.5969652855543113                 | 0.8723                     
|
| ['batScores_team1',<br>'batScores_team2',<br>'Margin_of_victory_avg_last10_team1', 'current_form_team1','current_form_team2',<br>'Margin_of_victory_avg_last10_team2','team1_count_50runs_last15',<br>'team2_count_50runs_last15','team1_winp_last5','team2_winp_last5'] | XGBOOST                      | {'max_depth': 5,<br>'learning_rate': 0.014105978372981838,<br>'n_estimators': 238,<br>'subsample': 0.9010466788402482,<br>'colsample_bytree': 0.8889138695742023,<br>'gamma': 0.0023167859577284664,<br>'reg_alpha': 1.4271918329527897e-08,<br>'reg_lambda': 0.03779643851271714}<br> | 0.7052631578947368<br>0.6<br>0.5789473684210527<br>0.5157894736842106<br>0.6736842105263158<br>0.6210526315789474<br>0.5894736842105263<br>0.631578947368421<br>0.5957446808510638<br>0.4574468085106383<br>Mean Accuracy: 0.5968980963045912                  | 0.9219                     
|
| ['batScores_team1',<br>'batScores_team2',<br>'Margin_of_victory_avg_last10_team1', 'current_form_team1','current_form_team2',<br>'Margin_of_victory_avg_last10_team2','team1_count_50runs_last15',<br>'team2_count_50runs_last15','team1_winp_last5','team2_winp_last5'] | LightGBM                     | {'num_leaves': 223,<br>'max_depth': 4,<br>'learning_rate': 0.015127195811407878,<br>'n_estimators': 212,<br>'min_child_weight': 0.01783314616168886,<br>'subsample': 0.9486559122007586,<br>'colsample_bytree': 0.964138880924623}                                                     | 0.6947368421052632<br>0.5684210526315789<br>0.5578947368421052<br>0.5368421052631579<br>0.6421052631578947<br>0.5789473684210527<br>0.5473684210526316<br>0.6526315789473685<br>0.6276595744680851<br>0.4787234042553192<br>Mean Accuracy: 0.5885330347144457  | 0.7584                     
|
| train_0[['batScores_team1',<br>'batScores_team2', 'current_form_team1','current_form_team2','team1_count_50runs_last15',<br>'team2_count_50runs_last15','team1_winp_last5','team2_winp_last5']]<br><br>SERIES_TYPE==0 | CATBOOST                     | {'iterations': 277,<br>'depth': 5,<br>'learning_rate': 0.018033268389790084,<br>'l2_leaf_reg': 0.21033801243511593,<br>'bagging_temperature': 0.8128540143913973,<br>'random_strength': 0.8645921235925083,<br>'border_count': 208,<br>'od_type': 'Iter',<br>'od_wait': 26}            | 0.5555555555555556<br>0.5079365079365079<br>0.6031746031746031<br>0.5714285714285714<br>0.6349206349206349<br>0.6507936507936508<br>0.5873015873015873<br>0.5873015873015873<br>0.6451612903225806<br>0.6774193548387096<br>Mean Accuracy: 0.6020993343573989  | 0.8789                     
|
| train_0[['batScores_team1',<br>'batScores_team2', 'current_form_team1','current_form_team2','team1_count_50runs_last15',<br>'team2_count_50runs_last15','team1_winp_last5','team2_winp_last5']]<br><br>SERIES_TYPE==0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | Gradient Boosting Classifier | {'n_estimators': 389,<br>'learning_rate': 0.13411144007177836,<br>'max_depth': 2,<br>'min_samples_split': 93,<br>'min_samples_leaf': 5,<br>'subsample': 0.9536738790148818,<br>'max_features': 0.6864115223866214}                                                                     | 0.6190476190476191<br>0.5396825396825397<br>0.6349206349206349<br>0.5555555555555556<br>0.6507936507936508<br>0.5238095238095238<br>0.5873015873015873<br>0.6190476190476191<br>0.6290322580645161<br>0.6612903225806451<br>Mean Accuracy: 0.6020481310803891  | 0.906                      
|
| train_0[['batScores_team1',<br>'batScores_team2', 'current_form_team1','current_form_team2','team1_count_50runs_last15',<br>'team2_count_50runs_last15','team1_winp_last5','team2_winp_last5']]<br><br>SERIES_TYPE==0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | XGBOOST                      | {'max_depth': 3,<br>'learning_rate': 0.041024650224694634,<br>'n_estimators': 300,<br>'subsample': 0.6548782767890159,<br>'colsample_bytree': 0.9338498083398776,<br>'gamma': 0.046459308599143594,<br>'reg_alpha': 0.00027631222679533346,<br>'reg_lambda': 0.001725562474375547}     | 0.6031746031746031<br>0.5555555555555556<br>0.6507936507936508<br>0.5873015873015873<br>0.5555555555555556<br>0.5873015873015873<br>0.6031746031746031<br>0.6507936507936508<br>0.6451612903225806<br>0.6290322580645161<br>Mean Accuracy: 0.606784434203789   | 0.8949                     
|
| train_0[['batScores_team1',<br>'batScores_team2', 'current_form_team1','current_form_team2','team1_count_50runs_last15',<br>'team2_count_50runs_last15','team1_winp_last5','team2_winp_last5']]<br><br>SERIES_TYPE==0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | LightGBM                     | {'num_leaves': 244,<br>'max_depth': 2,<br>'learning_rate': 0.15630397462459264,<br>'n_estimators': 364,<br>'min_child_weight': 0.014951373420502139,<br>'subsample': 0.792380458244163,<br>'colsample_bytree': 0.9121158930055073}<br>                                                 | 0.6507936507936508<br>0.49206349206349204<br>0.6349206349206349<br>0.6825396825396826<br>0.6190476190476191<br>0.5396825396825397<br>0.5555555555555556<br>0.6507936507936508<br>0.6451612903225806<br>0.5483870967741935<br>Mean Accuracy: 0.60189452124936   | 0.9108                     
|
| train_0[['batScores_team1',<br>'batScores_team2', 'current_form_team1','current_form_team2','team1_count_50runs_last15',<br>'team2_count_50runs_last15','team1_winp_last5','team2_winp_last5']]<br><br>SERIES_TYPE==1 | CATBOOST | {'iterations': 69,<br>'depth': 5,<br>'learning_rate': 0.013208833434751047,<br>'l2_leaf_reg': 1.076294798106204,<br>'bagging_temperature': 0.47187998417531996,<br>'random_strength': 0.8936984667763431,<br>'border_count': 90,<br>'od_type': 'Iter',<br>'od_wait': 45} | 0.7727272727272727<br>0.7272727272727273<br>0.5454545454545454<br>0.6363636363636364<br>0.6818181818181818<br>0.6363636363636364<br>0.5909090909090909<br>0.6363636363636364<br>0.6190476190476191<br>0.8095238095238095<br>Mean Accuracy: 0.6655844155844155 | 0.7385 
|
| train_0[['batScores_team1',<br>'batScores_team2', 'current_form_team1','current_form_team2','team1_count_50runs_last15',<br>'team2_count_50runs_last15','team1_winp_last5','team2_winp_last5']]<br><br>SERIES_TYPE==1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | Gradient Boosting Classifier | {'n_estimators': 37,<br>'learning_rate': 0.050565387512842015,<br>'max_depth': 3,<br>'min_samples_split': 75,<br>'min_samples_leaf': 7,<br>'subsample': 0.5485094877530132,<br>'max_features': 0.519071147935523}                                                                      | 0.7272727272727273<br>0.8181818181818182<br>0.45454545454545453<br>0.5454545454545454<br>0.6818181818181818<br>0.5909090909090909<br>0.7727272727272727<br>0.6818181818181818<br>0.6190476190476191<br>0.7619047619047619<br>Mean Accuracy: 0.6653679653679654 | 0.7385                     
|
| train_0[['batScores_team1',<br>'batScores_team2', 'current_form_team1','current_form_team2','team1_count_50runs_last15',<br>'team2_count_50runs_last15','team1_winp_last5','team2_winp_last5']]<br><br>SERIES_TYPE==1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | XGBOOST                      | {'max_depth': 2,<br>'learning_rate': 0.015783440483501218,<br>'n_estimators': 123,<br>'subsample': 0.6406727808448333,<br>'colsample_bytree': 0.5907096381230622,<br>'gamma': 0.0001426393560343889,<br>'reg_alpha': 8.416469844259471e-05,<br>'reg_lambda': 0.8691133048954046}<br>   | 0.6818181818181818<br>0.7272727272727273<br>0.5<br>0.6818181818181818<br>0.6363636363636364<br>0.5909090909090909<br>0.7272727272727273<br>0.5909090909090909<br>0.6190476190476191<br>0.8095238095238095<br>Mean Accuracy: 0.6564935064935065                 | 0.7339                     
|
| train_0[['batScores_team1',<br>'batScores_team2', 'current_form_team1','current_form_team2','team1_count_50runs_last15',<br>'team2_count_50runs_last15','team1_winp_last5','team2_winp_last5']]<br><br>SERIES_TYPE==1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | LightGBM                     | {'num_leaves': 44,<br>'max_depth': 10,<br>'learning_rate': 0.010585546805277466,<br>'n_estimators': 139,<br>'min_child_weight': 0.3585378943245338,<br>'subsample': 0.9601340115034785,<br>'colsample_bytree': 0.5374932615508997}<br>                                                 | 0.7272727272727273<br>0.7727272727272727<br>0.5909090909090909<br>0.5454545454545454<br>0.6818181818181818<br>0.5909090909090909<br>0.6363636363636364<br>0.6363636363636364<br>0.5714285714285714<br>0.8095238095238095<br>Mean Accuracy: 0.6562770562770562  | 0.7752                     
|
| train_0[['batScores_team1',<br>'batScores_team2', 'current_form_team1','current_form_team2','team1_count_50runs_last15',<br>'team2_count_50runs_last15','team1_winp_last5','team2_winp_last5']]<br><br>SERIES_TYPE==2 | CATBOOST | {'iterations': 15,<br>'depth': 6,<br>'learning_rate': 0.016405351355932622,<br>'l2_leaf_reg': 0.6860125818433047,<br>'bagging_temperature': 0.5676637905796567,<br>'random_strength': 0.404532237114998,<br>'border_count': 136,<br>'od_type': 'Iter',<br>'od_wait': 8} | 0.8181818181818182<br>0.6363636363636364<br>0.7<br>0.7<br>0.8<br>0.6<br>0.8<br>0.6<br>0.6<br>0.8<br>Mean Accuracy: 0.7054545454545453 | 0.9117 
|
| train_0[['batScores_team1',<br>'batScores_team2', 'current_form_team1','current_form_team2','team1_count_50runs_last15',<br>'team2_count_50runs_last15','team1_winp_last5','team2_winp_last5']]<br><br>SERIES_TYPE==2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | Gradient Boosting Classifier | {'n_estimators': 232,<br>'learning_rate': 0.1362028213878631,<br>'max_depth': 5,<br>'min_samples_split': 14,<br>'min_samples_leaf': 35,<br>'subsample': 0.7838162105364834,<br>'max_features': 0.6124814099539844}                                                                     | 0.8181818181818182<br>0.6363636363636364<br>0.4<br>0.4<br>0.8<br>0.7<br>0.7<br>0.8<br>0.5<br>0.6<br>Mean Accuracy: 0.6354545454545455                                                                                                                          | 0.7254                     
|
| train_0[['batScores_team1',<br>'batScores_team2', 'current_form_team1','current_form_team2','team1_count_50runs_last15',<br>'team2_count_50runs_last15','team1_winp_last5','team2_winp_last5']]<br><br>SERIES_TYPE==2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | XGBOOST                      | {'max_depth': 2,<br>'learning_rate': 0.17770076398069026,<br>'n_estimators': 472,<br>'subsample': 0.5425613821112699,<br>'colsample_bytree': 0.648810871758427,<br>'gamma': 0.03707144633589007,<br>'reg_alpha': 5.5009744725318444e-06,<br>'reg_lambda': 0.001958700762343307}        | 0.8181818181818182<br>0.6363636363636364<br>0.6<br>0.7<br>0.8<br>0.6<br>0.5<br>0.6<br>0.7<br>0.6<br>Mean Accuracy: 0.6554545454545454                                                                                                                          | 1                          
|
| train_0[['batScores_team1',<br>'batScores_team2', 'current_form_team1','current_form_team2','team1_count_50runs_last15',<br>'team2_count_50runs_last15','team1_winp_last5','team2_winp_last5']]<br><br>SERIES_TYPE==2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | LightGBM                     | {'num_leaves': 110,<br>'max_depth': 4,<br>'learning_rate': 0.01653864588218143,<br>'n_estimators': 30,<br>'min_child_weight': 0.024981684156516804,<br>'subsample': 0.6404221449044606,<br>'colsample_bytree': 0.8234104282467775}<br>                                                 | 0.6363636363636364<br>0.6363636363636364<br>0.6<br>0.8<br>0.7<br>0.6<br>0.7<br>0.4<br>0.6<br>0.7<br>Mean Accuracy: 0.6372727272727272                                                                                                                          | 0.7156                     
|
| X = train_data[['Margin_of_victory_avg_last10_team1','Margin_of_victory_avg_last10_team2','team1_winp_team2_last15',<br>'total_avg_economy_team1_last_10','total_avg_economy_team2_last_10',<br>'current_form_team1','current_form_team2','wins_when_toss_won_avg10_team1','wins_when_toss_won_avg10_team2',<br>'batStrLastn_team1','batStrLastn_team2','ground_avg_runs_last15'<br>]] | CATBOOST | {'iterations': 99,<br>'depth': 5,<br>'learning_rate': 0.0114755591732784,<br>'l2_leaf_reg': 0.010044824445741155,<br>'bagging_temperature': 0.5126781330946139,<br>'random_strength': 0.0014637506143325846,<br>'border_count': 163,<br>'od_type': 'Iter',<br>'od_wait': 27} | 0.7157894736842105<br>0.6421052631578947<br>0.5684210526315789<br>0.5157894736842106<br>0.6210526315789474<br>0.631578947368421<br>0.6<br>0.6105263157894737<br>0.5531914893617021<br>0.5531914893617021<br>Mean Accuracy: 0.6011646136618142 | 0.7637 
|
| X = train_data[['Margin_of_victory_avg_last10_team1','Margin_of_victory_avg_last10_team2','team1_winp_team2_last15',<br>'total_avg_economy_team1_last_10','total_avg_economy_team2_last_10',<br>'current_form_team1','current_form_team2','wins_when_toss_won_avg10_team1','wins_when_toss_won_avg10_team2',<br>'batStrLastn_team1','batStrLastn_team2','ground_avg_runs_last15'<br>]]                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  | Gradient Boosting Classifier | {'n_estimators': 277,<br>'learning_rate': 0.013060301438907669,<br>'max_depth': 5,<br>'min_samples_split': 26,<br>'min_samples_leaf': 55,<br>'subsample': 0.9928381620680123,<br>'max_features': 0.5386813648420103}                                                                   | 0.6210526315789474<br>0.5789473684210527<br>0.6105263157894737<br>0.5263157894736842<br>0.6<br>0.5894736842105263<br>0.5473684210526316<br>0.6105263157894737<br>0.5531914893617021<br>0.5319148936170213<br>Mean Accuracy: 0.5769316909294513<br>             | 0.7795                     
|
| X = train_data[['Margin_of_victory_avg_last10_team1','Margin_of_victory_avg_last10_team2','team1_winp_team2_last15',<br>'total_avg_economy_team1_last_10','total_avg_economy_team2_last_10',<br>'current_form_team1','current_form_team2','wins_when_toss_won_avg10_team1','wins_when_toss_won_avg10_team2',<br>'batStrLastn_team1','batStrLastn_team2','ground_avg_runs_last15'<br>]]                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  | XGBOOST                      | {'max_depth': 6,<br>'learning_rate': 0.017451505332968433,<br>'n_estimators': 95,<br>'subsample': 0.9389953881970047,<br>'colsample_bytree': 0.9437992371565853,<br>'gamma': 0.002762127905182028,<br>'reg_alpha': 0.0022199565057753707,<br>'reg_lambda': 1.473232687356804}          | 0.6105263157894737<br>0.6<br>0.5473684210526316<br>0.4842105263157895<br>0.6<br>0.5684210526315789<br>0.631578947368421<br>0.6105263157894737<br>0.5531914893617021<br>0.5425531914893617<br>Mean Accuracy: 0.5748376259798433                                 | 0.924                      
|
| X = train_data[['Margin_of_victory_avg_last10_team1','Margin_of_victory_avg_last10_team2','team1_winp_team2_last15',<br>'total_avg_economy_team1_last_10','total_avg_economy_team2_last_10',<br>'current_form_team1','current_form_team2','wins_when_toss_won_avg10_team1','wins_when_toss_won_avg10_team2',<br>'batStrLastn_team1','batStrLastn_team2','ground_avg_runs_last15'<br>]]                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  | LightGBM                     | {'num_leaves': 233,<br>'max_depth': 3,<br>'learning_rate': 0.011250430631380464,<br>'n_estimators': 133,<br>'min_child_weight': 0.049041251885792635,<br>'subsample': 0.7236532595715541,<br>'colsample_bytree': 0.52807880476009}                                                     | 0.5894736842105263<br>0.5789473684210527<br>0.6<br>0.5052631578947369<br>0.631578947368421<br>0.5684210526315789<br>0.5789473684210527<br>0.6<br>0.5531914893617021<br>0.574468085106383<br>Mean Accuracy: 0.5780291153415453                                  | 0.6898                     
|
| X = train_data[['team1_bp', 'team2_bp', 'team1_acbr', 'team2_acbr',<br>'strength_team1', 'strength_team2', 'current_form_team1',<br>'current_form_team2', 'team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','num_all_rounders_team_1',<br>'num_all_rounders_team_2', 'total_bowling_avg_team1_last_10',<br>'total_bowling_avg_team2_last_10','total_avg_economy_team1_last_10',<br>'total_avg_economy_team2_last_10', 'series_type', 'batStrLastn_team1',<br>'batStrLastn_team2',<br>'batScores_team1', 'batScores_team2', 'bowling_scores_team1',<br>'bowling_scores_team2',<br>'wins_when_toss_lost_avg10_team1', 'wins_when_toss_lost_avg10_team2',<br>'wins_when_toss_won_avg10_team1', 'wins_when_toss_won_avg10_team2',<br>'Margin_of_victory_avg_last10_team1',<br>'Margin_of_victory_avg_last10_team2',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15']] | CATBOOST | {'iterations': 158,<br>'depth': 5,<br>'learning_rate': 0.010875324059219779,<br>'l2_leaf_reg': 0.08477297853892551,<br>'bagging_temperature': 0.4235335350748377,<br>'random_strength': 0.14916694253142612,<br>'border_count': 53,<br>'od_type': 'IncToDec',<br>'od_wait': 44} | 0.6631578947368421<br>0.5894736842105263<br>0.5578947368421052<br>0.5473684210526316<br>0.6526315789473685<br>0.5894736842105263<br>0.5789473684210527<br>0.6<br>0.5851063829787234<br>0.6276595744680851<br>Mean Accuracy: 0.599171332586786 | 0.8649 
|
| X = train_data[['team1_bp', 'team2_bp', 'team1_acbr', 'team2_acbr',<br>'strength_team1', 'strength_team2', 'current_form_team1',<br>'current_form_team2', 'team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','num_all_rounders_team_1',<br>'num_all_rounders_team_2', 'total_bowling_avg_team1_last_10',<br>'total_bowling_avg_team2_last_10','total_avg_economy_team1_last_10',<br>'total_avg_economy_team2_last_10', 'series_type', 'batStrLastn_team1',<br>'batStrLastn_team2',<br>'batScores_team1', 'batScores_team2', 'bowling_scores_team1',<br>'bowling_scores_team2',<br>'wins_when_toss_lost_avg10_team1', 'wins_when_toss_lost_avg10_team2',<br>'wins_when_toss_won_avg10_team1', 'wins_when_toss_won_avg10_team2',<br>'Margin_of_victory_avg_last10_team1',<br>'Margin_of_victory_avg_last10_team2',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15']]                                                                                                                                                                                                                                                                                                                                                                                     | Gradient Boosting Classifier | {'n_estimators': 67,<br>'learning_rate': 0.026366042320800843,<br>'max_depth': 4,<br>'min_samples_split': 65,<br>'min_samples_leaf': 51,<br>'subsample': 0.8044456956056237,<br>'max_features': 0.6927584559248197}                                                                    | 0.6736842105263158<br>0.5578947368421052<br>0.6<br>0.5157894736842106<br>0.6526315789473685<br>0.5894736842105263<br>0.6210526315789474<br>0.631578947368421<br>0.5425531914893617<br>0.5319148936170213<br>Mean Accuracy: 0.5916573348264278                  | 0.7552                     
|
| X = train_data[['team1_bp', 'team2_bp', 'team1_acbr', 'team2_acbr',<br>'strength_team1', 'strength_team2', 'current_form_team1',<br>'current_form_team2', 'team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','num_all_rounders_team_1',<br>'num_all_rounders_team_2', 'total_bowling_avg_team1_last_10',<br>'total_bowling_avg_team2_last_10','total_avg_economy_team1_last_10',<br>'total_avg_economy_team2_last_10', 'series_type', 'batStrLastn_team1',<br>'batStrLastn_team2',<br>'batScores_team1', 'batScores_team2', 'bowling_scores_team1',<br>'bowling_scores_team2',<br>'wins_when_toss_lost_avg10_team1', 'wins_when_toss_lost_avg10_team2',<br>'wins_when_toss_won_avg10_team1', 'wins_when_toss_won_avg10_team2',<br>'Margin_of_victory_avg_last10_team1',<br>'Margin_of_victory_avg_last10_team2',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15']]                                                                                                                                                                                                                                                                                                                                                                                     | XGBOOST                      | {'max_depth': 5,<br>'learning_rate': 0.010868721963209564,<br>'n_estimators': 31,<br>'subsample': 0.8341042757923336,<br>'colsample_bytree': 0.9423048473887499,<br>'gamma': 0.6611259744580485,<br>'reg_alpha': 2.1436411362698711e-07,<br>'reg_lambda': 0.0691045016112978}          | 0.6947368421052632<br>0.5368421052631579<br>0.6<br>0.5368421052631579<br>0.6947368421052632<br>0.5473684210526316<br>0.5894736842105263<br>0.6210526315789474<br>0.6063829787234043<br>0.6170212765957447<br>Mean Accuracy: 0.6044456886898096                 | 0.8196                     
|
| X = train_data[['team1_bp', 'team2_bp', 'team1_acbr', 'team2_acbr',<br>'strength_team1', 'strength_team2', 'current_form_team1',<br>'current_form_team2', 'team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','num_all_rounders_team_1',<br>'num_all_rounders_team_2', 'total_bowling_avg_team1_last_10',<br>'total_bowling_avg_team2_last_10','total_avg_economy_team1_last_10',<br>'total_avg_economy_team2_last_10', 'series_type', 'batStrLastn_team1',<br>'batStrLastn_team2',<br>'batScores_team1', 'batScores_team2', 'bowling_scores_team1',<br>'bowling_scores_team2',<br>'wins_when_toss_lost_avg10_team1', 'wins_when_toss_lost_avg10_team2',<br>'wins_when_toss_won_avg10_team1', 'wins_when_toss_won_avg10_team2',<br>'Margin_of_victory_avg_last10_team1',<br>'Margin_of_victory_avg_last10_team2',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15']]                                                                                                                                                                                                                                                                                                                                                                                     | LightGBM                     | {'num_leaves': 222,<br>'max_depth': 3,<br>'learning_rate': 0.019714120788676966,<br>'n_estimators': 45,<br>'min_child_weight': 0.895973136165359,<br>'subsample': 0.5453384767762011,<br>'colsample_bytree': 0.8229314588448927}                                                       | 0.7052631578947368<br>0.5789473684210527<br>0.5789473684210527<br>0.5368421052631579<br>0.6736842105263158<br>0.5263157894736842<br>0.6<br>0.6947368421052632<br>0.5638297872340425<br>0.6170212765957447<br>Mean Accuracy: 0.6075587905935051                 | 0.6856                     
|
| ground_avg_runs_team1last15',<br>'ground_avg_runs_team2last15', 'meanBatScoresOnGround_team1',<br>'meanBatScoresOnGround_team2', 'avg_strike_rate_on_ground_team1',<br>'avg_strike_rate_on_ground_team2', 'avg_economy_on_ground_team1',<br>'avg_economy_on_ground_team2', 'winp_on_ground_team1',<br>'winp_on_ground_team2', 'mean_bowlScores_On_Ground_team1',<br>'mean_bowlScores_On_Ground_team2'                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | CATBOOST                     | {'iterations': 76,<br>'depth': 3,<br>'learning_rate': 0.030120874491960572,<br>'l2_leaf_reg': 0.019428674355769435,<br>'bagging_temperature': 0.7097065390444934,<br>'random_strength': 0.2459734088569595,<br>'border_count': 68,<br>'od_type': 'Iter',<br>'od_wait': 23}             | 0.6421052631578947<br>0.5684210526315789<br>0.6210526315789474<br>0.5684210526315789<br>0.5894736842105263<br>0.5894736842105263<br>0.45263157894736844<br>0.5368421052631579<br>0.5212765957446809<br>0.5851063829787234<br>Mean_accuracy :0.5674804031354983 | 0.6592                     
|
| ground_avg_runs_team1last15',<br>'ground_avg_runs_team2last15', 'meanBatScoresOnGround_team1',<br>'meanBatScoresOnGround_team2', 'avg_strike_rate_on_ground_team1',<br>'avg_strike_rate_on_ground_team2', 'avg_economy_on_ground_team1',<br>'avg_economy_on_ground_team2', 'winp_on_ground_team1',<br>'winp_on_ground_team2', 'mean_bowlScores_On_Ground_team1',<br>'mean_bowlScores_On_Ground_team2'                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | Gradient Boosting Classifier | {'n_estimators': 97,<br>'learning_rate': 0.012964410821276282,<br>'max_depth': 2,<br>'min_samples_split': 45,<br>'min_samples_leaf': 59,<br>'subsample': 0.7102946667311244,<br>'max_features': 0.5172965304952994}                                                                    | 0.6210526315789474<br>0.6105263157894737<br>0.5894736842105263<br>0.5473684210526316<br>0.6<br>0.6105263157894737<br>0.4631578947368421<br>0.5684210526315789<br>0.5<br>0.5638297872340425<br>Mean Accuracy: 0.5674356103023517                                | 0.60864                    
|
| ground_avg_runs_team1last15',<br>'ground_avg_runs_team2last15', 'meanBatScoresOnGround_team1',<br>'meanBatScoresOnGround_team2', 'avg_strike_rate_on_ground_team1',<br>'avg_strike_rate_on_ground_team2', 'avg_economy_on_ground_team1',<br>'avg_economy_on_ground_team2', 'winp_on_ground_team1',<br>'winp_on_ground_team2', 'mean_bowlScores_On_Ground_team1',<br>'mean_bowlScores_On_Ground_team2'                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | XGBOOST                      | {'max_depth': 4,<br>'learning_rate': 0.04576012357994757,<br>'n_estimators': 25,<br>'subsample': 0.5794379692361041,<br>'colsample_bytree': 0.6182947491361083,<br>'gamma': 0.012527458093510104,<br>'reg_alpha': 0.0024171506654675767,<br>'reg_lambda': 0.6914328900598133}          | 0.6<br>0.6210526315789474<br>0.5684210526315789<br>0.5789473684210527<br>0.5684210526315789<br>0.5684210526315789<br>0.5052631578947369<br>0.5684210526315789<br>0.5531914893617021<br>0.5957446808510638<br>Mean Accuracy: 0.5727883538633819                 | 0.7341                     
|
| ground_avg_runs_team1last15',<br>'ground_avg_runs_team2last15', 'meanBatScoresOnGround_team1',<br>'meanBatScoresOnGround_team2', 'avg_strike_rate_on_ground_team1',<br>'avg_strike_rate_on_ground_team2', 'avg_economy_on_ground_team1',<br>'avg_economy_on_ground_team2', 'winp_on_ground_team1',<br>'winp_on_ground_team2', 'mean_bowlScores_On_Ground_team1',<br>'mean_bowlScores_On_Ground_team2'                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   | LightGBM                     | {'num_leaves': 221,<br>'max_depth': 3,<br>'learning_rate': 0.011060455564213544,<br>'n_estimators': 184,<br>'min_child_weight': 0.23363474096798262,<br>'subsample': 0.9476488969345269,<br>'colsample_bytree': 0.9601301064673793}                                                    | 0.6<br>0.6<br>0.6210526315789474<br>0.6<br>0.5052631578947369<br>0.5894736842105263<br>0.5368421052631579<br>0.49473684210526314<br>0.5<br>0.574468085106383<br>Mean Accuracy: 0.5621836506159015                                                              | 0.6772                     
|
| team1_bp', 'team2_bp', 'team1_acbr', 'team2_acbr',<br>'strength_team1', 'strength_team2', 'current_form_team1',<br>'current_form_team2', 'team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','num_all_rounders_team_1',<br>'num_all_rounders_team_2', 'total_bowling_avg_team1_last_10',<br>'total_bowling_avg_team2_last_10','total_avg_economy_team1_last_10',<br>'total_avg_economy_team2_last_10', 'series_type', 'batStrLastn_team1',<br>'batStrLastn_team2',<br>'batScores_team1', 'batScores_team2', 'bowling_scores_team1',<br>'bowling_scores_team2',<br>'wins_when_toss_lost_avg10_team1', 'wins_when_toss_lost_avg10_team2',<br>'wins_when_toss_won_avg10_team1', 'wins_when_toss_won_avg10_team2',<br>'Margin_of_victory_avg_last10_team1',<br>'Margin_of_victory_avg_last10_team2',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15','ground_avg_runs_team1last15',<br>'ground_avg_runs_team2last15', 'meanBatScoresOnGround_team1',<br>'meanBatScoresOnGround_team2', 'avg_strike_rate_on_ground_team1',<br>'avg_strike_rate_on_ground_team2', 'avg_economy_on_ground_team1',<br>'avg_economy_on_ground_team2', 'winp_on_ground_team1',<br>'winp_on_ground_team2', 'mean_bowlScores_On_Ground_team1',<br>'mean_bowlScores_On_Ground_team2' | CATBOOST                     | {'iterations': 295,<br>'depth': 4,<br>'learning_rate': 0.020566856099423863,<br>'l2_leaf_reg': 0.28204845868451556,<br>'bagging_temperature': 0.15975568860797557,<br>'random_strength': 0.86060609263927,<br>'border_count': 64,<br>'od_type': 'IncToDec',<br>'od_wait': 41}          | 0.6526315789473685<br>0.5894736842105263<br>0.5894736842105263<br>0.5473684210526316<br>0.6947368421052632<br>0.6<br>0.6105263157894737<br>0.5789473684210527<br>0.5212765957446809<br>0.5425531914893617<br>Mean_accuracy :0.5926987681970884                 | 0.8755                     
|
| team1_bp', 'team2_bp', 'team1_acbr', 'team2_acbr',<br>'strength_team1', 'strength_team2', 'current_form_team1',<br>'current_form_team2', 'team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','num_all_rounders_team_1',<br>'num_all_rounders_team_2', 'total_bowling_avg_team1_last_10',<br>'total_bowling_avg_team2_last_10','total_avg_economy_team1_last_10',<br>'total_avg_economy_team2_last_10', 'series_type', 'batStrLastn_team1',<br>'batStrLastn_team2',<br>'batScores_team1', 'batScores_team2', 'bowling_scores_team1',<br>'bowling_scores_team2',<br>'wins_when_toss_lost_avg10_team1', 'wins_when_toss_lost_avg10_team2',<br>'wins_when_toss_won_avg10_team1', 'wins_when_toss_won_avg10_team2',<br>'Margin_of_victory_avg_last10_team1',<br>'Margin_of_victory_avg_last10_team2',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15','ground_avg_runs_team1last15',<br>'ground_avg_runs_team2last15', 'meanBatScoresOnGround_team1',<br>'meanBatScoresOnGround_team2', 'avg_strike_rate_on_ground_team1',<br>'avg_strike_rate_on_ground_team2', 'avg_economy_on_ground_team1',<br>'avg_economy_on_ground_team2', 'winp_on_ground_team1',<br>'winp_on_ground_team2', 'mean_bowlScores_On_Ground_team1',<br>'mean_bowlScores_On_Ground_team2' | Gradient Boosting Classifier | {'n_estimators': 124,<br>'learning_rate': 0.02974492322641823,<br>'max_depth': 3,<br>'min_samples_split': 24,<br>'min_samples_leaf': 34,<br>'subsample': 0.6129961662089509,<br>'max_features': 0.9405192068972165}<br>                                                                | 0.6421052631578947<br>0.5578947368421052<br>0.631578947368421<br>0.5157894736842106<br>0.6736842105263158<br>0.6<br>0.5368421052631579<br>0.5473684210526316<br>0.5851063829787234<br>0.5957446808510638<br>Mean Accuracy: 0.5886114221724524                  | 0.7932                     
|
| team1_bp', 'team2_bp', 'team1_acbr', 'team2_acbr',<br>'strength_team1', 'strength_team2', 'current_form_team1',<br>'current_form_team2', 'team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','num_all_rounders_team_1',<br>'num_all_rounders_team_2', 'total_bowling_avg_team1_last_10',<br>'total_bowling_avg_team2_last_10','total_avg_economy_team1_last_10',<br>'total_avg_economy_team2_last_10', 'series_type', 'batStrLastn_team1',<br>'batStrLastn_team2',<br>'batScores_team1', 'batScores_team2', 'bowling_scores_team1',<br>'bowling_scores_team2',<br>'wins_when_toss_lost_avg10_team1', 'wins_when_toss_lost_avg10_team2',<br>'wins_when_toss_won_avg10_team1', 'wins_when_toss_won_avg10_team2',<br>'Margin_of_victory_avg_last10_team1',<br>'Margin_of_victory_avg_last10_team2',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15','ground_avg_runs_team1last15',<br>'ground_avg_runs_team2last15', 'meanBatScoresOnGround_team1',<br>'meanBatScoresOnGround_team2', 'avg_strike_rate_on_ground_team1',<br>'avg_strike_rate_on_ground_team2', 'avg_economy_on_ground_team1',<br>'avg_economy_on_ground_team2', 'winp_on_ground_team1',<br>'winp_on_ground_team2', 'mean_bowlScores_On_Ground_team1',<br>'mean_bowlScores_On_Ground_team2' | XGBOOST                      | {'max_depth': 5,<br>'learning_rate': 0.013996451566597935,<br>'n_estimators': 232,<br>'subsample': 0.5678399644807302,<br>'colsample_bytree': 0.5839649406117547,<br>'gamma': 0.020411781785650736,<br>'reg_alpha': 1.9809343170219302e-05,<br>'reg_lambda': 0.8097403174867172}       | 0.6631578947368421<br>0.6421052631578947<br>0.5789473684210527<br>0.5368421052631579<br>0.6631578947368421<br>0.5789473684210527<br>0.5578947368421052<br>0.6<br>0.5425531914893617<br>0.5957446808510638<br>Mean Accuracy: 0.5959350503919373                 | 0.9651                     
|
| team1_bp', 'team2_bp', 'team1_acbr', 'team2_acbr',<br>'strength_team1', 'strength_team2', 'current_form_team1',<br>'current_form_team2', 'team1_count_50runs_last15',<br>'team2_count_50runs_last15', 'team1_winp_last5', 'team2_winp_last5',<br>'team2only_avg_runs_last15', 'total_strike_rate_team_1_last10',<br>'total_strike_rate_team_2_last10','num_all_rounders_team_1',<br>'num_all_rounders_team_2', 'total_bowling_avg_team1_last_10',<br>'total_bowling_avg_team2_last_10','total_avg_economy_team1_last_10',<br>'total_avg_economy_team2_last_10', 'series_type', 'batStrLastn_team1',<br>'batStrLastn_team2',<br>'batScores_team1', 'batScores_team2', 'bowling_scores_team1',<br>'bowling_scores_team2',<br>'wins_when_toss_lost_avg10_team1', 'wins_when_toss_lost_avg10_team2',<br>'wins_when_toss_won_avg10_team1', 'wins_when_toss_won_avg10_team2',<br>'Margin_of_victory_avg_last10_team1',<br>'Margin_of_victory_avg_last10_team2',<br>'team1only_avg_runs_last15', 'team1_winp_team2_last15',<br>'ground_avg_runs_last15','ground_avg_runs_team1last15',<br>'ground_avg_runs_team2last15', 'meanBatScoresOnGround_team1',<br>'meanBatScoresOnGround_team2', 'avg_strike_rate_on_ground_team1',<br>'avg_strike_rate_on_ground_team2', 'avg_economy_on_ground_team1',<br>'avg_economy_on_ground_team2', 'winp_on_ground_team1',<br>'winp_on_ground_team2', 'mean_bowlScores_On_Ground_team1',<br>'mean_bowlScores_On_Ground_team2' | LightGBM                     | {'num_leaves': 193,<br>'max_depth': 4,<br>'learning_rate': 0.01408462136861115,<br>'n_estimators': 89,<br>'min_child_weight': 0.11557568306079713,<br>'subsample': 0.7746889815590263,<br>'colsample_bytree': 0.8500973102263574}<br>                                                  | 0.6631578947368421<br>0.5368421052631579<br>0.5789473684210527<br>0.49473684210526314<br>0.6210526315789474<br>0.5894736842105263<br>0.5368421052631579<br>0.6105263157894737<br>0.574468085106383<br>0.6382978723404256<br>Mean Accuracy: 0.584434490481523   | 0.7911                     
|

# Final Model that gave Highest Score in R1

In [None]:
# Training inputs for the final model: ten per-team "last 10" columns
# (strike rate, fours, sixes, wickets, bowling SR) selected from train_data,
# with the 'winner_01' column as the target.
X_1 = train_data[[
    'total_strike_rate_team_1_last10', 'total_strike_rate_team_2_last10',
    'total_fours_avg_team1_last10', 'total_fours_avg_team2_last10',
    'total_sixes_avg_team1_last10', 'total_sixes_avg_team2_last10',
    'total_avg_wicket_team1_last10', 'total_avg_wicket_team2_last10',
    'total_bowling_SR_team1_last_10', 'total_bowling_SR_team2_last_10',
]]
y_1 = train_data['winner_01']

In [None]:
# Same ten columns, in the same order, selected from the test frame so the
# fitted model sees an identical feature layout at prediction time.
X_1_test = test_data.loc[:, [
    'total_strike_rate_team_1_last10', 'total_strike_rate_team_2_last10',
    'total_fours_avg_team1_last10', 'total_fours_avg_team2_last10',
    'total_sixes_avg_team1_last10', 'total_sixes_avg_team2_last10',
    'total_avg_wicket_team1_last10', 'total_avg_wicket_team2_last10',
    'total_bowling_SR_team1_last_10', 'total_bowling_SR_team2_last_10',
]]

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
# Settings for the final model. They are passed to the classifier constructor
# and also written verbatim into the submission file's metadata columns.
algo_name, is_ensemble = 'GradientBoostingClassifier', 'no'
n_trees, depth, lr = 10, 2, 0.1  # number of trees, max tree depth, learning rate

In [None]:
# Fit the final gradient-boosting classifier on the training features.
# random_state is pinned because scikit-learn randomly permutes the candidate
# features at every split; without a seed, ties between equally good splits
# can be resolved differently on each run, so the fitted model (and the
# submission files derived from it) would not be reproducible.
clf_gbm = GradientBoostingClassifier(
    n_estimators=n_trees,
    max_depth=depth,
    learning_rate=lr,
    random_state=42,
).fit(X_1, y_1)

In [None]:
# Hard 0/1 class predictions from the fitted model, stored on both frames
# under 'y_pred_01'.
gbm_train_labels = clf_gbm.predict(X_1)
gbm_test_labels = clf_gbm.predict(X_1_test)
train_data['y_pred_01'] = gbm_train_labels
test_data['y_pred_01'] = gbm_test_labels

In [None]:
# Training-set accuracy of the final model.
# Score against y_1, the labels clf_gbm was actually fit on; the bare `y`
# used previously is not defined in this section and only worked if an
# earlier experiment happened to leave a compatible variable in the kernel.
y_pred_2_GBM = train_data['y_pred_01']
accuracy2 = accuracy_score(y_1, y_pred_2_GBM)
print(accuracy2)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Per-class precision/recall/F1 on the training set.
# clf_gbm was fit on X_1 / y_1, so it must be evaluated on that same feature
# frame. Predicting on `X` (a feature frame not defined in this section,
# presumably left over from earlier experiments) would either fail on a
# feature mismatch or report metrics for the wrong inputs.
print(classification_report(y_1, clf_gbm.predict(X_1), labels=[0, 1]))

In [None]:
# Column 1 of predict_proba is the model's probability for class 1; keep it
# as the raw win score on both frames.
gbm_train_proba = clf_gbm.predict_proba(X_1)
gbm_test_proba = clf_gbm.predict_proba(X_1_test)
train_data['win_pred_score'] = gbm_train_proba[:, 1]
test_data['win_pred_score'] = gbm_test_proba[:, 1]

In [None]:
# Turn P(class 1) into the probability of the *predicted* class: rows whose
# predicted label is 0 get 1 - P(class 1), all others keep P(class 1).
# The class-1 probability is re-read from the model instead of from the
# 'win_pred_score' column, so re-running this cell is idempotent (reading the
# column back would flip already-flipped scores on a second run). On a first
# run the result is identical to flipping the stored column.
gbm_p1_train = clf_gbm.predict_proba(X_1)[:, 1]
gbm_p1_test = clf_gbm.predict_proba(X_1_test)[:, 1]
train_data['win_pred_score'] = np.where(train_data['y_pred_01'] == 0, 1 - gbm_p1_train, gbm_p1_train)
test_data['win_pred_score'] = np.where(test_data['y_pred_01'] == 0, 1 - gbm_p1_test, gbm_p1_test)

In [None]:
# Map the predicted label to a team id: label 0 selects team1_id, any other
# label selects team2_id.
for _frame in (train_data, test_data):
    _label_is_zero = _frame['y_pred_01'] == 0
    _frame['win_pred_team_id'] = np.where(_label_is_zero, _frame['team1_id'], _frame['team2_id'])

In [None]:
# Rank the model's input columns by the fitted classifier's feature
# importances (highest first) and keep at most the top ten rows, with a
# fresh 0-based index.
df_feat_importance = (
    pd.DataFrame({
        'feat_name': X_1.columns.tolist(),
        'model_feat_imp_train': clf_gbm.feature_importances_,
    })
    .sort_values(by='model_feat_imp_train', ascending=False)
    .reset_index(drop=True)
    .head(10)
)
df_feat_importance

File Preparation :

File - 1:

In [None]:
# Tag every row with the split it belongs to; the tag is exported as the
# 'dataset_type' column of the submission files.
for _frame, _tag in ((train_data, 'train'), (test_data, 'r2')):
    _frame['dataset_type'] = _tag

In [None]:
# Build submission File 1 from the test rows: identifiers, predicted winner,
# win score, and the values of the top-ranked features renamed to
# indep_feat_id1..indep_feat_id10, followed by the model metadata columns.
top_feature_names = df_feat_importance['feat_name'].head(10).tolist()
file1_base_columns = ['match id', 'dataset_type', 'win_pred_team_id', 'win_pred_score']

# Independent copy so the column edits below never touch test_data.
df_file1 = test_data[file1_base_columns + top_feature_names].copy()

# Feature at importance rank k becomes 'indep_feat_id{k}'.
renaming_dict = {
    feat: f'indep_feat_id{rank}'
    for rank, feat in enumerate(top_feature_names, start=1)
}
df_file1 = df_file1.rename(columns=renaming_dict)

# Pad with empty columns when fewer than ten features are available, so the
# file always carries all ten indep_feat_id slots.
for slot in range(1, 11):
    slot_column = f'indep_feat_id{slot}'
    if slot_column not in df_file1.columns:
        df_file1[slot_column] = np.nan

# Model metadata, repeated on every row.
df_file1['train_algorithm'] = algo_name
df_file1['is_ensemble'] = is_ensemble
df_file1['train_hps_trees'] = n_trees
df_file1['train_hps_depth'] = depth
df_file1['train_hps_lr'] = lr

In [None]:
# Quick sanity check of the (rows, columns) of the File 1 frame before export.
df_file1.shape

File - 2:

In [None]:
# Alias for feature_desc, which is defined elsewhere in the notebook.
# NOTE(review): presumably a feature-name -> description mapping, since it is
# passed to Series.map when File 2 is built — confirm against its definition.
feature_description = feature_desc

In [None]:
# Build submission File 2 (one row per top feature) from the importance table.
# Take a copy first: a plain assignment only aliases df_feat_importance, so the
# column additions below would silently modify the importance table that was
# already displayed and is reused elsewhere.
df_file2 = df_feat_importance.copy()

# The importance table is sorted by importance and carries a fresh 0-based
# index, so index + 1 serves as both the feature id and its training rank.
feat_positions = [idx + 1 for idx in df_file2.index]
df_file2['feat_id'] = feat_positions
df_file2['feat_rank_train'] = feat_positions
df_file2 = df_file2.set_index('feat_id')

# Look up a description for each feature name; names missing from the lookup
# are left as NaN by Series.map.
df_file2['feat_description'] = df_file2['feat_name'].map(feature_description)

In [None]:
# Display the File 2 table for a visual check before it is written to disk.
df_file2

In [None]:
# Write File 1 to CSV without the DataFrame index, then copy it into the
# "drive/My Drive/" folder (path is relative to the current working directory).
df_file1.to_csv('primary_submission_r2.csv',index=False)
!cp primary_submission_r2.csv "drive/My Drive/"

In [None]:
# Write File 2 to CSV; the index (feat_id) is kept so it becomes the first
# column. Then copy the file into the "drive/My Drive/" folder.
df_file2.to_csv('secondary_submission_1.csv')
!cp secondary_submission_1.csv "drive/My Drive/"

In [None]:
# Expose the predicted winning team id under the column name 'winner_id',
# which is the name selected for the dep_var2.csv export.
test_data['winner_id'] = test_data['win_pred_team_id']

In [None]:
# Columns exported to the dependent-variable file, in output order.
desired_columns = ['match id', 'winner_id', 'dataset_type']
df_filtered = test_data.loc[:, desired_columns]

# Write the three selected columns to 'dep_var2.csv' without the index.
df_filtered.to_csv('dep_var2.csv', index=False)

In [None]:
# Copy the exported dep_var2.csv into the "drive/My Drive/" folder.
!cp dep_var2.csv "drive/My Drive/"