In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tqdm.notebook import tqdm

In [2]:
custom_date_parser = lambda x: datetime.strptime(x, "%Y-%m-%d")
match_list_df = pd.read_csv('csv_data/match_list.csv',parse_dates=['date'],date_parser=custom_date_parser)

# Jump to outer loop

In [42]:
year='2013'
performance_cutoff_date_start = datetime.strptime(year+'-01-01','%Y-%m-%d')
performance_cutoff_date_end = datetime.strptime(year+'-12-31','%Y-%m-%d')
scaler = MinMaxScaler()


In [43]:
country_rank = pd.read_csv('csv_data/country_rank_'+year+'.csv')

In [44]:
country_list = list(country_rank['country'])

# Jump to loop

In [32]:
selected_country = country_list[0]
selected_country

'England'

In [7]:
country_games = match_list_df[(match_list_df['date']>=performance_cutoff_date_start) 
                             & (match_list_df['date']<=performance_cutoff_date_end)
                             & ( (match_list_df['first_innings']==selected_country)
                                |(match_list_df['second_innings']==selected_country)
                             )]

In [8]:
country_games

Unnamed: 0,match_id,date,location,first_innings,second_innings,winner,win_by,win_dif,toss_winner,player_of_match,train_data
257,800471,2016-02-06,Port Elizabeth,South Africa,England,England,wickets,5,South Africa,AD Hales,True
261,800473,2016-02-09,Centurion,England,South Africa,South Africa,wickets,7,England,Q de Kock,True
262,800475,2016-02-12,Johannesburg,England,South Africa,South Africa,wickets,1,South Africa,CH Morris,True
263,800477,2016-02-14,Cape Town,England,South Africa,South Africa,wickets,5,South Africa,AB de Villiers,True
279,946545,2016-06-20,Leicester,Pakistan,England,England,wickets,7,England,HC Knight,True
281,946547,2016-06-22,Worcester,England,Pakistan,England,runs,212,Pakistan,TT Beaumont,True
282,913625,2016-06-24,Birmingham,Sri Lanka,England,England,wickets,10,Sri Lanka,JJ Roy,True
285,946549,2016-06-27,Taunton,England,Pakistan,England,runs,202,England,TT Beaumont,True
286,913631,2016-07-02,Cardiff,England,Sri Lanka,England,runs,122,Sri Lanka,JC Buttler,True
296,913655,2016-08-27,London,Pakistan,England,England,wickets,4,Pakistan,JE Root,True


In [9]:
match_id_list = list(country_games['match_id'])

In [10]:
match_stat_list = []
for match_id in match_id_list:
    match_df = pd.read_csv('csv_data/train/'+str(match_id)+'.csv')
    match_stat_list.append(match_df)

In [11]:
match_stat_df = pd.concat(match_stat_list)

In [12]:
match_stat_df.fillna('NA',inplace=True)

In [13]:
match_stat_df = match_stat_df.merge(country_games,how='inner',on='match_id')

In [14]:
match_stat_df.head()

Unnamed: 0,match_id,innings,team,opponent,ball,batsman,non_striker,bowler,scored_runs,extras,...,date,location,first_innings,second_innings,winner_y,win_by,win_dif,toss_winner,player_of_match,train_data
0,800471,1st innings,South Africa,England,0.1,HM Amla,Q de Kock,RJW Topley,0,0,...,2016-02-06,Port Elizabeth,South Africa,England,England,wickets,5,South Africa,AD Hales,True
1,800471,1st innings,South Africa,England,0.2,HM Amla,Q de Kock,RJW Topley,0,0,...,2016-02-06,Port Elizabeth,South Africa,England,England,wickets,5,South Africa,AD Hales,True
2,800471,1st innings,South Africa,England,0.3,HM Amla,Q de Kock,RJW Topley,0,0,...,2016-02-06,Port Elizabeth,South Africa,England,England,wickets,5,South Africa,AD Hales,True
3,800471,1st innings,South Africa,England,0.4,HM Amla,Q de Kock,RJW Topley,0,0,...,2016-02-06,Port Elizabeth,South Africa,England,England,wickets,5,South Africa,AD Hales,True
4,800471,1st innings,South Africa,England,0.5,HM Amla,Q de Kock,RJW Topley,0,0,...,2016-02-06,Port Elizabeth,South Africa,England,England,wickets,5,South Africa,AD Hales,True


In [15]:
match_stat_df.columns

Index(['match_id', 'innings', 'team', 'opponent', 'ball', 'batsman',
       'non_striker', 'bowler', 'scored_runs', 'extras', 'total', 'extra_type',
       'wicket', 'wicket_type', 'player_out', 'fielders', 'winner_x', 'date',
       'location', 'first_innings', 'second_innings', 'winner_y', 'win_by',
       'win_dif', 'toss_winner', 'player_of_match', 'train_data'],
      dtype='object')

In [16]:
bowler_list = list(match_stat_df[match_stat_df['opponent']==selected_country]['bowler'].unique())
bowler_list

['RJW Topley',
 'DJ Willey',
 'BA Stokes',
 'AU Rashid',
 'MM Ali',
 'CJ Jordan',
 'SCJ Broad',
 'CR Woakes',
 'KH Brunt',
 'A Shrubsole',
 'KL Cross',
 'NR Sciver',
 'LA Marsh',
 'HC Knight',
 'GA Elwiss',
 'LE Plunkett',
 'JE Root',
 'A Hartley',
 'MA Wood',
 'LA Dawson',
 'JT Ball',
 'BA Langston',
 'D Hazell']

In [17]:
selected_bowler = bowler_list[0]

In [18]:
bowler_df = match_stat_df[match_stat_df['bowler']==selected_bowler]
total_runs = bowler_df['total'].sum()

run_rate = total_runs/bowler_df.shape[0]
#run_rate_inverse = 1/run_rate
negative_rate = -run_rate

# no_of_wickets,wicket_rate,wicket_per_runs
no_of_wickets = bowler_df['wicket'].sum()-bowler_df[bowler_df['wicket_type']=='run out'].shape[0]
wickets_per_match = no_of_wickets/len(list(bowler_df['match_id'].unique()))
wickets_per_run = no_of_wickets/total_runs


team_score = country_rank[country_rank['country']==selected_country]['score'].values[0]
#opponent_mean

bowler_df.rename(columns={'team':'country'},inplace=True)
bowler_df=bowler_df.merge(country_rank,on='country',how='inner')
opponent_mean = bowler_df[['match_id','country','score']].groupby(['match_id']).min().reset_index()['score'].mean()
matches_played = len(list(bowler_df['match_id'].unique()))
player_of_the_match = country_games[country_games['player_of_match']==selected_bowler].shape[0]

#winning contribution(effectiveness)-% of wickets taken in winning matches
country_win_list = list(country_games[country_games['winner']==selected_country]['match_id'])
winning_match_df = match_stat_df[match_stat_df['match_id'].isin(country_win_list)]

winning_contribution = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df['wicket'].sum()

#winning_wicket_per_run rate contribution
#winning wicket_per_match contirbution

team_wickets_per_run = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==selected_country]['total'].sum()
bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['total'].sum()
winning_wicket_per_run_rate_contribution = bowler_wicket_per_run/team_wickets_per_run

team_wicket_per_match = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/len(list(winning_match_df['match_id'].unique()))
bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['match_id'].nunique()
winning_wicket_per_match_contribution = bowler_wicket_per_match/team_wicket_per_match

#consistency
consistency = 1/match_stat_df[match_stat_df['bowler']==selected_bowler].groupby(['match_id'])['wicket'].sum().reset_index()['wicket'].std()

no_of_wins=winning_match_df[winning_match_df['bowler']==selected_bowler]['match_id'].nunique()

# total_runs,run_rate,team_score,opponent_mean,matches_played,player_of_the_match,winning_contribution,run_rate_effectiveness

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [19]:
total_runs

177

In [20]:
winning_match_df

Unnamed: 0,match_id,innings,team,opponent,ball,batsman,non_striker,bowler,scored_runs,extras,...,date,location,first_innings,second_innings,winner_y,win_by,win_dif,toss_winner,player_of_match,train_data
0,800471,1st innings,South Africa,England,0.1,HM Amla,Q de Kock,RJW Topley,0,0,...,2016-02-06,Port Elizabeth,South Africa,England,England,wickets,5,South Africa,AD Hales,True
1,800471,1st innings,South Africa,England,0.2,HM Amla,Q de Kock,RJW Topley,0,0,...,2016-02-06,Port Elizabeth,South Africa,England,England,wickets,5,South Africa,AD Hales,True
2,800471,1st innings,South Africa,England,0.3,HM Amla,Q de Kock,RJW Topley,0,0,...,2016-02-06,Port Elizabeth,South Africa,England,England,wickets,5,South Africa,AD Hales,True
3,800471,1st innings,South Africa,England,0.4,HM Amla,Q de Kock,RJW Topley,0,0,...,2016-02-06,Port Elizabeth,South Africa,England,England,wickets,5,South Africa,AD Hales,True
4,800471,1st innings,South Africa,England,0.5,HM Amla,Q de Kock,RJW Topley,0,0,...,2016-02-06,Port Elizabeth,South Africa,England,England,wickets,5,South Africa,AD Hales,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10721,1059196,2nd innings,Sri Lanka,England,32.3,SIP Fernando,WGAKK Kulasuriya,A Hartley,0,0,...,2016-11-17,Colombo,England,Sri Lanka,England,runs,162,England,,True
10722,1059196,2nd innings,Sri Lanka,England,32.4,SIP Fernando,WGAKK Kulasuriya,A Hartley,0,0,...,2016-11-17,Colombo,England,Sri Lanka,England,runs,162,England,,True
10723,1059196,2nd innings,Sri Lanka,England,32.5,SIP Fernando,WGAKK Kulasuriya,A Hartley,0,0,...,2016-11-17,Colombo,England,Sri Lanka,England,runs,162,England,,True
10724,1059196,2nd innings,Sri Lanka,England,32.6,SIP Fernando,WGAKK Kulasuriya,A Hartley,0,0,...,2016-11-17,Colombo,England,Sri Lanka,England,runs,162,England,,True


In [21]:
#bowler_df = match_stat_df[match_stat_df['bowler']==selected_bowler]
#match_stat_df[match_stat_df['wicket']==1]['extra_type'].unique()
bowler_df

Unnamed: 0,match_id,innings,country,opponent,ball,batsman,non_striker,bowler,scored_runs,extras,...,toss_winner,player_of_match,train_data,win_ratio,effective_win_by_runs,effective_win_by_wickets,matches_played,win_count,score,rank
0,800471,1st innings,South Africa,England,0.1,HM Amla,Q de Kock,RJW Topley,0,0,...,South Africa,AD Hales,True,0.538462,127,12,26,14,2.469542,3
1,800471,1st innings,South Africa,England,0.2,HM Amla,Q de Kock,RJW Topley,0,0,...,South Africa,AD Hales,True,0.538462,127,12,26,14,2.469542,3
2,800471,1st innings,South Africa,England,0.3,HM Amla,Q de Kock,RJW Topley,0,0,...,South Africa,AD Hales,True,0.538462,127,12,26,14,2.469542,3
3,800471,1st innings,South Africa,England,0.4,HM Amla,Q de Kock,RJW Topley,0,0,...,South Africa,AD Hales,True,0.538462,127,12,26,14,2.469542,3
4,800471,1st innings,South Africa,England,0.5,HM Amla,Q de Kock,RJW Topley,0,0,...,South Africa,AD Hales,True,0.538462,127,12,26,14,2.469542,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,800477,2nd innings,South Africa,England,42.3,AB de Villiers,D Wiese,RJW Topley,2,0,...,South Africa,AB de Villiers,True,0.538462,127,12,26,14,2.469542,3
183,800477,2nd innings,South Africa,England,42.4,AB de Villiers,D Wiese,RJW Topley,1,0,...,South Africa,AB de Villiers,True,0.538462,127,12,26,14,2.469542,3
184,800477,2nd innings,South Africa,England,42.5,D Wiese,AB de Villiers,RJW Topley,0,1,...,South Africa,AB de Villiers,True,0.538462,127,12,26,14,2.469542,3
185,800477,2nd innings,South Africa,England,42.6,D Wiese,AB de Villiers,RJW Topley,0,0,...,South Africa,AB de Villiers,True,0.538462,127,12,26,14,2.469542,3


In [22]:
#batsman_df

In [23]:
#country_list

# loop

In [45]:
bowler_performance_list = []
for selected_country in tqdm(country_list):
    #print(selected_country)
    country_games = match_list_df[(match_list_df['date']>=performance_cutoff_date_start) 
                             & (match_list_df['date']<=performance_cutoff_date_end)
                             & ( (match_list_df['first_innings']==selected_country)
                                |(match_list_df['second_innings']==selected_country)
                             )]
    match_id_list = list(country_games['match_id'])
    match_stat_list = []
    for match_id in match_id_list:
        if country_games[country_games['match_id']==match_id]['train_data'].values[0] == True:
            match_df = pd.read_csv('csv_data/train/'+str(match_id)+'.csv')
        else:
            match_df = pd.read_csv('csv_data/test/'+str(match_id)+'.csv')
        match_stat_list.append(match_df)
            
    
    match_stat_df = pd.concat(match_stat_list)
    match_stat_df.fillna('NA',inplace=True)

    match_stat_df = match_stat_df.merge(country_games,how='inner',on='match_id')
    bowler_list = list(match_stat_df[match_stat_df['opponent']==selected_country]['bowler'].unique())

    for selected_bowler in tqdm(bowler_list):
        #print(selected_batsman)

        bowler_df = match_stat_df[match_stat_df['bowler']==selected_bowler]
        total_runs = bowler_df['total'].sum()
        run_rate = total_runs/bowler_df.shape[0]
        negative_rate = -run_rate

        # no_of_wickets,wicket_rate,wicket_per_runs
        no_of_wickets = bowler_df['wicket'].sum()-bowler_df[bowler_df['wicket_type']=='run out'].shape[0]
        wickets_per_match = no_of_wickets/len(list(bowler_df['match_id'].unique()))
        wickets_per_run = no_of_wickets/total_runs


        team_score = country_rank[country_rank['country']==selected_country]['score'].values[0]
        #opponent_mean

        bowler_df.rename(columns={'team':'country'},inplace=True)
        bowler_df=bowler_df.merge(country_rank,on='country',how='inner')
        opponent_mean = bowler_df[['match_id','country','score']].groupby(['match_id']).min().reset_index()['score'].mean()
        matches_played = len(list(bowler_df['match_id'].unique()))
        player_of_the_match = country_games[country_games['player_of_match']==selected_bowler].shape[0]

        #winning contribution(effectiveness)-% of wickets taken in winning matches
        country_win_list = list(country_games[country_games['winner']==selected_country]['match_id'])
        winning_match_df = match_stat_df[match_stat_df['match_id'].isin(country_win_list)]

        if winning_match_df['wicket'].sum() !=0:
            winning_contribution = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df['wicket'].sum()
        else:
            winning_contribution = 0
        
        #winning_wicket_per_run rate contribution
        #winning wicket_per_match contirbution
 
        team_wickets_per_run = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==selected_country]['total'].sum()
        bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['total'].sum()
        winning_wicket_per_run_rate_contribution = bowler_wicket_per_run/team_wickets_per_run

        team_wicket_per_match = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df['match_id'].nunique()
        bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['match_id'].nunique()
        winning_wicket_per_match_contribution = bowler_wicket_per_match/team_wicket_per_match
        
        no_of_wins=winning_match_df[winning_match_df['bowler']==selected_bowler]['match_id'].nunique()
        #consistency
        #consistency = 1/match_stat_df[match_stat_df['bowler']==selected_bowler].groupby(['match_id'])['wicket'].sum().reset_index()['wicket'].std()

        bowler_dict = {
            'bowler':selected_bowler,
            'country':selected_country,
            'negative_rate':negative_rate,
            'no_of_wickets':no_of_wickets,
            'wickets_per_match':wickets_per_match,
            'wickets_per_run':wickets_per_run,
            'no_of_wins':no_of_wins,
            'team_score':team_score,
            'opponent_mean':opponent_mean,
            'winning_contribution':winning_contribution,
            'winning_wicket_rate_contribution':winning_wicket_per_match_contribution,
            
            
        }

        bowler_performance_list.append(bowler_dict)
        
bowler_performance_df = pd.DataFrame(bowler_performance_list)
bowler_performance_df.fillna(0,inplace=True)
bowler_performance_df['bowler_score']=scaler.fit_transform(bowler_performance_df.drop(columns=['bowler','country'])).sum(axis=1)
bowler_performance_df.sort_values('bowler_score',ascending=False,inplace=True)
bowler_performance_df.to_csv('csv_data/bowler_perfromance_'+year+'.csv',index=False)



HBox(children=(FloatProgress(value=0.0, max=13.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['match_id'].nunique()





HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=26.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=26.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=29.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=7.0), HTML(value='')))

  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==selected_country]['total'].sum()
  team_wicket_per_match = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df['match_id'].nunique()





HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))





In [46]:
#country_games[country_games['match_id']==match_id]['train_data'].values[0]

In [47]:
bowler_performance_df

Unnamed: 0,bowler,country,negative_rate,no_of_wickets,wickets_per_match,wickets_per_run,no_of_wins,team_score,opponent_mean,winning_contribution,winning_wicket_rate_contribution,bowler_score
5,RA Jadeja,India,-0.712786,45,1.551724,0.038995,19,3.123274,2.306804,0.140741,0.229885,6.399021
79,Saeed Ajmal,Pakistan,-0.687032,55,1.964286,0.049909,13,2.833494,2.467780,0.105263,0.228222,6.276592
27,R McLaren,South Africa,-0.817621,42,1.909091,0.045259,13,3.026762,2.677940,0.116466,0.228797,6.125251
76,Junaid Khan,Pakistan,-0.817167,44,1.913043,0.046218,12,2.833494,2.369989,0.112782,0.264901,5.985333
179,MJ McClenaghan,New Zealand,-0.864567,32,2.909091,0.058288,6,1.666228,2.637294,0.170000,0.288136,5.825006
...,...,...,...,...,...,...,...,...,...,...,...,...
272,K Kamyuka,Canada,-0.750000,0,0.000000,0.000000,0,0.642389,0.000000,0.000000,0.000000,0.833333
267,N Dutta,Canada,-0.815789,0,0.000000,0.000000,0,0.642389,0.000000,0.000000,0.000000,0.807018
269,Raza-ur-Rehman,Canada,-0.821429,0,0.000000,0.000000,0,0.642389,0.000000,0.000000,0.000000,0.804762
264,Rizwan Cheema,Canada,-0.875000,0,0.000000,0.000000,0,0.642389,0.000000,0.000000,0.000000,0.783333


# Outer Loop

In [None]:
for year in tqdm(['2008','2009','2010','2011','2012','2013','2014','2015','2016','2017','2018','2019']):
    performance_cutoff_date_start = datetime.strptime(year+'-01-01','%Y-%m-%d')
    performance_cutoff_date_end = datetime.strptime(year+'-12-31','%Y-%m-%d')
    scaler = MinMaxScaler()
    country_rank = pd.read_csv('csv_data/country_rank_'+year+'.csv')
    
    country_list = list(country_rank['country'])
    
    bowler_performance_list = []
    for selected_country in tqdm(country_list):
        #print(selected_country)
        country_games = match_list_df[(match_list_df['date']>=performance_cutoff_date_start) 
                                 & (match_list_df['date']<=performance_cutoff_date_end)
                                 & ( (match_list_df['first_innings']==selected_country)
                                    |(match_list_df['second_innings']==selected_country)
                                 )]
        match_id_list = list(country_games['match_id'])
        match_stat_list = []
        for match_id in match_id_list:
            if country_games[country_games['match_id']==match_id]['train_data'].values[0] == True:
                match_df = pd.read_csv('csv_data/train/'+str(match_id)+'.csv')
            else:
                match_df = pd.read_csv('csv_data/test/'+str(match_id)+'.csv')
            match_stat_list.append(match_df)


        match_stat_df = pd.concat(match_stat_list)
        match_stat_df.fillna('NA',inplace=True)

        match_stat_df = match_stat_df.merge(country_games,how='inner',on='match_id')
        bowler_list = list(match_stat_df[match_stat_df['opponent']==selected_country]['bowler'].unique())

        for selected_bowler in tqdm(bowler_list):
            #print(selected_batsman)

            bowler_df = match_stat_df[match_stat_df['bowler']==selected_bowler]
            total_runs = bowler_df['total'].sum()
            run_rate = total_runs/bowler_df.shape[0]
            negative_rate = -run_rate

            # no_of_wickets,wicket_rate,wicket_per_runs
            no_of_wickets = bowler_df['wicket'].sum()-bowler_df[bowler_df['wicket_type']=='run out'].shape[0]
            wickets_per_match = no_of_wickets/len(list(bowler_df['match_id'].unique()))
            wickets_per_run = no_of_wickets/total_runs


            team_score = country_rank[country_rank['country']==selected_country]['score'].values[0]
            #opponent_mean

            bowler_df.rename(columns={'team':'country'},inplace=True)
            bowler_df=bowler_df.merge(country_rank,on='country',how='inner')
            opponent_mean = bowler_df[['match_id','country','score']].groupby(['match_id']).min().reset_index()['score'].mean()
            matches_played = len(list(bowler_df['match_id'].unique()))
            player_of_the_match = country_games[country_games['player_of_match']==selected_bowler].shape[0]

            #winning contribution(effectiveness)-% of wickets taken in winning matches
            country_win_list = list(country_games[country_games['winner']==selected_country]['match_id'])
            winning_match_df = match_stat_df[match_stat_df['match_id'].isin(country_win_list)]

            if winning_match_df['wicket'].sum() !=0:
                winning_contribution = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df['wicket'].sum()
            else:
                winning_contribution = 0

            #winning_wicket_per_run rate contribution
            #winning wicket_per_match contirbution

            team_wickets_per_run = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==selected_country]['total'].sum()
            bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['total'].sum()
            winning_wicket_per_run_rate_contribution = bowler_wicket_per_run/team_wickets_per_run

            team_wicket_per_match = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df['match_id'].nunique()
            bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['match_id'].nunique()
            winning_wicket_per_match_contribution = bowler_wicket_per_match/team_wicket_per_match

            no_of_wins=winning_match_df[winning_match_df['bowler']==selected_bowler]['match_id'].nunique()
            #consistency
            #consistency = 1/match_stat_df[match_stat_df['bowler']==selected_bowler].groupby(['match_id'])['wicket'].sum().reset_index()['wicket'].std()

            bowler_dict = {
                'bowler':selected_bowler,
                'country':selected_country,
                'negative_rate':negative_rate,
                'no_of_wickets':no_of_wickets,
                'wickets_per_match':wickets_per_match,
                'wickets_per_run':wickets_per_run,
                'no_of_wins':no_of_wins,
                'team_score':team_score,
                'opponent_mean':opponent_mean,
                'winning_contribution':winning_contribution,
                'winning_wicket_rate_contribution':winning_wicket_per_match_contribution,


            }

            bowler_performance_list.append(bowler_dict)

    bowler_performance_df = pd.DataFrame(bowler_performance_list)
    bowler_performance_df.fillna(0,inplace=True)
    bowler_performance_df['bowler_score']=scaler.fit_transform(bowler_performance_df.drop(columns=['bowler','country'])).sum(axis=1)
    bowler_performance_df.sort_values('bowler_score',ascending=False,inplace=True)
    bowler_performance_df.to_csv('csv_data/bowler_perfromance_'+year+'.csv',index=False)




HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['match_id'].nunique()





HBox(children=(FloatProgress(value=0.0, max=19.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=13.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==selected_country]['total'].sum()
  team_wicket_per_match = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df['match_id'].nunique()


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['match_id'].nunique()





HBox(children=(FloatProgress(value=0.0, max=13.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=7.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=7.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=8.0), HTML(value='')))

  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==selected_country]['total'].sum()
  team_wicket_per_match = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df['match_id'].nunique()





HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['match_id'].nunique()





HBox(children=(FloatProgress(value=0.0, max=23.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=13.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=7.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=8.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=7.0), HTML(value='')))




  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==selected_country]['total'].sum()
  team_wicket_per_match = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df['match_id'].nunique()


HBox(children=(FloatProgress(value=0.0, max=7.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(





HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))

  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['match_id'].nunique()





HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))

  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==selected_country]['total'].sum()
  team_wicket_per_match = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df['match_id'].nunique()





HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(





  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['match_id'].nunique()


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))

  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==selected_country]['total'].sum()
  team_wicket_per_match = winning_match_df[winning_match_df['opponent']==selected_country]['wicket'].sum()/winning_match_df['match_id'].nunique()






HBox(children=(FloatProgress(value=0.0, max=13.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==selected_bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==selected_bowler]['match_id'].nunique()





HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))