In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tqdm.notebook import tqdm

In [2]:
# Load the master match list and parse its 'date' column as YYYY-MM-DD.
# read_csv's `date_parser` argument is deprecated since pandas 2.0, so the column
# is converted explicitly after reading; this form works on old and new pandas.
custom_date_parser = lambda x: datetime.strptime(x, "%Y-%m-%d")  # no longer used here; kept in case other cells reference it
match_list_df = pd.read_csv('csv_data/match_list.csv')
match_list_df['date'] = pd.to_datetime(match_list_df['date'], format="%Y-%m-%d")

# Jump to the "Outer Loop" section

In [61]:
# Configuration for the single-year cells: the year, the first and last day of
# that year as datetimes, and a fresh MinMaxScaler.
year = '2013'
performance_cutoff_date_start, performance_cutoff_date_end = (
    datetime.strptime(year+day_suffix, '%Y-%m-%d') for day_suffix in ('-01-01', '-12-31')
)
scaler = MinMaxScaler()


In [62]:
# Ranking file of the configured year (later cells read its 'country' and 'score' columns).
rank_csv_path = 'csv_data/country_rank_'+year+'.csv'
country_rank = pd.read_csv(rank_csv_path)

In [63]:
# Countries to process, in ranking-file order.
country_list = country_rank['country'].tolist()

# Jump to loop

In [21]:
# Walk through the computation for one country: take the first entry of country_list
# and display it as the cell output.
selected_country = country_list[0]
selected_country

'Australia'

In [22]:
# Matches dated inside the configured window (both bounds inclusive) in which the
# selected country is either the first- or the second-innings team.
played_in_window = match_list_df['date'].between(performance_cutoff_date_start,
                                                 performance_cutoff_date_end)
country_involved = ((match_list_df['first_innings'] == selected_country)
                    | (match_list_df['second_innings'] == selected_country))
country_games = match_list_df[played_in_window & country_involved]

In [23]:
# Inspect the filtered matches (bare expression -> rich display of the frame).
country_games

Unnamed: 0,match_id,date,location,first_innings,second_innings,winner,win_by,win_dif,toss_winner,player_of_match,train_data
7,406202,2010-01-22,Brisbane,Pakistan,Australia,Australia,wickets,5,Pakistan,CL White,True
8,406203,2010-01-24,Sydney Cricket Ground,Australia,Pakistan,Australia,runs,140,Pakistan,SR Watson,True
9,406204,2010-01-26,Adelaide Oval,Australia,Pakistan,Australia,runs,40,Australia,RJ Harris,True
10,406205,2010-01-29,Perth,Australia,Pakistan,Australia,runs,135,Australia,RJ Harris,True
11,406206,2010-01-31,Perth,Pakistan,Australia,Australia,wickets,2,Pakistan,CJ McKay,True
13,406192,2010-02-07,Melbourne Cricket Ground,Australia,West Indies,Australia,runs,113,West Indies,SR Watson,True
15,406193,2010-02-09,Adelaide Oval,West Indies,Australia,Australia,wickets,8,West Indies,DE Bollinger,True
17,406195,2010-02-14,Brisbane,Australia,West Indies,Australia,runs,50,West Indies,RT Ponting,True
18,406196,2010-02-19,Melbourne Cricket Ground,Australia,West Indies,Australia,runs,125,Australia,JR Hopes,True
24,423791,2010-03-03,Napier,Australia,New Zealand,New Zealand,wickets,2,Australia,LRPL Taylor,True


In [24]:
# Ids of the selected matches; each id names one per-match CSV file.
match_id_list = country_games['match_id'].tolist()

In [25]:
# Read the per-match CSV of every selected match.
# The folder is chosen from the match's `train_data` flag ('train/' when it equals
# True, 'test/' otherwise), matching the loop cells further down; always reading
# from 'train/' would not find matches that are stored under 'test/'.
is_train_by_match = dict(zip(country_games['match_id'], country_games['train_data'].eq(True)))
match_stat_list = []
for match_id in match_id_list:
    data_split = 'train' if is_train_by_match[match_id] else 'test'
    match_df = pd.read_csv('csv_data/'+data_split+'/'+str(match_id)+'.csv')
    match_stat_list.append(match_df)

In [26]:
# Stack the per-match frames into one frame. ignore_index is not set, so each
# match keeps its own row labels. pd.concat raises ValueError if the list is empty.
match_stat_df = pd.concat(match_stat_list)

In [27]:
# Replace every missing value with the literal string 'NA' (rebinding the name
# instead of mutating the frame in place).
match_stat_df = match_stat_df.fillna('NA')

In [28]:
# Attach the match-level columns of country_games to every row via match_id.
# The inner join keeps only rows whose match_id is present in country_games.
# NOTE: the cell rebinds match_stat_df, so running it twice merges the columns a second time.
match_stat_df = match_stat_df.merge(country_games,how='inner',on='match_id')

In [29]:
# Distinct batsman names found on rows whose 'team' is the selected country,
# in order of first appearance; displayed as the cell output.
own_team_rows = match_stat_df['team'] == selected_country
batsman_list = match_stat_df.loc[own_team_rows, 'batsman'].unique().tolist()
batsman_list

['SR Watson',
 'SE Marsh',
 'RT Ponting',
 'MJ Clarke',
 'CL White',
 'MEK Hussey',
 'BJ Haddin',
 'NM Hauritz',
 'JR Hopes',
 'RJ Harris',
 'AC Voges',
 'MG Johnson',
 'TD Paine',
 'CJ McKay',
 'DE Bollinger',
 'SPD Smith',
 'SW Tait',
 'JW Hastings',
 'XJ Doherty']

In [30]:
# Walk through the metrics for one batsman: the first name in batsman_list.
selected_batsman = batsman_list[0]

In [31]:
# Metrics for one batsman over the configured year (single-case walkthrough of
# the per-batsman body used by the loop cells).
# NOTE(review): rows are selected by batsman name only, without a team filter -
# confirm names are unique across teams.
batsman_df = match_stat_df[match_stat_df['batsman']==selected_batsman]
total_runs = batsman_df['scored_runs'].sum()
# Runs per row recorded for this batsman (row count taken before the ranking merge below).
run_rate = total_runs/batsman_df.shape[0]
team_score = country_rank[country_rank['country']==selected_country]['score'].values[0]

#opponent_mean
# rename() without inplace returns a new frame, which avoids the SettingWithCopyWarning
# raised when a filtered slice is renamed in place. The inner merge keeps only rows
# whose opponent appears in country_rank.
batsman_df = batsman_df.rename(columns={'opponent':'country'}).merge(country_rank,on='country',how='inner')
# Mean over matches of the opponent's ranking score.
opponent_mean = batsman_df[['match_id','country','score']].groupby(['match_id']).min().reset_index()['score'].mean()
matches_played = len(batsman_df['match_id'].unique())
player_of_the_match = country_games[country_games['player_of_match']==selected_batsman].shape[0]

#winning contribution(effectiveness) - batsman's share of the team's runs in matches the country won
country_win_list = list(country_games[country_games['winner']==selected_country]['match_id'])
winning_match_df = match_stat_df[match_stat_df['match_id'].isin(country_win_list)]
team_winning_rows = winning_match_df[winning_match_df['team']==selected_country]
batsman_winning_rows = winning_match_df[winning_match_df['batsman']==selected_batsman]
winning_contribution = batsman_winning_rows['scored_runs'].sum()/team_winning_rows['scored_runs'].sum()

#run_rate_effectiveness - batsman's runs per row relative to the team's, in won matches
country_run_rate = team_winning_rows['scored_runs'].sum()/team_winning_rows.shape[0]
batsman_run_rate = batsman_winning_rows['scored_runs'].sum()/batsman_winning_rows.shape[0]

run_rate_effectiveness = batsman_run_rate/country_run_rate

#consistency - inverse of the standard deviation of runs per match
# NOTE(review): runs_per_match is built from the merged frame, so matches against
# opponents missing from country_rank are not counted here - confirm this is intended.
runs_per_match = batsman_df.groupby(['match_id'])['scored_runs'].sum()
batting_std = runs_per_match.std()
# Same zero guard as the loop cells: identical scores in every match give std == 0.
consistency = 1/batting_std if batting_std!=0 else 1
average_score = runs_per_match.mean()



total_runs,run_rate,team_score,opponent_mean,matches_played,player_of_the_match,winning_contribution,run_rate_effectiveness

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


(652,
 0.8681757656458056,
 3.368462523392101,
 2.176401722802647,
 17,
 2,
 0.11653356994971902,
 1.0030029099165771)

In [31]:
#batsman_df.groupby(['match_id'])['scored_runs'].sum().reset_index()['scored_runs'].std()

In [32]:
#batsman_df

In [33]:
#country_list

# Loop over all countries and batsmen for the selected year

In [64]:
# Build the batsman performance table for the configured `year`:
# one row per (country, batsman), scored and written to
# csv_data/batsman_perfromance_<year>.csv.
batsman_performance_list = []
for selected_country in tqdm(country_list):
    # Matches inside the year window (bounds inclusive) where the country is one of the two teams.
    country_games = match_list_df[(match_list_df['date']>=performance_cutoff_date_start)
                             & (match_list_df['date']<=performance_cutoff_date_end)
                             & ( (match_list_df['first_innings']==selected_country)
                                |(match_list_df['second_innings']==selected_country)
                             )]
    match_id_list = list(country_games['match_id'])

    # One CSV per match, read from 'train/' when train_data equals True, else from 'test/'.
    match_stat_list = []
    for match_id, is_train in zip(match_id_list, country_games['train_data'].eq(True)):
        data_split = 'train' if is_train else 'test'
        match_stat_list.append(pd.read_csv('csv_data/'+data_split+'/'+str(match_id)+'.csv'))

    # pd.concat raises ValueError on an empty list, so skip a country with no matches this year.
    if not match_stat_list:
        continue

    match_stat_df = pd.concat(match_stat_list).fillna('NA')
    match_stat_df = match_stat_df.merge(country_games,how='inner',on='match_id')
    batsman_list = list(match_stat_df[match_stat_df['team']==selected_country]['batsman'].unique())

    # Country-level values do not depend on the batsman, so compute them once per country.
    team_score = country_rank[country_rank['country']==selected_country]['score'].values[0]
    country_win_list = list(country_games[country_games['winner']==selected_country]['match_id'])
    winning_match_df = match_stat_df[match_stat_df['match_id'].isin(country_win_list)]
    team_winning_rows = winning_match_df[winning_match_df['team']==selected_country]
    team_winning_runs = team_winning_rows['scored_runs'].sum()
    # A zero denominator yields NaN directly instead of a division RuntimeWarning;
    # NaN metrics become 0 through the fillna(0) after the loop.
    country_run_rate = team_winning_runs/team_winning_rows.shape[0] if team_winning_rows.shape[0] else np.nan

    # leave=False removes each finished inner bar so the output is not flooded with widgets.
    for selected_batsman in tqdm(batsman_list, leave=False):
        # NOTE(review): rows are selected by batsman name only, without a team filter -
        # confirm names are unique across teams.
        batsman_df = match_stat_df[match_stat_df['batsman']==selected_batsman]

        total_runs = batsman_df['scored_runs'].sum()
        # Runs per row recorded for this batsman (row count taken before the ranking merge).
        run_rate = total_runs/batsman_df.shape[0]

        #opponent_mean
        # Non-inplace rename avoids the SettingWithCopyWarning raised when a filtered slice
        # is renamed in place; the inner merge keeps only rows whose opponent is in country_rank.
        batsman_df = batsman_df.rename(columns={'opponent':'country'}).merge(country_rank,on='country',how='inner')
        opponent_mean = batsman_df[['match_id','country','score']].groupby(['match_id']).min().reset_index()['score'].mean()
        player_of_the_match = country_games[country_games['player_of_match']==selected_batsman].shape[0]

        #winning contribution(effectiveness) - share of the team's runs in matches the country won
        batsman_winning_rows = winning_match_df[winning_match_df['batsman']==selected_batsman]
        batsman_winning_runs = batsman_winning_rows['scored_runs'].sum()
        winning_contribution = batsman_winning_runs/team_winning_runs if team_winning_runs else np.nan

        #run_rate_effectiveness - batsman's runs per row relative to the team's, in won matches
        batsman_run_rate = batsman_winning_runs/batsman_winning_rows.shape[0] if batsman_winning_rows.shape[0] else np.nan
        run_rate_effectiveness = batsman_run_rate/country_run_rate if country_run_rate else np.nan

        # NOTE(review): runs_per_match comes from the merged frame, so matches against
        # opponents missing from country_rank are excluded from average_score/consistency
        # while total_runs still counts them - confirm this is intended.
        runs_per_match = batsman_df.groupby(['match_id'])['scored_runs'].sum()
        batting_std = runs_per_match.std()

        consistency = 1/batting_std if batting_std!=0 else 1
        average_score = runs_per_match.mean()

        batsman_dict = {
            'batsman':selected_batsman,
            'country':selected_country,
            'total_runs':total_runs,
            'run_rate':run_rate,
            'average_score':average_score,
            'team_score':team_score,
            'opponent_mean':opponent_mean,
            'player_of_the_match':player_of_the_match,
            'winning_contribution':winning_contribution,
            'run_rate_effectiveness':run_rate_effectiveness,
            'consistency':consistency
        }

        batsman_performance_list.append(batsman_dict)

batsman_performance_df = pd.DataFrame(batsman_performance_list).fillna(0)
# batsman_score = sum of the min-max scaled metrics; 'consistency' is left out of the sum.
metric_columns = batsman_performance_df.drop(columns=['batsman','country','consistency'])
batsman_performance_df['batsman_score'] = scaler.fit_transform(metric_columns).sum(axis=1)
batsman_performance_df = batsman_performance_df.sort_values('batsman_score',ascending=False)
# NOTE(review): 'perfromance' is misspelled in the file name; kept as-is because other
# code may read this exact path.
batsman_performance_df.to_csv('csv_data/batsman_perfromance_'+year+'.csv',index=False)



HBox(children=(FloatProgress(value=0.0, max=13.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==selected_batsman].shape[0]





HBox(children=(FloatProgress(value=0.0, max=29.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=39.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=41.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=41.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=40.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=37.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=28.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

  winning_contribution = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country].shape[0]





HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))





In [65]:
#country_games[country_games['match_id']==match_id]['train_data'].values[0]

In [66]:
# Show the scored table; the loop cell sorted it by batsman_score in descending order.
batsman_performance_df

Unnamed: 0,batsman,country,total_runs,run_rate,average_score,team_score,opponent_mean,player_of_the_match,winning_contribution,run_rate_effectiveness,consistency,batsman_score
2,V Kohli,India,1113,0.957831,41.222222,3.123274,2.416876,4,0.180220,1.160715,0.026393,5.514248
26,S Dhawan,India,1016,0.947761,48.380952,3.123274,2.213871,3,0.201978,1.102205,0.023660,5.236679
104,Mohammad Hafeez,Pakistan,1048,0.823252,37.428571,2.833494,2.403462,4,0.179245,1.141447,0.023538,5.224628
107,Misbah-ul-Haq,Pakistan,1166,0.705384,43.185185,2.833494,2.412962,4,0.121593,0.869527,0.036835,5.077757
38,AB de Villiers,South Africa,1001,0.936389,45.500000,3.026762,2.772845,2,0.177040,1.156510,0.026373,5.017653
...,...,...,...,...,...,...,...,...,...,...,...,...
404,HS Baidwan,Canada,2,0.250000,0.000000,0.642389,0.000000,0,0.000000,0.000000,0.000000,0.120136
400,A Bagai,Canada,3,0.176471,0.000000,0.642389,0.000000,0,0.000000,0.000000,0.000000,0.086164
405,JOA Gordon,Canada,2,0.142857,0.000000,0.642389,0.000000,0,0.000000,0.000000,0.000000,0.069384
399,Hamza Tariq,Canada,0,0.000000,0.000000,0.642389,0.000000,0,0.000000,0.000000,0.000000,0.000000


# Outer loop over all years

In [3]:
def _safe_ratio(numerator, denominator):
    """Return numerator/denominator, or NaN when the denominator is zero.

    Avoids the division RuntimeWarnings of the plain expression; NaN metrics are
    replaced by 0 when the yearly table is finalised with fillna(0).
    """
    return numerator/denominator if denominator else np.nan


def _load_match_stats(country_games):
    """Read the per-match CSVs of `country_games` and join the match-level columns.

    Each match is read from 'csv_data/train/<match_id>.csv' when its `train_data`
    value equals True, otherwise from 'csv_data/test/<match_id>.csv'. Missing
    values are replaced with the string 'NA'.

    Returns the combined frame, or None when `country_games` has no rows
    (pd.concat would raise ValueError on an empty list).
    """
    frames = []
    for match_id, is_train in zip(country_games['match_id'], country_games['train_data'].eq(True)):
        data_split = 'train' if is_train else 'test'
        frames.append(pd.read_csv('csv_data/'+data_split+'/'+str(match_id)+'.csv'))
    if not frames:
        return None
    return pd.concat(frames).fillna('NA').merge(country_games,how='inner',on='match_id')


def _country_batsman_records(selected_country, country_games, country_rank):
    """Return one metrics dict per batsman of `selected_country`.

    Parameters:
        selected_country: team name as used in the 'team', 'winner' and 'country' columns.
        country_games: match-list rows of this country for the year being processed.
        country_rank: ranking frame with 'country' and 'score' columns.

    Returns:
        List of dicts with the keys batsman, country, total_runs, run_rate,
        average_score, team_score, opponent_mean, player_of_the_match,
        winning_contribution, run_rate_effectiveness and consistency; empty when
        the country has no matches.
    """
    match_stat_df = _load_match_stats(country_games)
    if match_stat_df is None:
        return []

    batsman_list = list(match_stat_df[match_stat_df['team']==selected_country]['batsman'].unique())

    # Country-level values do not depend on the batsman, so compute them once.
    team_score = country_rank[country_rank['country']==selected_country]['score'].values[0]
    country_win_list = list(country_games[country_games['winner']==selected_country]['match_id'])
    winning_match_df = match_stat_df[match_stat_df['match_id'].isin(country_win_list)]
    team_winning_rows = winning_match_df[winning_match_df['team']==selected_country]
    team_winning_runs = team_winning_rows['scored_runs'].sum()
    country_run_rate = _safe_ratio(team_winning_runs, team_winning_rows.shape[0])

    records = []
    # leave=False removes each finished bar so the output is not flooded with widgets.
    for selected_batsman in tqdm(batsman_list, leave=False):
        # NOTE(review): rows are selected by batsman name only, without a team filter -
        # confirm names are unique across teams.
        batsman_df = match_stat_df[match_stat_df['batsman']==selected_batsman]

        total_runs = batsman_df['scored_runs'].sum()
        # Runs per row recorded for this batsman (row count taken before the ranking merge).
        run_rate = total_runs/batsman_df.shape[0]

        # Non-inplace rename avoids the SettingWithCopyWarning raised when a filtered slice
        # is renamed in place; the inner merge keeps only rows whose opponent is in country_rank.
        batsman_df = batsman_df.rename(columns={'opponent':'country'}).merge(country_rank,on='country',how='inner')
        opponent_mean = batsman_df[['match_id','country','score']].groupby(['match_id']).min().reset_index()['score'].mean()
        player_of_the_match = country_games[country_games['player_of_match']==selected_batsman].shape[0]

        # Share of the team's runs, and relative runs per row, in matches the country won.
        batsman_winning_rows = winning_match_df[winning_match_df['batsman']==selected_batsman]
        batsman_winning_runs = batsman_winning_rows['scored_runs'].sum()
        winning_contribution = _safe_ratio(batsman_winning_runs, team_winning_runs)
        batsman_run_rate = _safe_ratio(batsman_winning_runs, batsman_winning_rows.shape[0])
        run_rate_effectiveness = _safe_ratio(batsman_run_rate, country_run_rate)

        # NOTE(review): runs_per_match comes from the merged frame, so matches against
        # opponents missing from country_rank are excluded from average_score/consistency
        # while total_runs still counts them - confirm this is intended.
        runs_per_match = batsman_df.groupby(['match_id'])['scored_runs'].sum()
        batting_std = runs_per_match.std()
        consistency = 1/batting_std if batting_std!=0 else 1
        average_score = runs_per_match.mean()

        records.append({
            'batsman':selected_batsman,
            'country':selected_country,
            'total_runs':total_runs,
            'run_rate':run_rate,
            'average_score':average_score,
            'team_score':team_score,
            'opponent_mean':opponent_mean,
            'player_of_the_match':player_of_the_match,
            'winning_contribution':winning_contribution,
            'run_rate_effectiveness':run_rate_effectiveness,
            'consistency':consistency
        })
    return records


# For every year: score all batsmen of all ranked countries and write
# csv_data/batsman_perfromance_<year>.csv.
for year in tqdm(['2008','2009','2010','2011','2012','2013','2014','2015','2016','2017','2018','2019']):

    performance_cutoff_date_start = datetime.strptime(year+'-01-01','%Y-%m-%d')
    performance_cutoff_date_end = datetime.strptime(year+'-12-31','%Y-%m-%d')
    scaler = MinMaxScaler()

    country_rank = pd.read_csv('csv_data/country_rank_'+year+'.csv')
    country_list = list(country_rank['country'])

    batsman_performance_list = []
    for selected_country in tqdm(country_list, leave=False):
        # Matches inside the year window (bounds inclusive) where the country is one of the two teams.
        country_games = match_list_df[(match_list_df['date']>=performance_cutoff_date_start)
                                 & (match_list_df['date']<=performance_cutoff_date_end)
                                 & ( (match_list_df['first_innings']==selected_country)
                                    |(match_list_df['second_innings']==selected_country)
                                 )]
        batsman_performance_list.extend(
            _country_batsman_records(selected_country, country_games, country_rank)
        )

    batsman_performance_df = pd.DataFrame(batsman_performance_list).fillna(0)
    # batsman_score = sum of the min-max scaled metrics; 'consistency' is left out of the sum.
    metric_columns = batsman_performance_df.drop(columns=['batsman','country','consistency'])
    batsman_performance_df['batsman_score'] = scaler.fit_transform(metric_columns).sum(axis=1)
    batsman_performance_df = batsman_performance_df.sort_values('batsman_score',ascending=False)
    # NOTE(review): 'perfromance' is misspelled in the file name; kept as-is because other
    # code may read this exact path.
    batsman_performance_df.to_csv('csv_data/batsman_perfromance_'+year+'.csv',index=False)




HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==selected_batsman].shape[0]





HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=23.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=26.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))

  winning_contribution = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country].shape[0]





HBox(children=(FloatProgress(value=0.0, max=23.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==selected_batsman].shape[0]





HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))

  winning_contribution = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country].shape[0]





HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=19.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==selected_batsman].shape[0]





HBox(children=(FloatProgress(value=0.0, max=32.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=26.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=23.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=28.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=23.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))

  winning_contribution = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country].shape[0]





HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==selected_batsman].shape[0]





HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=23.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=27.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=13.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=28.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

  winning_contribution = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country].shape[0]





HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==selected_batsman].shape[0]





HBox(children=(FloatProgress(value=0.0, max=19.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=13.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))

  winning_contribution = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country].shape[0]






HBox(children=(FloatProgress(value=0.0, max=13.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=23.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==selected_batsman].shape[0]





HBox(children=(FloatProgress(value=0.0, max=28.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=23.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=23.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

  winning_contribution = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country].shape[0]





HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==selected_batsman].shape[0]





HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=26.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=26.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=7.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))

  winning_contribution = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country].shape[0]





HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==selected_batsman].shape[0]





HBox(children=(FloatProgress(value=0.0, max=23.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=23.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=7.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=29.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=29.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))

  winning_contribution = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country].shape[0]






HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==selected_batsman].shape[0]





HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))

  winning_contribution = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country].shape[0]





HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))





HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==selected_batsman].shape[0]





HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=23.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=23.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=34.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=26.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

  winning_contribution = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country].shape[0]






HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==selected_batsman].shape[0]





HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=28.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

  winning_contribution = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==selected_country]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==selected_country].shape[0]






HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=19.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==selected_batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==selected_batsman].shape[0]





HBox(children=(FloatProgress(value=0.0, max=23.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=29.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=26.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=13.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=23.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))






In [50]:
# Disabled export step (kept for reference; nothing in this cell executes).
# If re-enabled it would:
#   1. replace every NaN in batsman_performance_df with 0, in place, and
#   2. write the frame to 'csv_data/batsman_perfromance_<year>.csv' without the index column.
# NOTE(review): the file name spells "perfromance"; check what reads this CSV before
# correcting the spelling, otherwise the reader and writer will disagree on the path.
# NOTE(review): presumably the per-year outer loop already saves this file — confirm
# before deleting or re-enabling this cell.
#batsman_performance_df.fillna(0,inplace=True)
#batsman_performance_df.to_csv('csv_data/batsman_perfromance_'+year+'.csv',index=False)

In [24]:
# Disabled inspection cell: uncommenting would display batsman_performance_df as the cell output.
#batsman_performance_df

In [17]:
# Disabled scoring step (nothing in this cell executes).
# If re-enabled it would add a 'batsman_score' column: every column except 'batsman' and
# 'country' is passed through scaler.fit_transform (scaler is the MinMaxScaler created near
# the top of the notebook) and the scaled values are summed per row (axis=1).
# NOTE(review): assumes all remaining columns are numeric — confirm.
# NOTE(review): the row-wise sum is taken on a NumPy array, so a NaN in any metric would
# presumably make that row's score NaN; verify that NaNs are filled before this runs.
#batsman_performance_df['batsman_score']=scaler.fit_transform(batsman_performance_df.drop(columns=['batsman','country'])).sum(axis=1)

In [18]:
# Disabled ranking step: if re-enabled it would sort batsman_performance_df in place by
# 'batsman_score', highest first. It needs the 'batsman_score' column to exist already.
#batsman_performance_df.sort_values('batsman_score',ascending=False,inplace=True)

In [25]:
# Disabled inspection cell: uncommenting would display batsman_performance_df as the cell output.
#batsman_performance_df

In [21]:
# Disabled inspection cell: uncommenting would display only the rows of
# batsman_performance_df whose 'consistency' value equals 1 (read-only; the frame is not modified).
#batsman_performance_df[batsman_performance_df['consistency']==1]