In this version we shall consider vectors of teams and players from immediate performance

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tqdm.notebook import tqdm
import dateutil
from sklearn.linear_model import LinearRegression

In [2]:
#!pip install python-dateutil

In [3]:
#!ls csv_data/

In [4]:
custom_date_parser = lambda x: datetime.strptime(x, "%Y-%m-%d")

In [5]:
cutoff_start_year = '2008'
cutoff_start_date = datetime.strptime(cutoff_start_year+'-01-01','%Y-%m-%d')

In [6]:
match_summary_df = pd.read_csv('csv_data/match_list.csv',parse_dates=['date'],date_parser=custom_date_parser)
#country_rank_df = pd.read_csv('csv_data/')

In [7]:
recent_match_summary_df = match_summary_df[match_summary_df['date']>=cutoff_start_date]
recent_match_summary_df.shape

(1246, 11)

In [8]:
recent_match_summary_df

Unnamed: 0,match_id,date,location,first_innings,second_innings,winner,win_by,win_dif,toss_winner,player_of_match,train_data
131,298805,2008-01-20,Centurion,West Indies,South Africa,South Africa,wickets,6,South Africa,JP Duminy,True
132,325800,2008-01-21,Karachi,Pakistan,Zimbabwe,Pakistan,runs,104,Zimbabwe,Nasir Jamshed,True
133,325801,2008-01-24,Sind,Zimbabwe,Pakistan,Pakistan,wickets,5,Zimbabwe,Sohail Tanvir,True
134,298806,2008-01-25,Cape Town,South Africa,West Indies,South Africa,runs,86,South Africa,SM Pollock,True
135,325802,2008-01-27,Multan Cricket Stadium,Pakistan,Zimbabwe,Pakistan,runs,37,Pakistan,Shahid Afridi,True
...,...,...,...,...,...,...,...,...,...,...,...
1372,1198240,2020-09-16,Manchester,England,Australia,Australia,wickets,3,England,GJ Maxwell,False
1373,1233461,2020-10-30,Rawalpindi Cricket Stadium,Pakistan,Zimbabwe,Pakistan,runs,26,Pakistan,BRM Taylor,False
1374,1233462,2020-11-01,Rawalpindi Cricket Stadium,Zimbabwe,Pakistan,Pakistan,wickets,6,Zimbabwe,Iftikhar Ahmed,False
1375,1223955,2020-11-27,Sydney Cricket Ground,Australia,India,Australia,runs,66,Australia,SPD Smith,False


In [9]:
match_id_list = list(recent_match_summary_df['match_id'])

# Feature Engineering functions

In [10]:
team_global_cache = {}
batsman_global_cache = {}
bowler_global_cache = {}

In [11]:
def get_trend(input_df,team_opponent,team_name,target_field):
    input_df.rename(columns={'winner':'winning_team'},inplace=True)

    selected_match_id_list = list(input_df['match_id'])
    match_detail_list = []
    for match_id in selected_match_id_list:
        if input_df[input_df['match_id']==match_id]['train_data'].values[0]==True:
            match_info = pd.read_csv('csv_data/train/'+str(match_id)+'.csv')
        else:
            match_info = pd.read_csv('csv_data/test/'+str(match_id)+'.csv')
        match_detail_list.append(match_info)
    match_detail_df = pd.concat(match_detail_list)
    match_detail_df.fillna('NA',inplace=True)

    match_detail_df = input_df.merge(match_detail_df,how='inner',on='match_id')


    sorted_df = match_detail_df[match_detail_df[team_opponent].isin(team_name)].groupby('match_id').agg({'date': 'min', target_field: 'sum'}).reset_index()
    sorted_df.sort_values('date',inplace=True)

    y=np.array(sorted_df[target_field])
    x=np.array(range(sorted_df.shape[0])).reshape(-1,1)+1
    linear_trend_model = LinearRegression()
    linear_trend_model.fit(x,y)
    next_instance_num = x.shape[0]+1
    
    base = linear_trend_model.intercept_
    trend = linear_trend_model.coef_[0]
    trend_predict = linear_trend_model.predict(np.array([next_instance_num]).reshape(-1,1))[0]
    mean = sorted_df[target_field].mean()
    
    return base,trend,trend_predict,mean


    
    

In [12]:
def find_similar_team(search_country,country_rank_df):
    search_rank = country_rank_df[country_rank_df['country']==search_country]['rank'].values[0]
    if search_rank == 1:
        similar_rank_list = [2]
    elif search_rank == country_rank_df['rank'].max():
        similar_rank_list = [country_rank_df['rank'].max()-1]
    else:
        similar_rank_list = [search_rank-1,search_rank+1]
    return list(country_rank_df[country_rank_df['rank'].isin(similar_rank_list)]['country'])
    

In [13]:
def get_recent_team_performance(match_summary_df,team,match_date,global_cache={}):
    ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
    match_dt = datetime.utcfromtimestamp(ts)
    
    two_months = dateutil.relativedelta.relativedelta(months=2)
    a_year = dateutil.relativedelta.relativedelta(years=1)
    check_range = match_dt - two_months
    if team in global_cache and\
    global_cache[team]['date']>=check_range and\
    global_cache[team]['date']<=match_dt:
        return global_cache[team]['performance'],global_cache
    else:
        cutoff_date = match_dt-a_year
        win_count = match_summary_df[(match_summary_df['date']>=cutoff_date) & \
                                     (match_summary_df['date']<match_date) &\
                                     (match_summary_df['winner']==team)].shape[0]
        total_win_by_runs = match_summary_df[(match_summary_df['date']>=cutoff_date) &\
                                             (match_summary_df['date']<match_date) &\
                                             (match_summary_df['winner']==team) &\
                                             (match_summary_df['win_by']=='runs')]['win_dif'].sum()
        total_win_by_wickets = match_summary_df[(match_summary_df['date']>=cutoff_date) & \
                                                (match_summary_df['date']<match_date) &\
                                                (match_summary_df['winner']==team) & \
                                                (match_summary_df['win_by']=='wickets')]['win_dif'].sum()
        matches_played = match_summary_df[(match_summary_df['date']>=cutoff_date) &\
                                          (match_summary_df['date']<match_date) &\
                                          ( \
                                           (match_summary_df['first_innings']==team) | \
                                           (match_summary_df['second_innings']==team)\
                                          )].shape[0]
        if matches_played != 0:
            win_ratio = win_count/matches_played
        else:
            win_ratio =0
        
        total_loss_by_runs = match_summary_df[\
                                              (match_summary_df['date']>=cutoff_date) &\
                                              (match_summary_df['date']<match_date) &\
                                              (match_summary_df['second_innings']==team)&\
                                              (match_summary_df['winner']!=team) &\
                                              (match_summary_df['win_by']=='runs')\
                                             ]['win_dif'].sum()
        
        total_loss_by_wickets = match_summary_df[\
                                              (match_summary_df['date']>=cutoff_date) &\
                                              (match_summary_df['date']<match_date) &\
                                              (match_summary_df['first_innings']==team)&\
                                              (match_summary_df['winner']!=team) &\
                                              (match_summary_df['win_by']=='wickets')\
                                             ]['win_dif'].sum()
        effective_win_by_runs = total_win_by_runs-total_loss_by_runs
        effective_win_by_wickets = total_win_by_wickets-total_loss_by_wickets
        performance = {
                'country':team,
                 'win_ratio':win_ratio,
                 'effective_win_by_runs':effective_win_by_runs,
                 'effective_win_by_wickets':effective_win_by_wickets,
                 'matches_played':matches_played,
                 'win_count':win_count
            }
        
        global_cache[team]={
            'date':match_dt,
            'performance':performance
            
        }
        
    return performance,global_cache
        
        
    

In [14]:
def get_recent_batsman_performance(match_summary_df,team,batsman,match_date,global_cache={}):
    ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
    match_dt = datetime.utcfromtimestamp(ts)
    
    two_months = dateutil.relativedelta.relativedelta(months=2)
    a_year = dateutil.relativedelta.relativedelta(years=1)
    check_range = match_dt - two_months
    if batsman in global_cache and \
    global_cache[batsman]['date']>=check_range and \
    global_cache[batsman]['date']<=match_dt:
        return global_cache[batsman]['performance'],global_cache
    else:
        cutoff_date = match_dt-a_year
        country_games = match_summary_df[(match_summary_df['date']>=cutoff_date) 
                             & (match_summary_df['date']<match_date)
                             & ( (match_summary_df['first_innings']==team)
                                |(match_summary_df['second_innings']==team)
                             )]
        match_id_list = list(country_games['match_id'])
        
        if len(match_id_list) == 0:
            return None, global_cache
        
        match_stat_list = []
        for match_id in match_id_list:
            if country_games[country_games['match_id']==match_id]['train_data'].values[0] == True:
                match_df = pd.read_csv('csv_data/train/'+str(match_id)+'.csv')
            else:
                match_df = pd.read_csv('csv_data/test/'+str(match_id)+'.csv')
            match_stat_list.append(match_df)


        match_stat_df = pd.concat(match_stat_list)
        match_stat_df.fillna('NA',inplace=True)
        
        batsman_df = match_stat_df[match_stat_df['batsman']==batsman]
        if batsman_df.shape[0]==0:
            return None,global_cache
        
        total_runs = batsman_df['scored_runs'].sum()
        run_rate = batsman_df['scored_runs'].sum()/match_stat_df[match_stat_df['batsman']==batsman].shape[0]
        
        #opponent_variability
        #batsman_df.rename(columns={'opponent':'country'},inplace=True)
        opponent_variability = batsman_df['opponent'].nunique()
          
        #matches_played = len(list(batsman_df['match_id'].unique()))
        player_of_the_match = country_games[country_games['player_of_match']==batsman].shape[0]

        #winning contribution(effectiveness)-% of winning score
        country_win_list = list(country_games[country_games['winner']==team]['match_id'])
        winning_match_df = match_stat_df[match_stat_df['match_id'].isin(country_win_list)]
        winning_contribution = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==team]['scored_runs'].sum()

        #run_rate_effectiveness
        country_run_rate = winning_match_df[winning_match_df['team']==team]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==team].shape[0]
        batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]

        run_rate_effectiveness = batsman_run_rate/country_run_rate
        
        #batting_std = batsman_df.groupby(['match_id'])['scored_runs'].sum().reset_index()['scored_runs'].std()
        
        #consistency = 1/batting_std if batting_std!=0 else 1
        average_score = batsman_df.groupby(['match_id'])['scored_runs'].sum().reset_index()['scored_runs'].mean()

        performance = {
            'batsman':batsman,
            'country':team,
            'total_runs':total_runs,
            'run_rate':run_rate,
            'average_score':average_score,
            'opponent_variability':opponent_variability,
            #'matches_played':matches_played,
            'player_of_the_match':player_of_the_match,
            'winning_contribution':winning_contribution,
            'run_rate_effectiveness':run_rate_effectiveness,
            #'consistency':consistency
        }
        
        global_cache[batsman]={
            'date':match_dt,
            'performance':performance
        }
        
    return performance,global_cache

In [15]:
def get_recent_bowler_performance(match_summary_df,team,bowler,match_date,global_cache={}):
    ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
    match_dt = datetime.utcfromtimestamp(ts)
    
    two_months = dateutil.relativedelta.relativedelta(months=2)
    a_year = dateutil.relativedelta.relativedelta(years=1)
    check_range = match_dt - two_months
    if bowler in global_cache and \
    global_cache[bowler]['date']>=check_range and \
    global_cache[bowler]['date']<=match_dt:
        
        return global_cache[bowler]['performance'],global_cache
    else:
        cutoff_date = match_dt-a_year
        country_games = match_summary_df[(match_summary_df['date']>=cutoff_date) 
                             & (match_summary_df['date']<match_date)
                             & ( (match_summary_df['first_innings']==team)
                                |(match_summary_df['second_innings']==team)
                             )]
        match_id_list = list(country_games['match_id'])
        
        if len(match_id_list) == 0:
            return None, global_cache
        
        match_stat_list = []
        for match_id in match_id_list:
            if country_games[country_games['match_id']==match_id]['train_data'].values[0] == True:
                match_df = pd.read_csv('csv_data/train/'+str(match_id)+'.csv')
            else:
                match_df = pd.read_csv('csv_data/test/'+str(match_id)+'.csv')
            match_stat_list.append(match_df)


        match_stat_df = pd.concat(match_stat_list)
        match_stat_df.fillna('NA',inplace=True)
        
        bowler_df = match_stat_df[match_stat_df['bowler']==bowler]
        if bowler_df.shape[0]==0:
            return None,global_cache
        
        total_runs = bowler_df['total'].sum()
        run_rate = total_runs/bowler_df.shape[0]
        negative_rate = -run_rate

        # no_of_wickets,wicket_rate,wicket_per_runs
        no_of_wickets = bowler_df['wicket'].sum()-bowler_df[bowler_df['wicket_type']=='run out'].shape[0]
        wickets_per_match = no_of_wickets/len(list(bowler_df['match_id'].unique()))
        wickets_per_run = no_of_wickets/total_runs


        
        #opponent_variability
        opponent_variability = bowler_df['team'].nunique()

        
        matches_played = len(list(bowler_df['match_id'].unique()))
        #player_of_the_match = country_games[country_games['player_of_match']==selected_bowler].shape[0]

        #winning contribution(effectiveness)-% of wickets taken in winning matches
        country_win_list = list(country_games[country_games['winner']==team]['match_id'])
        winning_match_df = match_stat_df[match_stat_df['match_id'].isin(country_win_list)]

        if winning_match_df['wicket'].sum() !=0:
            winning_contribution = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df['wicket'].sum()
        else:
            winning_contribution = 0
        
        #winning_wicket_per_run rate contribution
        #winning wicket_per_match contirbution
 
        team_wickets_per_run = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==team]['total'].sum()
        bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
        winning_wicket_per_run_rate_contribution = bowler_wicket_per_run/team_wickets_per_run

        team_wicket_per_match = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df['match_id'].nunique()
        bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
        winning_wicket_per_match_contribution = bowler_wicket_per_match/team_wicket_per_match
        
        no_of_wins=winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
        #consistency
        #consistency = 1/match_stat_df[match_stat_df['bowler']==selected_bowler].groupby(['match_id'])['wicket'].sum().reset_index()['wicket'].std()

        performance = {
            'bowler':bowler,
            'country':team,
            'negative_rate':negative_rate,
            'no_of_wickets':no_of_wickets,
            'wickets_per_match':wickets_per_match,
            'wickets_per_run':wickets_per_run,
            'no_of_wins':no_of_wins,
            #'team_score':team_score,
            'opponent_variability':opponent_variability,
            'winning_contribution':winning_contribution,
            'winning_wicket_rate_contribution':winning_wicket_per_match_contribution,
            
            
        }
        
        global_cache[bowler]={
            'date':match_dt,
            'performance':performance
        }
        
    return performance,global_cache

In [16]:
# a_month = dateutil.relativedelta.relativedelta(months=1)
# a_year = dateutil.relativedelta.relativedelta(years=1)
# cutoff_start_date-a_year
#type(cutoff_start_date)
#datetime.utcfromtimestamp(match_date)
# match_date
# ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
# some_date = datetime.utcfromtimestamp(ts)
# recent_match_summary_df[recent_match_summary_df['date']>some_date]

# Jump to loop

In [17]:
selected_match_id = match_id_list[0]
selected_match_id=463150

In [19]:
noise = False
match_date = recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['date'].values[0]
team = recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['second_innings'].values[0]
opponent = recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['first_innings'].values[0]
location = recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['location'].values[0]


#team performance
team_performance,team_global_cache = get_recent_team_performance(match_summary_df,team, match_date,global_cache=team_global_cache)
#opponent performance
opponent_performance,team_global_cache = get_recent_team_performance(match_summary_df,opponent,match_date,global_cache=team_global_cache)



if recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['train_data'].values[0]==True:
    selected_match_details = pd.read_csv('csv_data/train/'+str(selected_match_id)+'.csv')
else:
    selected_match_details = pd.read_csv('csv_data/test/'+str(selected_match_id)+'.csv')

#runs scored
runs_scored=selected_match_details[selected_match_details['team']==team]['total'].sum()

#team batting strength
team_batsmaan_list = list(selected_match_details[selected_match_details['team']==team]['batsman'].unique())

batsman_performance_list =[]
for batsman in team_batsmaan_list:
    batsman_performance,batsman_global_cache=get_recent_batsman_performance(match_summary_df,team,batsman,match_date,global_cache=batsman_global_cache)
    #print(batsman,team,batsman_performance)
    batsman_performance_list.append(batsman_performance)

batsman_performance_df = pd.DataFrame(batsman_performance_list)
batsman_performance_df.fillna(0)
batsman_performance_df.sort_values('winning_contribution',ascending=False,inplace=True)
batsman_performance_df = batsman_performance_df.head(6)
batsman_mean=dict(batsman_performance_df.mean())


#opponenet
opponent_bowler_list = list(selected_match_details[selected_match_details['team']==team]['bowler'].unique())

opponent_bowler_performance_list=[]
for bowler in opponent_bowler_list:
    bowler_performance,bowler_global_cache=get_recent_bowler_performance(match_summary_df,opponent,bowler,match_date,global_cache=bowler_global_cache)
    opponent_bowler_performance_list.append(bowler_performance)
    
opponent_bowler_df = pd.DataFrame(opponent_bowler_performance_list)
opponent_bowler_df.fillna(0)
opponent_bowler_df.sort_values('winning_contribution',ascending=False,inplace=True)
opponent_bowler_df = opponent_bowler_df.head(4)
opponent_bowler_mean=dict(opponent_bowler_df.mean())

#target score
target_score = selected_match_details[selected_match_details['team']==opponent]['total'].sum()

#win
if recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['winner'].values[0]==team:
    win=1
else:
    win=0
    


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')


In [35]:
opponent_bowler_df

Unnamed: 0,batsman,country,total_runs,run_rate,average_score,opponent_variability,player_of_the_match,winning_contribution,run_rate_effectiveness
0,HM Amla,South Africa,1135,1.000882,59.736842,6,3,0.259558,1.079993
4,JP Duminy,South Africa,661,0.904241,38.882353,6,1,0.131599,0.97589
3,JH Kallis,South Africa,392,0.844828,43.555556,2,2,0.116863,0.919833
1,GC Smith,South Africa,544,0.726302,32.0,6,0,0.080624,0.778795


In [61]:
# ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
# match_dt = datetime.utcfromtimestamp(ts)

# two_months = dateutil.relativedelta.relativedelta(months=2)
# a_year = dateutil.relativedelta.relativedelta(years=1)
# check_range = match_dt - two_months

# type(global_cache['India']['date'])
# type(check_range)
#get_recent_team_performance(match_summary_df,team, match_date,global_cache=global_cache)
opponent_bowler_mean

{'total_runs': 636.5,
 'run_rate': 0.9419848079708595,
 'average_score': 48.688095238095244,
 'opponent_variability': 3.75,
 'player_of_the_match': 1.5,
 'winning_contribution': 0.1644078735075831,
 'run_rate_effectiveness': 0.9812661135409207}

In [30]:
team

'South Africa'

In [184]:
# get opponent History
# last_5_opponent = match_summary_df[(match_summary_df['first_innings']==team) 
#                                    & (match_summary_df['second_innings']==opponent)
#                                    & (match_summary_df['date']<match_date)
#                                   ].sort_values('date',ascending=False).head(5)

# last_5_location = match_summary_df[(match_summary_df['first_innings']==team) 
#                                    & (match_summary_df['location']==location)
#                                    & (match_summary_df['date']<match_date)
#                                   ].sort_values('date',ascending=False).head(5)

# last_5_match = match_summary_df[(match_summary_df['first_innings']==team) 
#                                    & (match_summary_df['date']<match_date)
#                                   ].sort_values('date',ascending=False).head(5)

# if last_5_opponent.shape[0]==0:
#     similar_opponent_list = find_similar_team(opponent,country_rank_df)
#     last_5_opponent = match_summary_df[(match_summary_df['first_innings']==team) 
#                                    & (match_summary_df['second_innings'].isin(similar_opponent_list))
#                                    & (match_summary_df['date']<match_date)
#                                   ].sort_values('date',ascending=False).head(5)
# if last_5_location.shape[0]==0:
#     similar_team_list = find_similar_team(team,country_rank_df)
#     #last_5_location = pd.DataFrame(last_5_match)
#     last_5_location = match_summary_df[(match_summary_df['first_innings'].isin(similar_team_list)) 
#                                    & (match_summary_df['location']==location)
#                                    & (match_summary_df['date']<match_date)
#                                   ].sort_values('date',ascending=False).head(5)
#     location_team_list = similar_team_list
# else:
#     location_team_list = [team]
    
# opponent_base,opponent_trend,opponent_trend_predict,opponent_mean = get_trend(last_5_opponent,'team',[team],'total')
# location_base,location_trend,location_trend_predict,location_mean = get_trend(last_5_location,'team',location_team_list,'total')
# current_base,current_trend,current_trend_predict,current_mean = get_trend(last_5_match,'team',[team],'total')




In [116]:
# last_5_match.rename(columns={'winner':'winning_team'},inplace=True)
# #last_5_match.sort_values(['date'],inplace=True)
# selected_match_id_list = list(last_5_match['match_id'])
# match_detail_list = []
# for match_id in selected_match_id_list:
#     if last_5_match[last_5_match['match_id']==match_id]['train_data'].values[0]==True:
#         match_info = pd.read_csv('csv_data/train/'+str(match_id)+'.csv')
#     else:
#         match_info = pd.read_csv('csv_data/test/'+str(match_id)+'.csv')
#     match_detail_list.append(match_info)
# match_detail_df = pd.concat(match_detail_list)
# match_detail_df.fillna('NA',inplace=True)

# match_detail_df = last_5_match.merge(match_detail_df,how='inner',on='match_id')

# #match_detail_df.sort_values('date',ascending=True,inplace=True)
# sorted_df = match_detail_df[match_detail_df['team']==team].groupby('match_id').agg({'date': 'min', 'total': 'sum'}).reset_index()
# sorted_df.sort_values('date',inplace=True)

# y=np.array(sorted_df['total'])
# x=np.array(range(sorted_df.shape[0])).reshape(-1,1)
# linear_trend_model = LinearRegression()
# linear_trend_model.fit(x,y)
# base = linear_trend_model.intercept_
# trend = linear_trend_model.coef_[0]
# mean = sorted_df['total'].mean()


In [165]:
# match_year=pd.to_datetime(match_date).year
# previous_year = match_year-1
# country_rank_df = pd.read_csv('csv_data/country_rank_'+str(previous_year)+'.csv')
# search_country = opponent
# search_rank = country_rank_df[country_rank_df['country']==search_country]['rank'].values[0]
# if search_rank == 1:
#     similar_rank_list = [2]
# elif search_rank == country_rank_df['rank'].max():
#     similar_rank_list = [country_rank_df['rank'].max()-1]
# else:
#     similar_rank_list = [search_rank-1,search_rank+1]
# list(country_rank_df[country_rank_df['rank'].isin(similar_rank_list)]['country'])
    
# #similar_country_list = 

['Sri Lanka', 'Zimbabwe']

In [29]:
pd.DataFrame([{'a':1,'b':2}])

Unnamed: 0,a,b
0,1,2


In [36]:
recent_match_summary_df['match_id'].shape

(1173,)

# Loop

In [17]:
feature_data=[]
team_data=[]
opponent_data=[]
team_batsman_data=[]
team_batsman_max_data=[]
opponent_bowler_data=[]
for selected_match_id in tqdm(match_id_list):
    
    noise = False
    match_date = recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['date'].values[0]
    team = recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['second_innings'].values[0]
    opponent = recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['first_innings'].values[0]
    location = recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['location'].values[0]


    #team performance
    team_performance,team_global_cache = get_recent_team_performance(match_summary_df,team, match_date,global_cache=team_global_cache)
    team_performance['match_id']=int(selected_match_id)
    #opponent performance
    opponent_performance,team_global_cache = get_recent_team_performance(match_summary_df,opponent,match_date,global_cache=team_global_cache)
    opponent_performance['match_id']=int(selected_match_id)


    if recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['train_data'].values[0]==True:
        selected_match_details = pd.read_csv('csv_data/train/'+str(selected_match_id)+'.csv')
    else:
        selected_match_details = pd.read_csv('csv_data/test/'+str(selected_match_id)+'.csv')

    #runs scored
    runs_scored=selected_match_details[selected_match_details['team']==team]['total'].sum()

    #team batting strength
    team_batsmaan_list = list(selected_match_details[selected_match_details['team']==team]['batsman'].unique())

    batsman_performance_list =[]
    for batsman in team_batsmaan_list:
        batsman_performance,batsman_global_cache=get_recent_batsman_performance(match_summary_df,team,batsman,match_date,global_cache=batsman_global_cache)
        if batsman_performance is not None:
            batsman_performance_list.append(batsman_performance)
    
    if len(batsman_performance_list)==0:
        print(selected_match_id,' skipped due to missing batsman performance ',team,opponent)
        continue
    
    batsman_performance_df = pd.DataFrame(batsman_performance_list)
    batsman_performance_df.fillna(0)
    batsman_performance_df.sort_values('winning_contribution',ascending=False,inplace=True)
    batsman_performance_df = batsman_performance_df.head(6)
    batsman_mean=dict(batsman_performance_df.mean())
    batsman_mean['match_id']=int(selected_match_id)
    
    batsman_max=dict(batsman_performance_df.max())
    batsman_max['match_id']=int(selected_match_id)


    #opponenet
    opponent_bowler_list = list(selected_match_details[selected_match_details['team']==team]['bowler'].unique())

    opponent_bowler_performance_list=[]
    for bowler in opponent_bowler_list:
        bowler_performance,bowler_global_cache=get_recent_bowler_performance(match_summary_df,opponent,bowler,match_date,global_cache=bowler_global_cache)
        if bowler_performance is not None:
            opponent_bowler_performance_list.append(bowler_performance)

    if len(opponent_bowler_performance_list)==0:
        print(selected_match_id,' skipped due to missing opponenet bowler performance ',team,opponent)
        continue
    opponent_bowler_df = pd.DataFrame(opponent_bowler_performance_list)
    opponent_bowler_df.fillna(0)
    opponent_bowler_df.sort_values('winning_contribution',ascending=False,inplace=True)
    opponent_bowler_df = opponent_bowler_df.head(4)
    opponent_bowler_mean=dict(opponent_bowler_df.mean())
    opponent_bowler_mean['match_id']=int(selected_match_id)
    
    #target score
    target_score = selected_match_details[selected_match_details['team']==opponent]['total'].sum()

    #win
    if recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['winner'].values[0]==team:
        win=1
    else:
        win=0
    
    
    last_5_opponent = match_summary_df[(match_summary_df['first_innings']==team) 
                                   & (match_summary_df['second_innings']==opponent)
                                   & (match_summary_df['date']<match_date)
                                  ].sort_values('date',ascending=False).head(5)

    last_5_location = match_summary_df[(match_summary_df['first_innings']==team) 
                                       & (match_summary_df['location']==location)
                                       & (match_summary_df['date']<match_date)
                                      ].sort_values('date',ascending=False).head(5)

    last_5_match = match_summary_df[(match_summary_df['first_innings']==team) 
                                       & (match_summary_df['date']<match_date)
                                      ].sort_values('date',ascending=False).head(5)
    if last_5_match.shape[0]==0:
        print(selected_match_id,' skipped due to missing prior performance ',team,opponent)
        continue
    if last_5_opponent.shape[0]==0:
        last_5_opponent = pd.DataFrame(last_5_match)
        noise = True
    if last_5_location.shape[0]==0:
        last_5_location = pd.DataFrame(last_5_match)
        noise=True
        

    opponent_base,opponent_trend,opponent_trend_predict,opponent_mean = get_trend(last_5_opponent,'team',[team],'total')
    location_base,location_trend,location_trend_predict,location_mean = get_trend(last_5_location,'team',[team],'total')
    current_base,current_trend,current_trend_predict,current_mean = get_trend(last_5_match,'team',[team],'total')

    feature_dict = {
        'match_id':int(selected_match_id),
        'match_date':match_date,
        'team':team,
        'opponent':opponent,
        'location':location,
        'opponent_base':opponent_base,
        'opponent_trend':opponent_trend,
        'opponent_trend_predict':opponent_trend_predict,
        'opponent_mean':opponent_mean,
        'location_base':location_base,
        'location_trend':location_trend,
        'location_trend_predict':location_trend_predict,
        'location_mean':location_mean,
        'current_base':current_base,
        'current_trend':current_trend,
        'current_trend_predict':current_trend_predict,
        'current_mean':current_mean,
        'is_train':recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['train_data'].values[0],
        'target_score':target_score,
        'win':win,
        'runs_scored':runs_scored,
        'noise':noise
        
    }
    
#     print(feature_dict)
#     print(team_performance)
#     print(opponent_performance)
#     print("======================")
    
    feature_data.append(dict(feature_dict))
    team_data.append(dict(team_performance))
    opponent_data.append(dict(opponent_performance))
    team_batsman_data.append(dict(batsman_mean))
    team_batsman_max_data.append(dict(batsman_max))
    opponent_bowler_data.append(dict(opponent_bowler_mean))

feature_df = pd.DataFrame(feature_data)
team_df = pd.DataFrame(team_data)
opponent_df = pd.DataFrame(opponent_data)
team_batsman_df = pd.DataFrame(team_batsman_data)
team_batsman_max_df = pd.DataFrame(team_batsman_max_data)
opponent_bowler_df = pd.DataFrame(opponent_bowler_data)
feature_df.to_csv('csv_data/second_innings_feature_expanded.csv',index=False)
team_df.to_csv('csv_data/second_innings_team.csv',index=False)
opponent_df.to_csv('csv_data/second_innings_opponent.csv',index=False)
team_batsman_df.to_csv('csv_data/second_innings_batsman.csv',index=False)
team_batsman_max_df.to_csv('csv_data/second_innings_batsman_max.csv',index=False)
opponent_bowler_df.to_csv('csv_data/second_innings_opponent_bowler.csv',index=False)


HBox(children=(FloatProgress(value=0.0, max=1246.0), HTML(value='')))

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  t

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].

  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==team]['total'].sum()
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  team_wicket_per_match = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df['match_id'].nunique()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('19

335346  skipped due to missing batsman performance  United Arab Emirates Bangladesh


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')


335347  skipped due to missing batsman performance  Hong Kong Pakistan


  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  winning_contribution = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==team]['scored_runs'].sum()
  

335349  skipped due to missing prior performance  Hong Kong India
335350  skipped due to missing prior performance  United Arab Emirates Sri Lanka


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman

325579  skipped due to missing batsman performance  Scotland Netherlands


  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1,

325581  skipped due to missing batsman performance  Ireland Zimbabwe
360800  skipped due to missing batsman performance  Kenya South Africa


  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs

  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1,

  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]


390227  skipped due to missing opponenet bowler performance  Scotland Canada


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]


390253  skipped due to missing opponenet bowler performance  Scotland Afghanistan


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler][

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowle

  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
 

  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')

  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  t

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/wi

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nuni

426384  skipped due to missing batsman performance  Ireland Australia


  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()

450498  skipped due to missing batsman performance  Afghanistan Scotland


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]


464720  skipped due to missing opponenet bowler performance  Ireland Netherlands


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  b

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timed

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta6

433559  skipped due to missing opponenet bowler performance  New Zealand Kenya


  winning_contribution = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==team]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==team]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==team].shape[0]
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==team]['total'].sum()
  bowler_wicket_per_run = 

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  winning_contribution = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==team]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==team]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==team].shape[0]
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  winning_contribution = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/wi

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==team]['total'].sum()
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  team_wicket_per_match = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df['match_id'].nunique()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('19

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta6

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1,

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = 

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')


545656  skipped due to missing opponenet bowler performance  Pakistan Afghanistan


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  winning_contribution = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==team]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==team]

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')


543888  skipped due to missing batsman performance  Netherlands Afghanistan


  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  winning_contribution = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==team]['scored_runs'].sum

  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = 

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  t

593983  skipped due to missing batsman performance  Zimbabwe West Indies


  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==team]['total'].sum()
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  team_wicket_per_match = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df['match_id'].nunique()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bo

592266  skipped due to missing batsman performance  Kenya Canada


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  winning_contribution = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==team]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==team]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==team].shape[0]
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  winning_contribu

601077  skipped due to missing batsman performance  Scotland Pakistan


  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  b

631138  skipped due to missing batsman performance  Netherlands Ireland


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
 

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==team]['total'].sum()
  bowler_wicket

  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['sc

  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()

  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]

710295  skipped due to missing batsman performance  Afghanistan Pakistan


  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==team]['total'].sum()
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  team_wicket_per_match = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df['match_id'].nunique()
  bowler_wicket_per_match = winning_

736503  skipped due to missing batsman performance  United Arab Emirates Afghanistan
736511  skipped due to missing opponenet bowler performance  United Arab Emirates Hong Kong


  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1,

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1,

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  b

803795  skipped due to missing prior performance  United Arab Emirates Afghanistan


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')


803797  skipped due to missing prior performance  United Arab Emirates Afghanistan


  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  t

  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  t

  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
 

  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()

911049  skipped due to missing opponenet bowler performance  United Arab Emirates Hong Kong


  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler][

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's'

932849  skipped due to missing batsman performance  West Indies South Africa


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()

  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()

997993  skipped due to missing opponenet bowler performance  United Arab Emirates Scotland


  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==team]['total'].sum()
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  team_wicket_per_match = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df['match_id'].nunique()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler][

1026847  skipped due to missing batsman performance  Papua New Guinea Hong Kong


  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==team]['total'].sum()
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  team_wicket_per_match = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df['match_id'].nunique()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  winning_contribution = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==team]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==team]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==team].shape[0]
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==team]['total'].sum()
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  team_wicket_per_match = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df['match_id'].nunique()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[

  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum(

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wick

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = 

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  winning_contribution = winning_match_df[winning_match_df['batsman']==batsman]['scored_

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = 

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler][

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  b

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]

1154649  skipped due to missing batsman performance  Nepal United Arab Emirates


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's'

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]

  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==team]['total'].sum()
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  team_wicket_per_match = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df['match_id'].nunique()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bo

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  b

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')


1174844  skipped due to missing batsman performance  Papua New Guinea United States of America
1174845  skipped due to missing batsman performance  Oman Namibia


  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/wi

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler][

1188378  skipped due to missing batsman performance  Netherlands Zimbabwe


  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1,

  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')

1196670  skipped due to missing prior performance  Oman Papua New Guinea
1196671  skipped due to missing opponenet bowler performance  Oman Scotland


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()


1196673  skipped due to missing prior performance  Oman Scotland


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()


1196675  skipped due to missing prior performance  Oman Papua New Guinea


  winning_contribution = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==team]['scored_runs'].sum()
  country_run_rate = winning_match_df[winning_match_df['team']==team]['scored_runs'].sum()/winning_match_df[winning_match_df['team']==team].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_

  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()

1211169  skipped due to missing prior performance  Oman United Arab Emirates


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]


1211171  skipped due to missing prior performance  Oman Namibia


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  t

1215122  skipped due to missing batsman performance  Nepal Oman


  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==team]['total'].sum()
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  team_wicket_per_match = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df['match_id'].nunique()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('19

  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==team]['total'].sum()
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  team_wi

1233461  skipped due to missing opponenet bowler performance  Zimbabwe Pakistan


  team_wickets_per_run = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df[winning_match_df['opponent']==team]['total'].sum()
  bowler_wicket_per_run = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['total'].sum()
  team_wicket_per_match = winning_match_df[winning_match_df['opponent']==team]['wicket'].sum()/winning_match_df['match_id'].nunique()
  bowler_wicket_per_match = winning_match_df[winning_match_df['bowler']==bowler]['wicket'].sum()/winning_match_df[winning_match_df['bowler']==bowler]['match_id'].nunique()
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[winning_match_df['batsman']==batsman]['scored_runs'].sum()/winning_match_df[winning_match_df['batsman']==batsman].shape[0]
  ts = (match_date - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
  batsman_run_rate = winning_match_df[




In [12]:
#feature_df.to_csv('csv_data/feature_first_innings.csv',index=False)

In [33]:
opponent_bowler_df

Unnamed: 0,batsman,country,total_runs,run_rate,average_score,opponent_variability,player_of_the_match,winning_contribution,run_rate_effectiveness
0,ES Szwarczynski,Netherlands,2,0.25,2.0,1,0,,
1,W Barresi,Netherlands,6,0.24,6.0,1,0,,
2,Mudassar Bukhari,Netherlands,7,0.259259,7.0,1,0,,
3,TLW Cooper,Netherlands,25,0.657895,25.0,1,0,,


In [14]:
feature_df[feature_df['is_train']==True].shape

(647, 25)

In [31]:
feature_data[0]

{'match_id': 463150,
 'match_date': numpy.datetime64('2011-01-12T00:00:00.000000000'),
 'team': 'South Africa',
 'opponent': 'India',
 'location': 'Durban',
 'runs_scored': 289,
 'opponent_base': 365.0,
 'opponent_trend': 0.0,
 'opponent_trend_predict': 365.0,
 'opponent_mean': 365.0,
 'location_base': 353.6,
 'location_trend': -17.60000000000001,
 'location_trend_predict': 247.99999999999997,
 'location_mean': 300.8,
 'current_base': 353.6,
 'current_trend': -17.60000000000001,
 'current_trend_predict': 247.99999999999997,
 'current_mean': 300.8,
 'is_train': True,
 'noise': False}

In [32]:
team_data[0]

{'country': 'South Africa',
 'win_ratio': 0.7333333333333333,
 'effective_win_by_runs': 415,
 'effective_win_by_wickets': 22,
 'matches_played': 15,
 'win_count': 11,
 'match_id': 463150}

In [22]:
team_df['match_id'].nunique()

292

In [20]:
!ls -l csv_data

total 3896
-rw-r--r--     1 souviksahabhowmik  staff   32833 Dec 12 20:31 batsman_perfromance_2008.csv
-rw-r--r--     1 souviksahabhowmik  staff   42981 Dec 12 20:31 batsman_perfromance_2009.csv
-rw-r--r--     1 souviksahabhowmik  staff   42361 Dec 12 20:32 batsman_perfromance_2010.csv
-rw-r--r--     1 souviksahabhowmik  staff   45726 Dec 12 20:32 batsman_perfromance_2011.csv
-rw-r--r--     1 souviksahabhowmik  staff   31669 Dec 12 20:32 batsman_perfromance_2012.csv
-rw-r--r--     1 souviksahabhowmik  staff   42116 Dec 12 20:32 batsman_perfromance_2013.csv
-rw-r--r--     1 souviksahabhowmik  staff   48395 Dec 12 20:32 batsman_perfromance_2014.csv
-rw-r--r--     1 souviksahabhowmik  staff   50155 Dec 12 20:33 batsman_perfromance_2015.csv
-rw-r--r--     1 souviksahabhowmik  staff   47727 Dec 12 20:33 batsman_perfromance_2016.csv
-rw-r--r--     1 souviksahabhowmik  staff   48305 Dec 12 20:33 batsman_perfromance_2017.csv
-rw-r--r--     1 souviksahabhowmik  staff   50096 Dec 12 2

In [26]:
# team_batsman_max_df = pd.DataFrame(team_batsman_max_data)
# team_batsman_max_df.to_csv('csv_data/first_innings_batsman_max.csv',index=False)
# opponent_bowler_df.to_csv('csv_data/first_innings_opponent_bowler.csv',index=False)

In [27]:
batsman_performance_df

Unnamed: 0,batsman,country,total_runs,run_rate,average_score,opponent_variability,player_of_the_match,winning_contribution,run_rate_effectiveness
0,DA Warner,Australia,313,0.904624,31.3,4,1,0.212064,0.963089
1,AJ Finch,Australia,424,0.751773,42.4,4,0,0.186616,0.932271
4,GJ Maxwell,Australia,186,1.1625,62.0,1,1,0.174364,1.318053
3,M Labuschagne,Australia,394,0.879464,43.777778,4,0,0.091423,0.929604
2,SPD Smith,Australia,352,0.893401,58.666667,3,0,0.013195,0.891833


In [28]:
batsman_performance_df.max()

batsman                   SPD Smith
country                   Australia
total_runs                      424
run_rate                     1.1625
average_score                    62
opponent_variability              4
player_of_the_match               1
winning_contribution       0.212064
run_rate_effectiveness      1.31805
dtype: object