In [208]:
import pandas as pd
import numpy as np
from datetime import datetime
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tqdm.notebook import tqdm
import dateutil
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder

from sklearn.metrics.pairwise import cosine_similarity
import json
import pickle


In [209]:
custom_date_parser = lambda x: datetime.strptime(x, "%Y-%m-%d")

In [210]:
cutoff_start_year = '2013'
cutoff_start_date = datetime.strptime(cutoff_start_year+'-01-01','%Y-%m-%d')

In [211]:
match_summary_df = pd.read_csv('csv_data/match_list.csv',parse_dates=['date'],date_parser=custom_date_parser)
recent_match_summary_df = match_summary_df[match_summary_df['date']>=cutoff_start_date]
recent_match_summary_df.shape

(781, 11)

In [212]:
match_stats_df = pd.read_csv('csv_data/match_stats.csv')

In [213]:
recent_match_summary_df=recent_match_summary_df.merge(match_stats_df,on='match_id',how='inner')

In [214]:
recent_match_summary_df.columns

Index(['match_id', 'date', 'location', 'first_innings', 'second_innings',
       'winner', 'win_by', 'win_dif', 'toss_winner', 'player_of_match',
       'train_data', 'team_statistics', 'batsman_1', 'batsman_1_runs',
       'batsman_2', 'batsman_2_runs', 'batsman_3', 'batsman_3_runs',
       'batsman_4', 'batsman_4_runs', 'batsman_5', 'batsman_5_runs',
       'batsman_6', 'batsman_6_runs', 'batsman_7', 'batsman_7_runs',
       'batsman_8', 'batsman_8_runs', 'batsman_9', 'batsman_9_runs',
       'batsman_10', 'batsman_10_runs', 'batsman_11', 'batsman_11_runs',
       'bowler_1', 'bowler_1_wickets', 'bowler_2', 'bowler_2_wickets',
       'bowler_3', 'bowler_3_wickets', 'bowler_4', 'bowler_4_wickets',
       'bowler_5', 'bowler_5_wickets', 'bowler_6', 'bowler_6_wickets',
       'bowler_7', 'bowler_7_wickets', 'bowler_8', 'bowler_8_wickets',
       'bowler_9', 'bowler_9_wickets', 'bowler_10', 'bowler_10_wickets',
       'bowler_11', 'bowler_11_wickets', 'total_run', 'total_wickets'],
     

In [215]:
#change here to include second innings
#recent_match_summary_df=recent_match_summary_df[recent_match_summary_df['first_innings']==recent_match_summary_df['team_statistics']]

In [216]:
recent_match_summary_df.shape

(1562, 58)

In [217]:
#recent_match_summary_df.iloc[0]

In [46]:
batsman_list = pickle.load(open('batsman_list.pkl','rb'))

In [56]:
batsman_enc_map = dict()
enc_batsman_map = dict()

for ind,batsman in enumerate(batsman_list):
    oh_encoding = np.zeros((len(batsman_list))).astype(int)
    oh_encoding[ind] = 1
    batsman_enc_map[batsman]=oh_encoding
    enc_batsman_map[str(oh_encoding)]=batsman
    
oh_encoding = np.zeros((len(batsman_list))).astype(int)
batsman_enc_map['not_batted']=oh_encoding
enc_batsman_map[str(oh_encoding)]='not_batted'
    

In [92]:
location_list = list(recent_match_summary_df['location'].unique())

In [118]:
loc_enc_map = dict()
enc_loc_map = dict()

for ind,location in enumerate(location_list):
    oh_encoding = np.zeros((len(location_list))).astype(int)
    oh_encoding[ind] = 1
    loc_enc_map[location]=oh_encoding
    enc_loc_map[str(oh_encoding)]=location
    

In [119]:
pickle.dump(batsman_enc_map,open('batsman_enc_map.pkl','wb'))
pickle.dump(enc_batsman_map,open('enc_batsman_map.pkl','wb'))
pickle.dump(loc_enc_map,open('loc_enc_map_for_batsman.pkl','wb'))
pickle.dump(enc_loc_map,open('enc_loc_map_for_batsman.pkl','wb'))

In [120]:
def get_oh_pos(pos):
    vec=np.zeros((11)).astype(int) 
    vec[pos-1]=1
    return vec



In [121]:
get_oh_pos(1)

array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [122]:
country_enc_map=pickle.load(open('country_enc_map.pkl','rb'))
# loc_enc_map=pickle.load(open('loc_enc_map.pkl','rb'))

In [218]:
match_id_list = list(recent_match_summary_df['match_id'].unique())

In [219]:
batsman_data_train = []
postion_data_train =[]
location_data_train = []
opposition_data_train =[]
score_data_train = []

batsman_data_test = []
postion_data_test =[]
location_data_test = []
opposition_data_test =[]
score_data_test = []
for selected_match_id in tqdm(match_id_list):
    teams = list(recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['team_statistics'].unique())
    is_train = recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['train_data'].values[0]
    
    for team in teams:
        location = recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['location'].values[0]
        if location not in loc_enc_map:
            print('skipped match at ',location)
            continue
        location_oh = np.array(loc_enc_map[location])
        team_stats_df = recent_match_summary_df[
            (recent_match_summary_df['match_id']==selected_match_id) & 
            (recent_match_summary_df['team_statistics']==team)]
        
        if team_stats_df['first_innings'].values[0]==team:
            opposition = team_stats_df['second_innings'].values[0]
        else:
            opposition = team_stats_df['first_innings'].values[0]
        opposition_oh = np.array(country_enc_map[opposition])
        
        
        for bi in range(11):
            position_oh = get_oh_pos(bi+1)
            batsman = team_stats_df['batsman_'+str(bi+1)].values[0]
            if batsman == 'not_batted':
                batsman_oh = np.array(batsman_enc_map['not_batted'])
            else:
                batsman_oh = np.array(batsman_enc_map[team.strip()+' '+batsman.strip()])
            score = team_stats_df['batsman_'+str(bi+1)+'_runs'].values[0]
            
            if is_train:
                batsman_data_train.append(batsman_oh)
                postion_data_train.append(position_oh)
                location_data_train.append(location_oh)
                opposition_data_train.append(opposition_oh)
                score_data_train.append(score)
                
            else:
                batsman_data_test.append(batsman_oh)
                postion_data_test.append(position_oh)
                location_data_test.append(location_oh)
                opposition_data_test.append(opposition_oh)
                score_data_test.append(score)
            
            
            
            

HBox(children=(FloatProgress(value=0.0, max=781.0), HTML(value='')))




In [220]:
batsman_train = np.stack(batsman_data_train)
position_train = np.stack(postion_data_train)
location_train = np.stack(location_data_train)
opposition_train = np.stack(opposition_data_train)
score_train =  np.stack(score_data_train)

batsman_test = np.stack(batsman_data_test)
position_test = np.stack(postion_data_test)
location_test = np.stack(location_data_test)
opposition_test = np.stack(opposition_data_test)
score_test =  np.stack(score_data_test)

In [221]:
import keras as k
import keras.backend as K
from keras.layers import *
from keras.models import Model
from keras.regularizers import l2

from keras.optimizers import Adam, Adadelta

In [222]:
def create_batsman_embedding_model(batsman_len,position_len,location_len,opposition_len):
    batsman_input = Input((batsman_len,),name="batsman_input")
    position_input = Input((position_len,),name="position_input")
    location_input = Input((location_len,),name="location_input")
    opposition_input = Input((opposition_len,),name="opposition_input")
    
    #team_output = Dropout(0.2)(team_input)
    batsman_output = Dense(10,activation="relu",use_bias=True, kernel_initializer='normal',bias_regularizer=l2(0.01),kernel_regularizer=l2(0.1),name="batsman_1")(batsman_input)
    batsman_output = Dropout(0.2)(batsman_output)
    
    #opponent_output = Dropout(0.2)(opponent_input)
    position_output = Dense(10,activation="relu",use_bias=True, kernel_initializer='normal',bias_regularizer=l2(0.01),kernel_regularizer=l2(0.1),name="pos_1")(position_input)
    position_output = Dropout(0.2)(position_output)
    
    #location_output = Dropout(0.2)(location_input)
    location_output = Dense(10,activation="relu",use_bias=True, kernel_initializer='normal',bias_regularizer=l2(0.01),kernel_regularizer=l2(0.1),name="loc_1")(location_input)
    location_output = Dropout(0.2)(location_output)
    
    opposition_output = Dense(10,activation="relu",use_bias=True, kernel_initializer='normal',bias_regularizer=l2(0.01),kernel_regularizer=l2(0.1),name="opposition_1")(opposition_input)
    opposition_output = Dropout(0.2)(opposition_output)
    
#     concat_out = Concatenate()([batsman_output, position_output,location_output,opposition_output])
#     runs_output = Dropout(0.2)(concat_out)
#     runs_output = Dense(1,name="final_score",use_bias=True, kernel_regularizer=l2(0.01),bias_regularizer=l2(0.01),kernel_initializer='normal')(concat_out)
    
    concat_out = Concatenate()([batsman_output, position_output,location_output,opposition_output])
    concat_out = Dropout(0.2)(concat_out)
    concat_out = Dense(10,name="concat_2",use_bias=True, kernel_regularizer=l2(0.01),bias_regularizer=l2(0.01),activation="relu",kernel_initializer='normal')(concat_out)
    runs_output = Dense(1,name="final_score",use_bias=True, kernel_regularizer=l2(0.01),bias_regularizer=l2(0.01),kernel_initializer='normal')(concat_out)
    
    
    batsman_model = Model(inputs=batsman_input,outputs=batsman_output)
    position_model = Model(inputs=position_input,outputs=position_output)
    location_model = Model(inputs=location_input,outputs=location_output)
    opposition_model = Model(inputs=opposition_input,outputs=opposition_output)
    group_encode_model = Model(inputs=[batsman_input,position_input,location_input,opposition_input],
                               outputs=concat_out)
    
    runs_model = Model(inputs=[batsman_input,position_input,location_input,opposition_input],
                      outputs=runs_output)
    
    
    return batsman_model,position_model,location_model,opposition_model,group_encode_model,runs_model

In [223]:
batsman_model,position_model,location_model,opposition_model,group_encode_model,runs_model = \
create_batsman_embedding_model(batsman_train.shape[1],\
                               position_train.shape[1],\
                               location_train.shape[1],\
                               opposition_train.shape[1])

In [243]:
runs_model.compile(loss="mean_squared_error", metrics=["mean_absolute_error"],optimizer=Adam(0.0001))

In [245]:
runs_model.fit([batsman_train,position_train,location_train,opposition_train], score_train,
               validation_data=([batsman_test,position_test,location_test,opposition_test],score_test),epochs=100, batch_size=10)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100

KeyboardInterrupt: 

In [246]:
predicted_train = runs_model.predict([batsman_train,position_train,location_train,opposition_train])
predicted_test = runs_model.predict([batsman_test,position_test,location_test,opposition_test])

In [247]:
score_train

array([106,  76,   2, ...,   0,   0,   0])

In [248]:
compare_df_train = pd.DataFrame()
compare_df_test = pd.DataFrame()
compare_df_train['actual']=score_train
compare_df_train['predicted']=predicted_train

compare_df_test['actual']=score_test
compare_df_test['predicted']=predicted_test



In [249]:
compare_df_train=compare_df_train[compare_df_train['actual']!=0]
compare_df_test=compare_df_test[compare_df_test['actual']!=0]

In [250]:
np.mean(np.abs(compare_df_train['actual']-compare_df_train['predicted']))

18.639464361272495

In [251]:
np.mean(np.abs(compare_df_test['actual']-compare_df_test['predicted']))

20.232756821118485

In [252]:
def store_model(model,name):
    # serialize model to JSON
    model_json = model.to_json()
    with open(name+".json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights(name+".h5")
    print("Saved model to disk")

In [253]:
store_model(batsman_model,'batsman_model')
store_model(position_model,'batsman_position_model')
store_model(location_model,'batsman_location_model')
store_model(group_encode_model,'batsman_group_encode_model')
store_model(runs_model,'batsman_encode_runs_model')

Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk


In [254]:
batsman_matrix_list = []
for batsman in batsman_list:
    oh_enc = np.array(batsman_enc_map[batsman])
    batsman_matrix_list.append(oh_enc)
    
batsman_matrix = np.stack(batsman_matrix_list)
    

In [255]:
batsman_matrix.shape

(845, 845)

In [256]:
batsman_enc_matrix = batsman_model.predict(batsman_matrix)

In [257]:
batsman_enc_matrix.shape

(845, 10)

In [258]:
batsman_similarity_matrix = cosine_similarity(batsman_enc_matrix)

In [262]:
similarity_record = []
for ind,batsman in enumerate(batsman_list):
    similarity_dict ={}
    print('similarity for batsman ',batsman)
    similarity_dict['source']=batsman
    for cur_ind,similarity_index in enumerate(reversed(list(batsman_similarity_matrix[ind].argsort()))):
        
        if cur_ind>0 and cur_ind<=5:
            similarity_dict['similar_'+str(cur_ind)]=batsman_list[similarity_index]
            print('\t',batsman_list[similarity_index])
    similarity_record.append(similarity_dict)   

similarity for batsman  Afghanistan Dawlat Zadran
	 Pakistan Fawad Alam
	 Hong Kong MS Chapman
	 South Africa CH Morris
	 Pakistan Mohammad Amir
	 Australia JW Hastings
similarity for batsman  India Harbhajan Singh
	 India YS Chahal
	 Hong Kong Waqas Khan
	 South Africa AL Phehlukwayo
	 Ireland SR Thompson
	 New Zealand MJ Guptill
similarity for batsman  South Africa L Sipamla
	 United Arab Emirates Junaid Siddique (2)
	 Oman Khurram Nawaz
	 United States of America CAH Stevenson
	 United States of America Jasdeep Singh
	 Afghanistan Sayed Shirzad
similarity for batsman  Afghanistan Rashid Khan
	 Zimbabwe RP Burl
	 New Zealand GH Worker
	 Bangladesh Nasir Hossain
	 South Africa F du Plessis
	 India R Ashwin
similarity for batsman  England JE Root
	 Bangladesh Mosaddek Hossain
	 Bangladesh Sunzamul Islam
	 Zimbabwe BRM Taylor
	 Kenya T Mishra
	 New Zealand NT Broom
similarity for batsman  Ireland AD Poynter
	 Bangladesh Abul Hasan
	 Bangladesh Mohammad Ashraful
	 United Arab Emirates Mo

	 United States of America Jasdeep Singh
	 Afghanistan Sayed Shirzad
similarity for batsman  New Zealand LH Ferguson
	 Bangladesh Mosharraf Hossain
	 Kenya SO Ngoche
	 South Africa L Ngidi
	 United Arab Emirates Nasir Aziz
	 South Africa JP Duminy
similarity for batsman  West Indies CR Brathwaite
	 West Indies JL Carter
	 Sri Lanka BMAJ Mendis
	 Australia MP Stoinis
	 Sri Lanka DSK Madushanka
	 Pakistan Misbah-ul-Haq
similarity for batsman  Pakistan Sami Aslam
	 West Indies CAK Walton
	 Bangladesh Mohammad Ashraful
	 New Zealand JD Ryder
	 Ireland PKD Chase
	 West Indies OC McCoy
similarity for batsman  Namibia MG Erasmus
	 United Arab Emirates Junaid Siddique (2)
	 Oman Khurram Nawaz
	 United States of America CAH Stevenson
	 United States of America Jasdeep Singh
	 Afghanistan Sayed Shirzad
similarity for batsman  Bangladesh Jubair Hossain
	 West Indies AD Russell
	 Sri Lanka JDF Vandersay
	 United Arab Emirates Shaiman Anwar
	 Netherlands ES Szwarczynski
	 New Zealand GD Elliott
sim

similarity for batsman  Netherlands Mudassar Bukhari
	 United Arab Emirates Umair Ali
	 Zimbabwe DT Tiripano
	 England TS Roland-Jones
	 United States of America HR Walsh
	 United States of America JS Malhotra
similarity for batsman  Pakistan Haider Ali
	 United Arab Emirates Junaid Siddique (2)
	 Oman Khurram Nawaz
	 United States of America CAH Stevenson
	 United States of America Jasdeep Singh
	 Afghanistan Sayed Shirzad
similarity for batsman  New Zealand CJ Anderson
	 Pakistan Sohail Tanvir
	 Pakistan Haris Sohail
	 United Arab Emirates Ashfaq Ahmed
	 Bangladesh Ziaur Rahman
	 New Zealand DG Brownlie
similarity for batsman  Scotland HG Munsey
	 New Zealand DL Vettori
	 Australia AC Agar
	 Sri Lanka MD Shanaka
	 Scotland AC Evans
	 West Indies KA Pollard
similarity for batsman  Canada HS Baidwan
	 Bangladesh Jahurul Islam
	 Sri Lanka PADLR Sandakan
	 United Arab Emirates AR Berenger
	 Netherlands MR Swart
	 United Arab Emirates Rohan Mustafa
similarity for batsman  Afghanistan Mirw

similarity for batsman  Afghanistan Najeeb Tarakai
	 Bangladesh Taskin Ahmed
	 Australia SA Abbott
	 Scotland SG Whittingham
	 Pakistan Mohammad Irfan
	 Ireland AR White
similarity for batsman  Bangladesh Sunzamul Islam
	 Australia DJ Hussey
	 Canada Khurram Chohan
	 Bangladesh Mosaddek Hossain
	 Bangladesh Taijul Islam
	 Kenya CO Obuya
similarity for batsman  India S Dhawan
	 India SS Iyer
	 New Zealand NT Broom
	 Pakistan Aamer Yamin
	 West Indies RR Beaton
	 Netherlands W Barresi
similarity for batsman  Pakistan Abid Ali
	 United Arab Emirates Junaid Siddique (2)
	 Oman Khurram Nawaz
	 United States of America CAH Stevenson
	 United States of America Jasdeep Singh
	 Afghanistan Sayed Shirzad
similarity for batsman  Zimbabwe N M'shangwe
	 Pakistan Sohail Tanvir
	 Pakistan Imam-ul-Haq
	 Pakistan Haris Sohail
	 Hong Kong Nadeem Ahmed
	 Scotland RM Haq
similarity for batsman  Scotland JH Davey
	 United Arab Emirates Amjad Ali
	 Papua New Guinea D Bau
	 Nepal RK Paudel
	 Sri Lanka FDM Ka

similarity for batsman  Zimbabwe H Masakadza
	 Pakistan Yasir Shah
	 Zimbabwe DT Tiripano
	 New Zealand HM Nicholls
	 West Indies D Bishoo
	 England SW Billings
similarity for batsman  New Zealand KA Jamieson
	 United Arab Emirates Junaid Siddique (2)
	 Oman Khurram Nawaz
	 United States of America CAH Stevenson
	 United States of America Jasdeep Singh
	 Afghanistan Sayed Shirzad
similarity for batsman  India I Sharma
	 West Indies D Ramdin
	 Pakistan Bilal Asif
	 Bangladesh Mominul Haque
	 Sri Lanka LD Chandimal
	 Pakistan Imad Wasim
similarity for batsman  Pakistan Misbah-ul-Haq
	 Canada Usman Limbada
	 England TS Roland-Jones
	 Sri Lanka BMAJ Mendis
	 Australia MR Marsh
	 Australia DJ Hussey
similarity for batsman  West Indies CH Gayle
	 Sri Lanka MDKJ Perera
	 Bangladesh Shamsur Rahman
	 Pakistan Abdur Rehman
	 India KK Ahmed
	 Sri Lanka S Prasanna
similarity for batsman  Sri Lanka A Dananjaya
	 India R Dhawan
	 Pakistan Sarfraz Ahmed
	 West Indies D Ramdin
	 India R Vinay Kumar
	 

	 Pakistan Sarfraz Ahmed
	 Sri Lanka A Dananjaya
	 Zimbabwe AG Cremer
	 West Indies D Bishoo
	 New Zealand C Munro
similarity for batsman  Scotland NM Carter
	 Scotland MM Iqbal
	 Afghanistan Fareed Ahmad
	 Zimbabwe T Maruma
	 Australia MC Henriques
	 India S Kaul
similarity for batsman  Oman Aamir Kaleem
	 United Arab Emirates Junaid Siddique (2)
	 Oman Khurram Nawaz
	 United States of America CAH Stevenson
	 United States of America Jasdeep Singh
	 Afghanistan Sayed Shirzad
similarity for batsman  Kenya T Mishra
	 Australia SE Marsh
	 England JE Root
	 New Zealand NT Broom
	 United Arab Emirates Rameez Shahzad
	 Sri Lanka AD Mathews
similarity for batsman  Pakistan Mohammad Hafeez
	 New Zealand TA Boult
	 Hong Kong Nadeem Ahmed
	 India KM Jadhav
	 West Indies DJ Bravo
	 Canada Usman Limbada
similarity for batsman  India Kuldeep Yadav
	 Australia JA Richardson
	 West Indies DJG Sammy
	 Bangladesh Nurul Hasan
	 Nepal B Regmi
	 India SK Raina
similarity for batsman  Pakistan Azhar Ali
	

	 Zimbabwe R Mutumbami
	 Canada AS Hansra
	 Scotland CD Wallace
	 Papua New Guinea D Ravu
	 Zimbabwe T Kamungozi
similarity for batsman  India YS Chahal
	 Canada NR Kumar
	 Nepal Karan KC
	 India Harbhajan Singh
	 South Africa RR Hendricks
	 United Arab Emirates Amir Hayat
similarity for batsman  England JW Dernbach
	 South Africa Imran Tahir
	 Australia SM Boland
	 Zimbabwe WP Masakadza
	 Zimbabwe CT Mutombodzi
	 United Arab Emirates Khurram Khan
similarity for batsman  Afghanistan Ihsanullah
	 United Arab Emirates Zahoor Khan
	 Netherlands SJ Myburgh
	 South Africa GC Smith
	 West Indies E Lewis
	 South Africa H Davids
similarity for batsman  West Indies KAJ Roach
	 India KD Karthik
	 Sri Lanka SMA Priyanjan
	 Pakistan Ehsan Adil
	 Sri Lanka PC de Silva
	 Pakistan Fakhar Zaman
similarity for batsman  Pakistan Rahat Ali
	 Ireland SR Thompson
	 India Harbhajan Singh
	 India YS Chahal
	 Sri Lanka SMSM Senanayake
	 Canada AS Hansra
similarity for batsman  Pakistan Babar Azam
	 West Indie

	 Papua New Guinea S Bau
	 Afghanistan Ihsanullah
	 United Arab Emirates Amjad Ali
similarity for batsman  Hong Kong Babar Hayat
	 South Africa DW Steyn
	 Sri Lanka BMAJ Mendis
	 New Zealand GH Worker
	 Pakistan Sarfraz Ahmed
	 West Indies JN Mohammed
similarity for batsman  Australia MC Henriques
	 Zimbabwe T Maruma
	 Ireland AR White
	 India S Kaul
	 Scotland NM Carter
	 Afghanistan Fareed Ahmad
similarity for batsman  Australia PJ Hughes
	 New Zealand AF Milne
	 Scotland CD de Lange
	 South Africa L Ngidi
	 New Zealand TA Boult
	 Pakistan Mohammad Hafeez
similarity for batsman  West Indies NO Miller
	 Sri Lanka BMAJ Mendis
	 Pakistan Fakhar Zaman
	 India R Ashwin
	 Pakistan Misbah-ul-Haq
	 Zimbabwe Sikandar Raza
similarity for batsman  Australia AC Agar
	 India S Dhawan
	 West Indies RR Beaton
	 Bangladesh Mahmudullah
	 Zimbabwe Sikandar Raza
	 New Zealand DL Vettori
similarity for batsman  Bangladesh Imrul Kayes
	 United Arab Emirates Nasir Aziz
	 West Indies KMA Paul
	 Hong Kong I

In [263]:
pd.DataFrame(similarity_record).to_excel('Batsman_similarity.xlsx',index=False)

In [264]:
pd.DataFrame(similarity_record)

Unnamed: 0,source,similar_1,similar_2,similar_3,similar_4,similar_5
0,Afghanistan Dawlat Zadran,Pakistan Fawad Alam,Hong Kong MS Chapman,South Africa CH Morris,Pakistan Mohammad Amir,Australia JW Hastings
1,India Harbhajan Singh,India YS Chahal,Hong Kong Waqas Khan,South Africa AL Phehlukwayo,Ireland SR Thompson,New Zealand MJ Guptill
2,South Africa L Sipamla,United Arab Emirates Junaid Siddique (2),Oman Khurram Nawaz,United States of America CAH Stevenson,United States of America Jasdeep Singh,Afghanistan Sayed Shirzad
3,Afghanistan Rashid Khan,Zimbabwe RP Burl,New Zealand GH Worker,Bangladesh Nasir Hossain,South Africa F du Plessis,India R Ashwin
4,England JE Root,Bangladesh Mosaddek Hossain,Bangladesh Sunzamul Islam,Zimbabwe BRM Taylor,Kenya T Mishra,New Zealand NT Broom
...,...,...,...,...,...,...
840,Kenya TM Odoyo,United Arab Emirates Umair Ali,Zimbabwe DT Tiripano,England TS Roland-Jones,United States of America HR Walsh,United States of America JS Malhotra
841,United Arab Emirates Junaid Siddique (2),Oman AV Lalcheta,United Arab Emirates CP Rizwan,England BT Foakes,Nepal A Bohara,West Indies N Pooran
842,United States of America J Theron,Oman AV Lalcheta,United Arab Emirates CP Rizwan,England BT Foakes,Nepal A Bohara,West Indies N Pooran
843,England JC Tredwell,Scotland CS MacLeod,West Indies J Charles,India Kuldeep Yadav,Nepal B Regmi,Bangladesh Mominul Haque


In [267]:
#group_encode_model.predict([batsman_test,position_test,location_test,opposition_test])