In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tqdm.notebook import tqdm
import dateutil
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder

from sklearn.metrics.pairwise import cosine_similarity

In [2]:
custom_date_parser = lambda x: datetime.strptime(x, "%Y-%m-%d")

In [3]:
cutoff_start_year = '2014'
cutoff_start_date = datetime.strptime(cutoff_start_year+'-01-01','%Y-%m-%d')

In [4]:
match_summary_df = pd.read_csv('csv_data/match_list.csv',parse_dates=['date'],date_parser=custom_date_parser)
recent_match_summary_df = match_summary_df[match_summary_df['date']>=cutoff_start_date]
recent_match_summary_df.shape

(680, 11)

In [5]:
country_set = set(recent_match_summary_df['first_innings'].unique()).union(set(recent_match_summary_df['second_innings'].unique()))
country_list = list(country_set)


In [6]:
country_list

['Ireland',
 'Zimbabwe',
 'Hong Kong',
 'United Arab Emirates',
 'New Zealand',
 'Australia',
 'Namibia',
 'Bangladesh',
 'Sri Lanka',
 'Canada',
 'Nepal',
 'England',
 'Oman',
 'West Indies',
 'United States of America',
 'Scotland',
 'Kenya',
 'Afghanistan',
 'Pakistan',
 'South Africa',
 'India',
 'Netherlands',
 'Papua New Guinea']

In [7]:
location_list = list(set(recent_match_summary_df['location'].unique()))
location_list

['Port Elizabeth',
 'Sydney',
 'Kirtipur',
 'Mumbai',
 'Karachi',
 'Benoni',
 'Mirpur',
 'Nelson',
 'Bengaluru',
 'Potchefstroom',
 'Bristol',
 'Fatullah',
 'Sylhet',
 'Rawalpindi Cricket Stadium',
 'Grenada',
 'Dublin',
 'Kimberley',
 'Dubai International Cricket Stadium',
 'Al Amarat',
 'Chennai',
 'Bulawayo Athletic Club',
 'Wellington',
 'Ranchi',
 'St Lucia',
 'Dharamsala',
 'Trinidad',
 'Centurion',
 'London',
 'Chattogram',
 'Galle International Stadium',
 'Abu Dhabi',
 'Auckland',
 'East London',
 'Brisbane',
 'Melbourne',
 'Aberdeen',
 'Barbados',
 'Nagpur',
 'Birmingham',
 'Harare',
 'Rangiri Dambulla International Stadium',
 'Nottingham',
 'Belfast',
 'Napier',
 'Windhoek',
 'Perth',
 'Harare Sports Club',
 'Guyana',
 'Dunedin',
 'Ahmedabad',
 'Cardiff',
 'Perth Stadium',
 'Chittagong',
 'Adelaide',
 'Cuttack',
 'Pallekele International Cricket Stadium',
 'Sharjah',
 'Kochi',
 'Whangarei',
 'Southampton',
 'Deventer',
 'Paarl',
 'Lauderhill',
 'Greater Noida',
 'Antigua',
 '

In [8]:
#team_location_list = country_list+location_list
#team_location_list

In [9]:
# enc_map={}
# decode_map={}

# for count,team_loc in enumerate(team_location_list):
#     vec = []
#     for vec_val in range(len(team_location_list)):
#         if count != vec_val:
#             vec.append(0)
#         else:
#             vec.append(1)
#     enc_map[team_loc]=vec
#     decode_map[str(vec)]=team_loc
    

In [8]:
country_enc_map={}
enc_country_map={}

for count,country in enumerate(country_list):
    country_vec = []
    for vec_val in range(len(country_list)):
        if count != vec_val:
            country_vec.append(0)
        else:
            country_vec.append(1)
    country_enc_map[country]=country_vec
    enc_country_map[str(country_vec)]=country
    

In [9]:
# location_list = list(set(recent_match_summary_df['location'].unique()))
# location_list

In [10]:
loc_enc_map={}
enc_loc_map={}

for count,loc in enumerate(location_list):
    loc_vec = []
    for vec_val in range(len(location_list)):
        if count != vec_val:
            loc_vec.append(0)
        else:
            loc_vec.append(1)
    loc_enc_map[loc]=loc_vec
    enc_loc_map[str(loc_vec)]=loc

In [11]:
match_id_list = list(recent_match_summary_df['match_id'])

In [12]:
team_vector_list_train = []
opponent_vector_list_train = []
location_vector_list_train = []
first_innings_score_list_train = []
total_score_list_train =[]

team_vector_list_test = []
opponent_vector_list_test = []
location_vector_list_test = []
first_innings_score_list_test = []
total_score_list_test =[]

for selected_match_id in tqdm(match_id_list):
    is_train = recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['train_data'].values[0]
    if is_train:
        match_detail_df = pd.read_csv('csv_data/train/'+str(selected_match_id)+'.csv')
    else:
        match_detail_df = pd.read_csv('csv_data/test/'+str(selected_match_id)+'.csv')
        
    team = recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['first_innings'].values[0]
    opponent = recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['second_innings'].values[0]
    location = recent_match_summary_df[recent_match_summary_df['match_id']==selected_match_id]['location'].values[0]
    
    first_innings_score = match_detail_df[match_detail_df['team']==team]['total'].sum()
    total_runs = match_detail_df['total'].sum()
    
    team_vector = np.array(country_enc_map[team])
    opponent_vector = np.array(country_enc_map[opponent])
    location_vector = np.array(loc_enc_map[location])
    
    if is_train:
        team_vector_list_train.append(team_vector)
        opponent_vector_list_train.append(opponent_vector)
        location_vector_list_train.append(location_vector)
        first_innings_score_list_train.append(first_innings_score)
        total_score_list_train.append(total_runs)
    else:
        team_vector_list_test.append(team_vector)
        opponent_vector_list_test.append(opponent_vector)
        location_vector_list_test.append(location_vector)
        first_innings_score_list_test.append(first_innings_score)
        total_score_list_test.append(total_runs)
        
    

HBox(children=(FloatProgress(value=0.0, max=680.0), HTML(value='')))




In [13]:
#match_detail_df

In [14]:
recent_match_summary_df.columns

Index(['match_id', 'date', 'location', 'first_innings', 'second_innings',
       'winner', 'win_by', 'win_dif', 'toss_winner', 'player_of_match',
       'train_data'],
      dtype='object')

In [15]:
#np.array(team_vector_list_train).shape

In [16]:
team_vector_train = np.stack(team_vector_list_train)
opponent_vector_train = np.stack(opponent_vector_list_train)
location_vector_train = np.stack(location_vector_list_train)
Y_score_train = np.array(first_innings_score_list_train)
Y_total_train = np.array(total_score_list_train)

team_vector_test = np.stack(team_vector_list_test)
opponent_vector_test = np.stack(opponent_vector_list_test)
location_vector_test = np.stack(location_vector_list_test)
Y_score_test = np.array(first_innings_score_list_test)
Y_total_test = np.array(total_score_list_test)

In [17]:
team_vector_train.shape,opponent_vector_train.shape,location_vector_train.shape

((515, 23), (515, 23), (515, 114))

In [18]:
import keras as k
import keras.backend as K
from keras.layers import *
from keras.models import Model
from keras.regularizers import l2

from keras.optimizers import Adam, Adadelta

In [19]:
def create_embedding_model(team_vector_len,opponent_vector_len,location_vector_len):
    team_input = Input((team_vector_len,),name="team_input")
    opponent_input = Input((opponent_vector_len,),name="opponent_input")
    location_input = Input((location_vector_len,),name="location_input")

    #team_output = Dropout(0.2)(team_input)
    team_output = Dense(10,activation="relu",use_bias=True, kernel_initializer='normal',bias_regularizer=l2(0.01),kernel_regularizer=l2(0.1),name="team_1")(team_input)
    team_output = Dropout(0.2)(team_output)
    
    #opponent_output = Dropout(0.2)(opponent_input)
    opponent_output = Dense(10,activation="relu",use_bias=True, kernel_initializer='normal',bias_regularizer=l2(0.01),kernel_regularizer=l2(0.1),name="opp_1")(opponent_input)
    opponent_output = Dropout(0.2)(opponent_output)
    
    #location_output = Dropout(0.2)(location_input)
    location_output = Dense(10,activation="relu",use_bias=True, kernel_initializer='normal',bias_regularizer=l2(0.01),kernel_regularizer=l2(0.1),name="loc_1")(location_input)
    location_output = Dropout(0.2)(location_output)
    
    concat_out = Concatenate()([team_output, opponent_output,location_output])
    runs_output = Dropout(0.2)(concat_out)
    runs_output = Dense(1,name="final_score",use_bias=True, kernel_regularizer=l2(0.01),bias_regularizer=l2(0.01),kernel_initializer='normal')(concat_out)
    
    
    team_model = Model(inputs=team_input,outputs=team_output)
    opponent_model = Model(inputs=opponent_input,outputs=opponent_output)
    location_model = Model(inputs=location_input,outputs=location_output)
    group_encode_model = Model(inputs=[team_input,opponent_input,location_input],
                               outputs=concat_out)
    
    runs_model = Model(inputs=[team_input,opponent_input,location_input],
                      outputs=runs_output)
    
    
    return team_model,opponent_model,location_model,group_encode_model,runs_model

In [20]:
# def create_embedding_model(team_vector_len,location_vector_len):
#     team_input = Input((team_vector_len,),name="team_input")
#     #opponent_input = Input((opponent_vector_len,),name="opponent_input")
#     location_input = Input((location_vector_len,),name="location_input")

#     team_output = Dense(10,activation="relu",use_bias=True, kernel_initializer='normal',kernel_regularizer=l2(0.01),name="team_1")(team_input)
    
#     #opponent_output = Dense(10,activation="relu",use_bias=True, kernel_initializer='normal',name="opp_1")(opponent_input)
    
#     location_output = Dense(10,activation="relu",use_bias=True, kernel_initializer='normal',kernel_regularizer=l2(0.01),name="loc_1")(location_input)
    
#     runs_output = Concatenate()([team_output,location_output])
#     runs_output = Dense(1,name="final_score",use_bias=True, kernel_initializer='normal',kernel_regularizer=l2(0.01))(runs_output)
    
    
#     team_model = Model(inputs=team_input,outputs=team_output)
#     #opponent_model = Model(inputs=opponent_input,outputs=opponent_output)
#     location_model = Model(inputs=location_input,outputs=location_output)
    
#     runs_model = Model(inputs=[team_input,location_input],
#                       outputs=runs_output)
    
    
#     return team_model,location_model,runs_model

In [50]:
team_model,opponent_model,location_model,group_encode_model,runs_model = create_embedding_model(team_vector_train.shape[1],
                                                                             opponent_vector_train.shape[1],
                                                                             location_vector_train.shape[1])

# team_model,location_model,runs_model = create_embedding_model(team_vector_train.shape[1],
#                                                                              location_vector_train.shape[1])


In [51]:
#group_encode_model.summary()

In [59]:
runs_model.compile(loss="mean_squared_error", metrics=["mean_absolute_percentage_error","mean_absolute_error"],optimizer=Adam(0.00001))

In [65]:
runs_model.fit([team_vector_train,opponent_vector_train,location_vector_train], Y_score_train,
               validation_data=([team_vector_test,opponent_vector_test,location_vector_test],Y_score_test),epochs=10, batch_size=10)
#runs_model.fit([team_vector_train,location_vector_train], Y_score_train, epochs=1000, batch_size=10)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f8b9c693eb8>

In [66]:
#Y_total_train
runs_model.evaluate([team_vector_test,opponent_vector_test,location_vector_test], Y_score_test)



[4132.14892578125, 23.944561004638672, 49.926368713378906]

In [63]:
#runs_model.predict([team_vector_test,opponent_vector_test,location_vector_test])

In [26]:
location_vec_map={}
one_hot_list =[]
locations=[]
for location in location_list:
    location_enc = np.array(loc_enc_map[location]).reshape(1,-1)
    location_vec = location_model.predict(location_enc)
    location_vec_map[location]=location_vec[0]
    one_hot_list.append(np.array(loc_enc_map[location]))
    locations.append(location)
    
location_matrix = location_model.predict(np.stack(one_hot_list))

In [27]:
location_list

['Port Elizabeth',
 'Sydney',
 'Kirtipur',
 'Mumbai',
 'Karachi',
 'Benoni',
 'Mirpur',
 'Nelson',
 'Bengaluru',
 'Potchefstroom',
 'Bristol',
 'Fatullah',
 'Sylhet',
 'Rawalpindi Cricket Stadium',
 'Grenada',
 'Dublin',
 'Kimberley',
 'Dubai International Cricket Stadium',
 'Al Amarat',
 'Chennai',
 'Bulawayo Athletic Club',
 'Wellington',
 'Ranchi',
 'St Lucia',
 'Dharamsala',
 'Trinidad',
 'Centurion',
 'London',
 'Chattogram',
 'Galle International Stadium',
 'Abu Dhabi',
 'Auckland',
 'East London',
 'Brisbane',
 'Melbourne',
 'Aberdeen',
 'Barbados',
 'Nagpur',
 'Birmingham',
 'Harare',
 'Rangiri Dambulla International Stadium',
 'Nottingham',
 'Belfast',
 'Napier',
 'Windhoek',
 'Perth',
 'Harare Sports Club',
 'Guyana',
 'Dunedin',
 'Ahmedabad',
 'Cardiff',
 'Perth Stadium',
 'Chittagong',
 'Adelaide',
 'Cuttack',
 'Pallekele International Cricket Stadium',
 'Sharjah',
 'Kochi',
 'Whangarei',
 'Southampton',
 'Deventer',
 'Paarl',
 'Lauderhill',
 'Greater Noida',
 'Antigua',
 '

In [28]:
len(location_list),len(country_list)

(114, 23)

In [67]:
# location_vec_map={}
# locations=[]
# one_hot_list =[]
# for location in loc_enc_map.keys():
#     location_enc = np.array(loc_enc_map[location]).reshape(1,-1)
#     location_vec = location_model.predict(location_enc)
#     location_vec_map[location]=location_vec[0]
#     one_hot_list.append(np.array(loc_enc_map[location]))
#     locations.append(location)
#     #break
    
# location_matrix = location_model.predict(np.stack(one_hot_list))

In [68]:
#locations = list(location_vec_map.keys())
# location_sim_matrix = np.zeros((len(locations),len(locations)))
# for i,loc_1 in enumerate(locations):
#     for j,loc_2 in enumerate(locations):
#         if loc_1!=loc_2:
#             location_sim_matrix[i,j]=

In [29]:
location_matrix.shape

(114, 10)

In [30]:
len(locations)

114

In [31]:
location_sim_matrix = cosine_similarity(location_matrix)

In [32]:
# loc_sim_df=pd.DataFrame(location_sim_matrix)
# loc_sim_df.columns = location_list
# loc_sim_df['locations']=location_list
# loc_sim_df

In [73]:
similarity_record = []
for ind,location in enumerate(location_list):
    similarity_dict ={}
    print('similarity for location ',location)
    similarity_dict['source']=location
    for cur_ind,similarity_index in enumerate(reversed(list(location_sim_matrix[ind].argsort()))):
        print('\t',location_list[similarity_index])
        if cur_ind>0 and cur_ind<=4:
            similarity_dict['similar_'+str(cur_ind)]=location_list[similarity_index]
    similarity_record.append(similarity_dict)   

similarity for location  Port Elizabeth
	 Port Elizabeth
	 Kuala Lumpur
	 Potchefstroom
	 Napier
	 Trinidad
	 Cape Town
	 Chennai
	 Kolkata
	 Edinburgh
	 Hambantota
	 Whangarei
	 Guwahati
	 Deventer
	 Adelaide
	 Lauderhill
	 Kandy
	 Dehra Dun
	 Bridgetown
	 Melbourne
	 Windhoek
	 Aberdeen
	 Al Amarat
	 Rawalpindi Cricket Stadium
	 Karachi
	 Kirtipur
	 Sydney
	 Lucknow
	 Sharjah
	 Gros Islet
	 Taunton
	 Bready
	 St George's
	 Canberra
	 Ranchi
	 Antigua
	 Barbados
	 Hobart
	 Jamaica
	 Dunedin
	 Cuttack
	 Galle International Stadium
	 Thiruvananthapuram
	 Ahmedabad
	 Nottingham
	 Queenstown Events Centre
	 Belfast
	 Kochi
	 St Lucia
	 Bengaluru
	 Sylhet
	 St Kitts
	 Colombo
	 Mirpur
	 Benoni
	 Lincoln
	 Durban
	 Chester-le-Street
	 Johannesburg
	 Leeds
	 Nagpur
	 Pallekele International Cricket Stadium
	 Kanpur
	 Hamilton
	 Dubai
	 Rangiri Dambulla International Stadium
	 Mount Maunganui
	 Lahore
	 Christchurch
	 Visakhapatnam
	 Wellington
	 Chattogram
	 Pune
	 Hong Kong
	 Bristol
	 Sout

	 Sydney Cricket Ground
	 Mount Maunganui
	 Dharamsala
	 Nelson
	 Kuala Lumpur
	 Hong Kong
	 Indore
	 Bulawayo Athletic Club
	 Sharjah Cricket Stadium
	 Pune
	 Leeds
	 Centurion
	 Dharmasala
	 Harare Sports Club
	 Brisbane
	 Melbourne Cricket Ground
	 Perth Stadium
	 Dubai International Cricket Stadium
	 Manchester
	 Kimberley
	 Adelaide Oval
similarity for location  Nelson
	 Nelson
	 Chandigarh
	 Sharjah Cricket Stadium
	 Brisbane
	 Bengaluru
	 Sylhet
	 Thiruvananthapuram
	 St Lucia
	 Lincoln
	 Jamaica
	 Chester-le-Street
	 Pune
	 Lahore
	 Paarl
	 Kochi
	 Leeds
	 Adelaide
	 Windhoek
	 Rawalpindi Cricket Stadium
	 Dehra Dun
	 Bridgetown
	 Melbourne
	 Al Amarat
	 Kandy
	 Aberdeen
	 Lauderhill
	 Deventer
	 Sharjah
	 Lucknow
	 Sydney
	 Taunton
	 Karachi
	 Gros Islet
	 Bready
	 Kirtipur
	 St George's
	 Chittagong
	 Bristol
	 Mount Maunganui
	 Greater Noida
	 Barbados
	 Cape Town
	 Ahmedabad
	 Hambantota
	 Visakhapatnam
	 Queenstown Events Centre
	 Kanpur
	 Colombo
	 Rangiri Dambulla Intern

	 Lahore
	 Napier
	 Colombo
	 Hobart
	 Kolkata
	 Mount Maunganui
	 Hambantota
	 Guwahati
	 St Lucia
	 Birmingham
	 Bloemfontein
	 Belfast
	 Hamilton
	 Durban
	 Chattogram
	 Chandigarh
	 Trinidad
	 Fatullah
	 Mirpur
	 Dubai
	 Grenada
	 Hong Kong
	 Cardiff
	 Whangarei
	 Greater Noida
	 Paarl
	 Chittagong
	 Wellington
	 Christchurch
	 Southampton
	 Sydney Cricket Ground
	 Sylhet
	 Potchefstroom
	 Indore
	 Dublin
	 St Kitts
	 Delhi
	 Leeds
	 Dharamsala
	 Auckland
	 Mumbai
	 Abu Dhabi
	 Kuala Lumpur
	 Pune
	 East London
	 Pallekele International Cricket Stadium
	 Harare
	 Hyderabad
	 Centurion
	 Dharmasala
	 Bulawayo
	 Cuttack
	 Bristol
	 Sharjah Cricket Stadium
	 Nelson
	 Rajkot
	 Galle International Stadium
	 London
	 Port Elizabeth
	 Dunedin
	 Guyana
	 Johannesburg
	 Rangiri Dambulla International Stadium
	 Antigua
	 Perth
	 Bulawayo Athletic Club
	 Melbourne Cricket Ground
	 Brisbane
	 Benoni
	 Perth Stadium
	 Dubai International Cricket Stadium
	 Harare Sports Club
	 Dhaka
	 Manchester

	 Perth
	 Sylhet
	 Bulawayo
	 Guyana
	 Antigua
	 Bristol
	 Sharjah Cricket Stadium
	 Nelson
	 Rangiri Dambulla International Stadium
	 Harare Sports Club
	 Melbourne Cricket Ground
	 Kimberley
	 Brisbane
	 Perth Stadium
	 Dhaka
	 Manchester
	 Adelaide Oval
similarity for location  Bulawayo Athletic Club
	 Bulawayo Athletic Club
	 Dubai International Cricket Stadium
	 Christchurch
	 Hamilton
	 Durban
	 Ranchi
	 Delhi
	 Harare
	 Guwahati
	 Cardiff
	 Cape Town
	 Kochi
	 Rajkot
	 Bloemfontein
	 Hong Kong
	 Wellington
	 London
	 Chennai
	 Kolkata
	 Nagpur
	 Perth
	 Pallekele International Cricket Stadium
	 Canberra
	 Pune
	 Ahmedabad
	 Mount Maunganui
	 Belfast
	 Mumbai
	 Grenada
	 Edinburgh
	 Deventer
	 Kandy
	 Al Amarat
	 Lauderhill
	 Adelaide
	 Melbourne
	 Aberdeen
	 Rawalpindi Cricket Stadium
	 Dehra Dun
	 Windhoek
	 Sharjah
	 Gros Islet
	 Lucknow
	 St George's
	 Taunton
	 Karachi
	 Bready
	 Kirtipur
	 Sydney
	 Bridgetown
	 Potchefstroom
	 Dublin
	 Kanpur
	 Guyana
	 Lincoln
	 Bengaluru


	 Kolkata
	 Melbourne Cricket Ground
	 Trinidad
	 Guwahati
	 Bulawayo
	 Paarl
	 Mumbai
	 Cuttack
	 Southampton
	 Belfast
	 Bristol
	 Queenstown Events Centre
	 Dunedin
	 Napier
	 Sylhet
	 Guyana
	 Auckland
	 Sydney Cricket Ground
	 Leeds
	 Hambantota
	 Sharjah Cricket Stadium
	 Perth Stadium
	 Whangarei
	 Nelson
	 St Kitts
	 Potchefstroom
	 East London
	 Johannesburg
	 Kuala Lumpur
	 Benoni
	 Hyderabad
	 Brisbane
	 Manchester
	 Rangiri Dambulla International Stadium
	 Galle International Stadium
	 Port Elizabeth
	 Harare Sports Club
	 Antigua
	 Kimberley
	 Adelaide Oval
	 Dhaka
similarity for location  Chattogram
	 Chattogram
	 Dubai
	 Hobart
	 Barbados
	 Edinburgh
	 Colombo
	 Sharjah
	 Aberdeen
	 Lucknow
	 Bridgetown
	 Dehra Dun
	 St George's
	 Al Amarat
	 Rawalpindi Cricket Stadium
	 Kandy
	 Melbourne
	 Taunton
	 Windhoek
	 Adelaide
	 Gros Islet
	 Sydney
	 Kirtipur
	 Lauderhill
	 Deventer
	 Karachi
	 Bready
	 Canberra
	 Greater Noida
	 Lincoln
	 Visakhapatnam
	 Kochi
	 Nottingham
	 G

	 Cape Town
	 Dharamsala
	 Visakhapatnam
	 Cardiff
	 Kanpur
	 Edinburgh
	 Abu Dhabi
	 Queenstown Events Centre
	 Colombo
	 Bloemfontein
	 Mount Maunganui
	 Hamilton
	 Canberra
	 Southampton
	 Napier
	 Hambantota
	 Chennai
	 Durban
	 Lincoln
	 Delhi
	 Kolkata
	 Birmingham
	 Thiruvananthapuram
	 Christchurch
	 Jamaica
	 Chittagong
	 Kochi
	 Lahore
	 Sydney Cricket Ground
	 Fatullah
	 St Lucia
	 Chattogram
	 East London
	 Guwahati
	 Grenada
	 Chandigarh
	 Barbados
	 Mirpur
	 Hong Kong
	 Hyderabad
	 Hobart
	 Nottingham
	 Dubai
	 Mumbai
	 Harare
	 Wellington
	 Greater Noida
	 Guyana
	 Paarl
	 Auckland
	 Sylhet
	 Pune
	 Potchefstroom
	 Dublin
	 Leeds
	 St Kitts
	 Trinidad
	 Sharjah Cricket Stadium
	 Dharmasala
	 London
	 Whangarei
	 Centurion
	 Indore
	 Galle International Stadium
	 Pallekele International Cricket Stadium
	 Perth Stadium
	 Bulawayo Athletic Club
	 Kuala Lumpur
	 Rajkot
	 Bulawayo
	 Harare Sports Club
	 Nelson
	 Perth
	 Bristol
	 Antigua
	 Port Elizabeth
	 Cuttack
	 Melbourne

	 Port Elizabeth
	 Lahore
	 Mirpur
	 Sylhet
	 Kanpur
	 Chennai
	 Pune
	 Greater Noida
	 Hamilton
	 Kimberley
	 Canberra
	 Durban
	 Harare
	 Guwahati
	 Jamaica
	 Southampton
	 Wellington
	 East London
	 Sydney Cricket Ground
	 Dhaka
	 Johannesburg
	 Bloemfontein
	 Delhi
	 Potchefstroom
	 St Kitts
	 Nottingham
	 Cardiff
	 Dubai
	 Centurion
	 Sharjah Cricket Stadium
	 Hong Kong
	 Birmingham
	 Rangiri Dambulla International Stadium
	 Cuttack
	 Auckland
	 Grenada
	 Bristol
	 Galle International Stadium
	 Perth
	 Fatullah
	 Indore
	 Mumbai
	 Trinidad
	 London
	 Whangarei
	 Bulawayo Athletic Club
	 Dharmasala
	 Dunedin
	 Dubai International Cricket Stadium
	 Dublin
	 Melbourne Cricket Ground
	 Pallekele International Cricket Stadium
	 Rajkot
	 Bulawayo
	 Benoni
	 Brisbane
	 Manchester
similarity for location  Guyana
	 Guyana
	 Kolkata
	 Delhi
	 Hamilton
	 Southampton
	 Sylhet
	 Kanpur
	 Colombo
	 Chester-le-Street
	 Bloemfontein
	 Ranchi
	 Nagpur
	 Mumbai
	 Mirpur
	 Bulawayo
	 Durban
	 Belfas

	 Thiruvananthapuram
	 Lincoln
	 Napier
	 Cape Town
	 Durban
	 St Kitts
	 Nelson
	 Hamilton
	 Lahore
	 Delhi
	 Harare
	 East London
	 Christchurch
	 Jamaica
	 Nottingham
	 Cardiff
	 Dubai
	 Mount Maunganui
	 Southampton
	 Canberra
	 Dublin
	 Sharjah Cricket Stadium
	 Kolkata
	 Rangiri Dambulla International Stadium
	 Auckland
	 Antigua
	 Queenstown Events Centre
	 London
	 Perth Stadium
	 Guyana
	 Chennai
	 Sydney Cricket Ground
	 Harare Sports Club
	 Wellington
	 Johannesburg
	 Guwahati
	 Bulawayo
	 Centurion
	 Perth
	 Leeds
	 Melbourne Cricket Ground
	 Cuttack
	 Indore
	 Hong Kong
	 Whangarei
	 Pune
	 Trinidad
	 Brisbane
	 Pallekele International Cricket Stadium
	 Dhaka
	 Adelaide Oval
	 Dunedin
	 Manchester
	 Mumbai
	 Potchefstroom
	 Kuala Lumpur
	 Galle International Stadium
	 Rajkot
	 Port Elizabeth
	 Dharmasala
	 Bulawayo Athletic Club
	 Benoni
	 Dubai International Cricket Stadium
	 Kimberley
similarity for location  Adelaide
	 Sharjah
	 Gros Islet
	 Lauderhill
	 Adelaide
	 Rawa

	 Windhoek
	 Sharjah
	 Aberdeen
	 Al Amarat
	 Sydney
	 Kirtipur
	 Karachi
	 Rawalpindi Cricket Stadium
	 Bready
	 Lucknow
	 Melbourne
	 Gros Islet
	 Bengaluru
	 Chattogram
	 Napier
	 Chittagong
	 Grenada
	 Ahmedabad
	 Thiruvananthapuram
	 Lahore
	 Leeds
	 Indore
	 Hyderabad
	 Centurion
	 Colombo
	 Mount Maunganui
	 Kochi
	 Birmingham
	 Canberra
	 Ranchi
	 Cape Town
	 Auckland
	 Chester-le-Street
	 Nagpur
	 Visakhapatnam
	 Nelson
	 Christchurch
	 Queenstown Events Centre
	 Jamaica
	 St Lucia
	 Guwahati
	 Chennai
	 Dublin
	 Nottingham
	 Edinburgh
	 Sydney Cricket Ground
	 Dharamsala
	 Harare
	 Dubai
	 Belfast
	 Abu Dhabi
	 Johannesburg
	 Sharjah Cricket Stadium
	 Fatullah
	 Bloemfontein
	 Whangarei
	 Kuala Lumpur
	 Kanpur
	 St Kitts
	 Trinidad
	 Bristol
	 Wellington
	 East London
	 Hamilton
	 Cuttack
	 Rangiri Dambulla International Stadium
	 Kolkata
	 Mirpur
	 Brisbane
	 Pune
	 Melbourne Cricket Ground
	 Hong Kong
	 Sylhet
	 Adelaide Oval
	 Dunedin
	 Potchefstroom
	 Durban
	 London
	 Ca

	 Delhi
	 Leeds
	 Dharamsala
	 Auckland
	 Mumbai
	 Abu Dhabi
	 Kuala Lumpur
	 Pune
	 East London
	 Pallekele International Cricket Stadium
	 Harare
	 Hyderabad
	 Centurion
	 Dharmasala
	 Bulawayo
	 Cuttack
	 Bristol
	 Sharjah Cricket Stadium
	 Nelson
	 Rajkot
	 Galle International Stadium
	 London
	 Port Elizabeth
	 Dunedin
	 Guyana
	 Johannesburg
	 Rangiri Dambulla International Stadium
	 Antigua
	 Perth
	 Bulawayo Athletic Club
	 Melbourne Cricket Ground
	 Brisbane
	 Benoni
	 Perth Stadium
	 Dubai International Cricket Stadium
	 Harare Sports Club
	 Dhaka
	 Manchester
	 Kimberley
	 Adelaide Oval
similarity for location  Colombo
	 Colombo
	 Barbados
	 Kolkata
	 Lucknow
	 Bridgetown
	 Dehra Dun
	 Kandy
	 Lauderhill
	 Deventer
	 Adelaide
	 St George's
	 Rawalpindi Cricket Stadium
	 Windhoek
	 Al Amarat
	 Aberdeen
	 Melbourne
	 Karachi
	 Sharjah
	 Gros Islet
	 Taunton
	 Kirtipur
	 Sydney
	 Bready
	 Hambantota
	 Lahore
	 Kanpur
	 Visakhapatnam
	 Nagpur
	 Cape Town
	 Lincoln
	 Wellington
	

	 Colombo
	 Dubai
	 Guwahati
	 Visakhapatnam
	 Durban
	 Trinidad
	 Lahore
	 Cuttack
	 Greater Noida
	 Nagpur
	 Christchurch
	 Mount Maunganui
	 Belfast
	 Chittagong
	 Mirpur
	 Pallekele International Cricket Stadium
	 Wellington
	 Chester-le-Street
	 Centurion
	 Kanpur
	 Grenada
	 Birmingham
	 Leeds
	 Queenstown Events Centre
	 Whangarei
	 Dublin
	 Chandigarh
	 Dunedin
	 Indore
	 Kolkata
	 Hong Kong
	 Kuala Lumpur
	 Hyderabad
	 Bristol
	 Bloemfontein
	 Johannesburg
	 Dharamsala
	 Pune
	 Hamilton
	 Port Elizabeth
	 Harare
	 St Kitts
	 London
	 Sylhet
	 Perth
	 Rangiri Dambulla International Stadium
	 Fatullah
	 Potchefstroom
	 Nelson
	 Abu Dhabi
	 Galle International Stadium
	 Dharmasala
	 Dubai International Cricket Stadium
	 Cardiff
	 Auckland
	 Antigua
	 Rajkot
	 Delhi
	 Sydney Cricket Ground
	 Benoni
	 Bulawayo
	 East London
	 Southampton
	 Sharjah Cricket Stadium
	 Harare Sports Club
	 Bulawayo Athletic Club
	 Mumbai
	 Guyana
	 Adelaide Oval
	 Melbourne Cricket Ground
	 Brisbane
	 

	 Christchurch
	 Chattogram
	 Mumbai
	 Chandigarh
	 St Kitts
	 Pune
	 Dubai
	 Belfast
	 Sylhet
	 Auckland
	 Rajkot
	 Dharmasala
	 Harare
	 Mirpur
	 Bloemfontein
	 Southampton
	 St Lucia
	 Durban
	 Bulawayo
	 Nelson
	 Antigua
	 Guyana
	 East London
	 Hong Kong
	 Birmingham
	 Pallekele International Cricket Stadium
	 Harare Sports Club
	 Hyderabad
	 Dubai International Cricket Stadium
	 Chittagong
	 Delhi
	 Rangiri Dambulla International Stadium
	 Galle International Stadium
	 Grenada
	 Sharjah Cricket Stadium
	 Dharamsala
	 Fatullah
	 Cardiff
	 Bristol
	 Dublin
	 Melbourne Cricket Ground
	 Bulawayo Athletic Club
	 Brisbane
	 London
	 Abu Dhabi
	 Perth
	 Adelaide Oval
	 Perth Stadium
	 Dhaka
	 Kimberley
	 Manchester
similarity for location  Rajkot
	 Rajkot
	 Wellington
	 Bloemfontein
	 Nottingham
	 Kanpur
	 Lahore
	 London
	 Visakhapatnam
	 Trinidad
	 Dubai
	 Indore
	 Mumbai
	 Pallekele International Cricket Stadium
	 Chennai
	 Durban
	 Lincoln
	 Barbados
	 Kolkata
	 Canberra
	 Dharmasal

	 Edinburgh
	 Visakhapatnam
	 Chandigarh
	 Nottingham
	 Lahore
	 Chester-le-Street
	 Dharamsala
	 Bloemfontein
	 Kanpur
	 Pallekele International Cricket Stadium
	 Whangarei
	 Fatullah
	 Mumbai
	 Christchurch
	 Grenada
	 Hobart
	 Abu Dhabi
	 St Lucia
	 Kolkata
	 Napier
	 Bulawayo Athletic Club
	 Indore
	 Galle International Stadium
	 Barbados
	 Sydney Cricket Ground
	 London
	 Trinidad
	 Delhi
	 Sharjah Cricket Stadium
	 Southampton
	 East London
	 Wellington
	 Hambantota
	 Dubai
	 Leeds
	 Bulawayo
	 Centurion
	 Potchefstroom
	 Sylhet
	 Colombo
	 Paarl
	 Chattogram
	 Nelson
	 Harare
	 Rajkot
	 Mirpur
	 Dubai International Cricket Stadium
	 Chittagong
	 Hyderabad
	 Auckland
	 St Kitts
	 Kimberley
	 Brisbane
	 Perth
	 Kuala Lumpur
	 Greater Noida
	 Perth Stadium
	 Port Elizabeth
	 Bristol
	 Guyana
	 Cuttack
	 Manchester
	 Rangiri Dambulla International Stadium
	 Melbourne Cricket Ground
	 Dunedin
	 Dhaka
	 Antigua
	 Harare Sports Club
	 Benoni
	 Johannesburg
	 Adelaide Oval
similarity fo

	 Adelaide Oval
similarity for location  Bloemfontein
	 Bloemfontein
	 Kanpur
	 Visakhapatnam
	 Lahore
	 Birmingham
	 Cardiff
	 Kandy
	 Kirtipur
	 Lucknow
	 Bridgetown
	 Dehra Dun
	 Sharjah
	 Karachi
	 St George's
	 Deventer
	 Adelaide
	 Windhoek
	 Aberdeen
	 Melbourne
	 Al Amarat
	 Lauderhill
	 Rawalpindi Cricket Stadium
	 Taunton
	 Sydney
	 Gros Islet
	 Bready
	 Nagpur
	 Durban
	 Delhi
	 London
	 Ahmedabad
	 Kochi
	 Hamilton
	 Southampton
	 Nottingham
	 Fatullah
	 Lincoln
	 Ranchi
	 Colombo
	 Rajkot
	 Chester-le-Street
	 Grenada
	 Wellington
	 Cape Town
	 Bengaluru
	 Edinburgh
	 Mumbai
	 Kolkata
	 Mirpur
	 Bulawayo
	 Chattogram
	 Barbados
	 Jamaica
	 Dubai
	 Canberra
	 Chittagong
	 Chandigarh
	 Sydney Cricket Ground
	 Indore
	 Mount Maunganui
	 Chennai
	 Guyana
	 St Lucia
	 Abu Dhabi
	 Thiruvananthapuram
	 Greater Noida
	 Hong Kong
	 Centurion
	 Harare
	 Sylhet
	 Queenstown Events Centre
	 Pallekele International Cricket Stadium
	 Dharmasala
	 Dublin
	 Guwahati
	 Belfast
	 Auckland
	

	 Delhi
	 London
	 Dunedin
	 Grenada
	 Port Elizabeth
	 Whangarei
	 Potchefstroom
	 Dharamsala
	 Sylhet
	 Rajkot
	 Dharmasala
	 Southampton
	 St Kitts
	 Kuala Lumpur
	 Greater Noida
	 Centurion
	 Galle International Stadium
	 Paarl
	 Harare
	 Dublin
	 Abu Dhabi
	 Chandigarh
	 Bristol
	 Indore
	 Johannesburg
	 Pune
	 Hyderabad
	 Mumbai
	 Leeds
	 Benoni
	 Auckland
	 Antigua
	 Guyana
	 Sydney Cricket Ground
	 Bulawayo Athletic Club
	 Bulawayo
	 East London
	 Dubai International Cricket Stadium
	 Nelson
	 Rangiri Dambulla International Stadium
	 Harare Sports Club
	 Sharjah Cricket Stadium
	 Perth Stadium
	 Melbourne Cricket Ground
	 Dhaka
	 Kimberley
	 Manchester
	 Brisbane
	 Adelaide Oval
similarity for location  Mount Maunganui
	 Mount Maunganui
	 Ahmedabad
	 Bengaluru
	 Queenstown Events Centre
	 Pune
	 Lincoln
	 Hong Kong
	 Thiruvananthapuram
	 Jamaica
	 Cape Town
	 Kandy
	 Karachi
	 Lucknow
	 Bridgetown
	 Rawalpindi Cricket Stadium
	 Sharjah
	 Lauderhill
	 Al Amarat
	 Adelaide
	 Wind

	 St Kitts
	 Mirpur
	 Christchurch
	 Chandigarh
	 Thiruvananthapuram
	 Pune
	 Harare
	 East London
	 St Lucia
	 Port Elizabeth
	 Cardiff
	 Indore
	 Rajkot
	 Whangarei
	 Hobart
	 Hong Kong
	 Greater Noida
	 Chattogram
	 Fatullah
	 Birmingham
	 Bulawayo Athletic Club
	 Dubai
	 Chittagong
	 Nelson
	 Cuttack
	 Grenada
	 Dunedin
	 Dharamsala
	 Paarl
	 Pallekele International Cricket Stadium
	 Dharmasala
	 Auckland
	 Sharjah Cricket Stadium
	 Bristol
	 Dublin
	 Centurion
	 Johannesburg
	 Abu Dhabi
	 Benoni
	 Rangiri Dambulla International Stadium
	 London
	 Hyderabad
	 Perth
	 Antigua
	 Galle International Stadium
	 Harare Sports Club
	 Dubai International Cricket Stadium
	 Brisbane
	 Melbourne Cricket Ground
	 Dhaka
	 Perth Stadium
	 Kimberley
	 Adelaide Oval
	 Manchester
similarity for location  Hyderabad
	 Hyderabad
	 Napier
	 Hambantota
	 Belfast
	 Abu Dhabi
	 Chittagong
	 Chester-le-Street
	 Nagpur
	 Paarl
	 St Lucia
	 Windhoek
	 Dehra Dun
	 Kandy
	 Lauderhill
	 Deventer
	 Adelaide
	 Ab

In [74]:
pd.DataFrame(similarity_record)

Unnamed: 0,source,similar_1,similar_2,similar_3,similar_4
0,Port Elizabeth,Kuala Lumpur,Potchefstroom,Napier,Trinidad
1,Sydney,Gros Islet,Lauderhill,Adelaide,Rawalpindi Cricket Stadium
2,Kirtipur,Gros Islet,Lauderhill,Adelaide,Rawalpindi Cricket Stadium
3,Mumbai,Southampton,Kolkata,Kanpur,Cardiff
4,Karachi,Gros Islet,Lauderhill,Adelaide,Rawalpindi Cricket Stadium
...,...,...,...,...,...
109,Lincoln,Lahore,Sharjah,Bridgetown,Dehra Dun
110,Manchester,Birmingham,Fatullah,Sharjah Cricket Stadium,Perth Stadium
111,Chester-le-Street,Nagpur,Sharjah,Melbourne,Bready
112,Kolkata,Ranchi,Hamilton,Cape Town,Colombo


In [75]:
pd.DataFrame(similarity_record).to_excel('location_similarity.xlsx',index=False)

In [34]:
# first_innings_vec_map={}
# teams=[]
# batting_one_hot_list =[]
# for team in country_enc_map.keys():
#     country_enc = np.array(country_enc_map[team]).reshape(1,-1)
#     country_vec = team_model.predict(country_enc)
#     first_innings_vec_map[team]=country_vec[0]
#     batting_one_hot_list.append(np.array(country_enc_map[team]))
#     teams.append(team)
#     #break
    
# team_matrix = team_model.predict(np.stack(batting_one_hot_list))

In [35]:
# team_sim_matrix = cosine_similarity(team_matrix)

In [36]:
# for ind,team in enumerate(teams):
#     print('similarity for location ',team)
#     for similarity_index in reversed(list(team_sim_matrix[ind].argsort())):
#         print('\t',teams[similarity_index])

In [37]:
#team_model.predict(np.array(country_enc_map['India']).reshape(1,-1))

In [38]:
#opponent_model.predict(np.array(country_enc_map['India']).reshape(1,-1))

In [39]:
# second_innings_vec_map={}
# #teams=[]
# #second_one_hot_list =[]
# for team in country_enc_map.keys():
#     country_enc = np.array(country_enc_map[team]).reshape(1,-1)
#     country_vec = opponent_model.predict(country_enc)
#     second_innings_vec_map[team]=country_vec[0]
#     #second_one_hot_list.append(np.array(country_enc_map[team]))
#     #teams.append(team)
#     #break
    
# #team_matrix = team_model.predict(np.stack(batting_one_hot_list))

In [41]:
import pickle
# pickle.dump(location_vec_map,open('location_vector_map.pkl','wb'))
# pickle.dump(first_innings_vec_map,open('first_innings_vec_map.pkl','wb'))
# pickle.dump(second_innings_vec_map,open('second_innings_vec_map.pkl','wb'))

# pickle.dump(enc_map,open('country_location_enc_map.pkl','wb'))
# pickle.dump(decode_map,open('country_location_dec_map.pkl','wb'))

# pickle.dump(country_enc_map,open('country_enc_map.pkl','wb'))
# pickle.dump(loc_enc_map,open('loc_enc_map.pkl','wb'))
# pickle.dump(enc_country_map,open('enc_country_map.pkl','wb'))
# pickle.dump(enc_loc_map,open('enc_loc_map.pkl','wb'))




In [42]:
def store_model(model,name):
    # serialize model to JSON
    model_json = model.to_json()
    with open(name+".json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights(name+".h5")
    print("Saved model to disk")

In [68]:
store_model(runs_model,'run_model_embedding_V2')
store_model(team_model,'first_inn_model_embedding_V2')
store_model(opponent_model,'second_inn_model_embedding_V2')
store_model(location_model,'location_model_embedding_V2')
store_model(group_encode_model,'group_encode_model_V2')

Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
Saved model to disk
