### Libraries

In [1]:
%load_ext autoreload
%autoreload 2

In [61]:
import opti_recruit.feature_engineering as fe
import opti_recruit.get_team_features as gtf
import opti_recruit.similarity as si
import pandas as pd
import pickle 
from sklearn.pipeline import Pipeline,make_pipeline,make_union
from sklearn.compose import make_column_transformer,make_column_selector
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from opti_recruit.data import get_data, clean_data
import numpy as np

### Import data

#### Fifa 22

In [62]:

df22 = get_data()[22]
df22.head()

Unnamed: 0,sofifa_id,player_url,short_name,long_name,player_positions,overall,potential,value_eur,wage_eur,age,...,lcb,cb,rcb,rb,gk,player_face_url,club_logo_url,club_flag_url,nation_logo_url,nation_flag_url
0,158023,https://sofifa.com/player/158023/lionel-messi/...,L. Messi,Lionel Andrés Messi Cuccittini,"RW, ST, CF",93,93,78000000.0,320000.0,34,...,50+3,50+3,50+3,61+3,19+3,https://cdn.sofifa.net/players/158/023/22_120.png,https://cdn.sofifa.net/teams/73/60.png,https://cdn.sofifa.net/flags/fr.png,https://cdn.sofifa.net/teams/1369/60.png,https://cdn.sofifa.net/flags/ar.png
1,188545,https://sofifa.com/player/188545/robert-lewand...,R. Lewandowski,Robert Lewandowski,ST,92,92,119500000.0,270000.0,32,...,60+3,60+3,60+3,61+3,19+3,https://cdn.sofifa.net/players/188/545/22_120.png,https://cdn.sofifa.net/teams/21/60.png,https://cdn.sofifa.net/flags/de.png,https://cdn.sofifa.net/teams/1353/60.png,https://cdn.sofifa.net/flags/pl.png
2,20801,https://sofifa.com/player/20801/c-ronaldo-dos-...,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,"ST, LW",91,91,45000000.0,270000.0,36,...,53+3,53+3,53+3,60+3,20+3,https://cdn.sofifa.net/players/020/801/22_120.png,https://cdn.sofifa.net/teams/11/60.png,https://cdn.sofifa.net/flags/gb-eng.png,https://cdn.sofifa.net/teams/1354/60.png,https://cdn.sofifa.net/flags/pt.png
3,190871,https://sofifa.com/player/190871/neymar-da-sil...,Neymar Jr,Neymar da Silva Santos Júnior,"LW, CAM",91,91,129000000.0,270000.0,29,...,50+3,50+3,50+3,62+3,20+3,https://cdn.sofifa.net/players/190/871/22_120.png,https://cdn.sofifa.net/teams/73/60.png,https://cdn.sofifa.net/flags/fr.png,,https://cdn.sofifa.net/flags/br.png
4,192985,https://sofifa.com/player/192985/kevin-de-bruy...,K. De Bruyne,Kevin De Bruyne,"CM, CAM",91,91,125500000.0,350000.0,30,...,69+3,69+3,69+3,75+3,21+3,https://cdn.sofifa.net/players/192/985/22_120.png,https://cdn.sofifa.net/teams/10/60.png,https://cdn.sofifa.net/flags/gb-eng.png,https://cdn.sofifa.net/teams/1325/60.png,https://cdn.sofifa.net/flags/be.png


#### FBref player list 

In [63]:
json_player = pd.read_json('../raw_data/sofifa_ids.json').rename(columns={0: "sofifa_id"})
json_player.shape

(7717, 1)

In [64]:
list_json_player = list(json_player.sofifa_id)
# list_json_player

#### Filter df22 with json_player



In [65]:
boolean_series = df22.sofifa_id.isin(list_json_player)
filtered_df22 = df22[boolean_series]
filtered_df22.shape

(6760, 110)

#### Add engineered feature for similarity matrix


In [78]:
# df22_boost = fe.add_features(filtered_df22)
df22_boost = fe.add_features(df22)

In [79]:
df22_boost.prefered_pos.value_counts()

CB     3339
ST     2608
CM     2173
GK     2132
CDM    1665
LB     1360
RB     1346
CAM    1151
RM     1028
LM     1016
RW      495
LW      435
RWB     178
LWB     171
CF      142
Name: prefered_pos, dtype: int64

### Data Preparation

#### Prepare Numeric Dataframe

In [80]:
to_drop = ['sofifa_id','short_name','player_positions','height_cm','weight_kg','club_team_id'
          ,'club_name' ,'league_name','club_position','club_joined','club_contract_valid_until','nationality_name'
          ,'nation_team_id','preferred_foot','weak_foot','work_rate','body_type','player_tags','player_traits'
          ,'is_bench','potential_diff','age_bin','player_pos','new_nationality'
          ,'value_eur','wage_eur','release_clause_eur']
#             'age']

In [81]:
df22_num = df22_boost.drop(to_drop, axis = 1)

#### Pipeline

In [82]:
from sklearn.pipeline import Pipeline,make_pipeline,make_union
from sklearn.compose import make_column_transformer,make_column_selector
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn import set_config; set_config(display='diagram')


num_transformer = make_pipeline(SimpleImputer(), StandardScaler())
num_col = make_column_selector(dtype_include=['float64','int64'])

cat_transformer = OneHotEncoder()
cat_col = make_column_selector(dtype_include=['object','category'])

preproc_basic = make_column_transformer(
    (num_transformer, num_col),
    (cat_transformer, cat_col),
    remainder='passthrough')

In [83]:
SimpleImputer.get_feature_names_out = (lambda self, names=None: self.feature_names_in_)

#### Pipeline DF

In [84]:
X_train_transformed = preproc_basic.fit_transform(df22_num)


test_df = pd.DataFrame(X_train_transformed, 
             columns=preproc_basic.get_feature_names_out()
            )


### Cosine Similarity

#### Raw sim matrix

In [85]:
from sklearn.metrics.pairwise import cosine_similarity 
similarities = cosine_similarity(test_df)

#### Misc functions

In [86]:
def normalize(array):
    return np.array([round(num, 2) for num in (array - min(array))*100/(max(array)-min(array))])

def get_index(x):
    return df22[df22['sofifa_id']==x].index.tolist()[0]

#### Testing

In [89]:
playerIndex = similarities[0,:].argsort()[-51:][::-1][1:]
for i in playerIndex:
        print(i,df22.loc[i,'short_name'])

75 E. Hazard
3 Neymar Jr
17 M. Salah
79 M. Reus
13 H. Son
405 A. Sánchez
141 C. Vela
64 K. Coman
33 Á. Di María
11 K. Benzema
30 S. Agüero
89 S. Gnabry
2 Cristiano Ronaldo
83 A. Griezmann
182 Marco Asensio
27 R. Sterling
41 P. Dybala
132 L. Sané
16 S. Mané
58 R. Mahrez
1 R. Lewandowski
6 K. Mbappé
333 E. Forsberg
344 J. Brandt
81 P. Aubameyang
23 L. Suárez
204 G. Bale
429 J. Draxler
892 Alex Teixeira
1521 S. Feghouli
215 M. Götze
218 A. Lacazette
444 Q. Promes
121 Y. Carrasco
533 S. El Shaarawy
342 A. Martial
212 Coutinho
543 X. Shaqiri
138 K. Havertz
72 David Silva
355 H. Lozano
109 D. Tadić
35 T. Müller
156 H. Mkhitaryan
55 L. Insigne
877 K. Gameiro
131 N. Fekir
1151 T. Walcott
69 A. Gómez
4 K. De Bruyne


In [90]:
index = get_index(158023)
index

0

In [91]:
# list of reco for a specfic player -- 
# print((similarities[index,:]), similarities[index,:].argsort())
## list of reco for a specfic player, sorted by index DESC -- similarities[index,:].argsort()
## list of reco for a specfic player, sorted by index ASC -- similarities[index,:].argsort()[::-1]
## list of reco for specfic player, sorted by index ASC, without player itself -- similarities[index,:].argsort()[::-1]

In [92]:
print(df22.loc[75,'short_name'])

E. Hazard


In [93]:
similarities[0,:].argsort()[-101:][::-1][1:]

array([  75,    3,   17,   79,   13,  405,  141,   64,   33,   11,   30,
         89,    2,   83,  182,   27,   41,  132,   16,   58,    1,    6,
        333,  344,   81,   23,  204,  429,  892, 1521,  215,  218,  444,
        121,  533,  342,  212,  543,  138,   72,  355,  109,   35,  156,
         55,  877,  131, 1151,   69,    4,  179,  548,   86,  406,  415,
        210,  884,  283,  299,   32,  223,  100,  398,  646,  286,  263,
        638,  175,    9,  310,  112,  193,  118,   45,   36,  700,  259,
         97,  192,  308,  126,   94,  123, 1790,   25,  296, 1507,   85,
        684,  213,  511,  291,  195,  318,  518,  558,  451,   37,  209,
       1475])

In [35]:
list(reversed(similarities[index,:].argsort()))

[0,
 75,
 3,
 17,
 79,
 13,
 405,
 141,
 64,
 33,
 11,
 30,
 89,
 2,
 83,
 182,
 27,
 41,
 132,
 16,
 58,
 1,
 6,
 333,
 344,
 81,
 23,
 204,
 429,
 892,
 1521,
 215,
 218,
 444,
 121,
 533,
 342,
 212,
 543,
 138,
 72,
 355,
 109,
 35,
 156,
 55,
 877,
 131,
 1151,
 69,
 4,
 179,
 548,
 86,
 406,
 415,
 210,
 884,
 283,
 299,
 32,
 223,
 100,
 398,
 646,
 286,
 263,
 638,
 175,
 9,
 310,
 112,
 193,
 118,
 45,
 36,
 700,
 259,
 97,
 192,
 308,
 126,
 94,
 123,
 1790,
 25,
 296,
 1507,
 85,
 684,
 213,
 511,
 291,
 195,
 318,
 518,
 558,
 451,
 37,
 209,
 1475,
 234,
 863,
 1596,
 951,
 326,
 163,
 394,
 60,
 407,
 1159,
 433,
 139,
 116,
 254,
 372,
 268,
 727,
 634,
 145,
 339,
 1295,
 639,
 516,
 878,
 168,
 601,
 1091,
 74,
 110,
 448,
 358,
 314,
 1168,
 551,
 28,
 1565,
 1303,
 2429,
 397,
 300,
 986,
 164,
 434,
 135,
 1560,
 1629,
 536,
 105,
 241,
 2012,
 122,
 617,
 572,
 953,
 692,
 360,
 2054,
 78,
 471,
 349,
 521,
 172,
 563,
 651,
 616,
 1662,
 1639,
 92,
 1178,
 1390,
 

In [94]:
d = {
    'index' : index
    ,'scores':np.sort(normalize(similarities[index,:]))[-101:][::-1][1:]
    ,'reco_player_index': similarities[index,:].argsort()[-101:][::-1][1:]
}

d

{'index': 0,
 'scores': array([98.56, 98.21, 98.01, 97.91, 97.91, 97.88, 97.85, 97.76, 97.63,
        97.57, 97.56, 97.56, 97.55, 97.4 , 97.32, 97.2 , 96.95, 96.93,
        96.92, 96.91, 96.84, 96.69, 96.65, 96.64, 96.62, 96.56, 96.54,
        96.52, 96.51, 96.48, 96.48, 96.47, 96.42, 96.33, 96.29, 96.29,
        96.28, 96.21, 96.2 , 96.19, 96.16, 96.15, 96.14, 96.05, 96.04,
        95.97, 95.94, 95.93, 95.91, 95.91, 95.88, 95.79, 95.77, 95.76,
        95.66, 95.62, 95.61, 95.53, 95.45, 95.44, 95.43, 95.41, 95.4 ,
        95.39, 95.37, 95.36, 95.33, 95.33, 95.3 , 95.25, 95.25, 94.99,
        94.94, 94.91, 94.9 , 94.89, 94.87, 94.79, 94.76, 94.56, 94.53,
        94.48, 94.48, 94.43, 94.42, 94.39, 94.37, 94.37, 94.34, 94.23,
        94.22, 94.17, 94.16, 94.13, 94.12, 94.12, 94.1 , 94.06, 94.05,
        94.04]),
 'reco_player_index': array([  75,    3,   17,   79,   13,  405,  141,   64,   33,   11,   30,
          89,    2,   83,  182,   27,   41,  132,   16,   58,    1,    6,
         3

In [95]:
for i in tqdm(range(0,1)):
# for i in range(0,2):
    d = {
        'index':i
        ,'scores':np.sort(normalize(similarities[i,:]))[-101:][::-1][1:]
        ,'reco_player_index': similarities[i,:].argsort()[-101:][::-1][1:]
    }
    new_simili.append(d)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm(range(0,1)):


  0%|          | 0/1 [00:00<?, ?it/s]

[{'index': 0,
  'scores': array([98.56, 98.21, 98.01, 97.91, 97.91, 97.88, 97.85, 97.76, 97.63,
         97.57, 97.56, 97.56, 97.55, 97.4 , 97.32, 97.2 , 96.95, 96.93,
         96.92, 96.91, 96.84, 96.69, 96.65, 96.64, 96.62, 96.56, 96.54,
         96.52, 96.51, 96.48, 96.48, 96.47, 96.42, 96.33, 96.29, 96.29,
         96.28, 96.21, 96.2 , 96.19, 96.16, 96.15, 96.14, 96.05, 96.04,
         95.97, 95.94, 95.93, 95.91, 95.91, 95.88, 95.79, 95.77, 95.76,
         95.66, 95.62, 95.61, 95.53, 95.45, 95.44, 95.43, 95.41, 95.4 ,
         95.39, 95.37, 95.36, 95.33, 95.33, 95.3 , 95.25, 95.25, 94.99,
         94.94, 94.91, 94.9 , 94.89, 94.87, 94.79, 94.76, 94.56, 94.53,
         94.48, 94.48, 94.43, 94.42, 94.39, 94.37, 94.37, 94.34, 94.23,
         94.22, 94.17, 94.16, 94.13, 94.12, 94.12, 94.1 , 94.06, 94.05,
         94.04]),
  'reco_player_index': array([  75,    3,   17,   79,   13,  405,  141,   64,   33,   11,   30,
           89,    2,   83,  182,   27,   41,  132,   16,   58,    1,  

#### Similarity : List of dict for each player ( index looked up player , scores, index of reco players)

In [96]:
from tqdm import tqdm_notebook as tqdm
import ipywidgets 

In [104]:
new_simili = []
for i in tqdm(range(0,len(similarities))):
# for i in range(0,2):
    d = {
        'index':i
        ,'scores':np.sort(normalize(similarities[i,:]))[-101:][::-1][1:]
        ,'reco_player_index': similarities[i,:].argsort()[-101:][::-1][1:]
    }
    new_simili.append(d)


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm(range(0,len(similarities))):


  0%|          | 0/19239 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [99]:
(new_simili[0])

{'index': 0,
 'scores': array([98.56, 98.21, 98.01, 97.91, 97.91, 97.88, 97.85, 97.76, 97.63,
        97.57, 97.56, 97.56, 97.55, 97.4 , 97.32, 97.2 , 96.95, 96.93,
        96.92, 96.91, 96.84, 96.69, 96.65, 96.64, 96.62, 96.56, 96.54,
        96.52, 96.51, 96.48, 96.48, 96.47, 96.42, 96.33, 96.29, 96.29,
        96.28, 96.21, 96.2 , 96.19, 96.16, 96.15, 96.14, 96.05, 96.04,
        95.97, 95.94, 95.93, 95.91, 95.91]),
 'reco_player_index': array([  75,    3,   17,   79,   13,  405,  141,   64,   33,   11,   30,
          89,    2,   83,  182,   27,   41,  132,   16,   58,    1,    6,
         333,  344,   81,   23,  204,  429,  892, 1521,  215,  218,  444,
         121,  533,  342,  212,  543,  138,   72,  355,  109,   35,  156,
          55,  877,  131, 1151,   69,    4])}

#### Dataframe of the list of dict 

In [100]:
sim_df_test = pd.DataFrame.from_dict(new_simili)

In [102]:
sim_df_test

Unnamed: 0,index,scores,reco_player_index
0,0,"[98.56, 98.21, 98.01, 97.91, 97.91, 97.88, 97....","[75, 3, 17, 79, 13, 405, 141, 64, 33, 11, 30, ..."
1,1,"[99.14, 98.66, 98.49, 98.39, 98.36, 98.05, 97....","[23, 2, 11, 36, 30, 9, 74, 218, 16, 25, 83, 17..."
2,2,"[98.66, 98.25, 98.06, 97.83, 97.73, 97.58, 97....","[1, 30, 11, 16, 218, 83, 0, 6, 405, 1507, 36, ..."
3,3,"[99.06, 98.4, 98.33, 98.31, 98.19, 98.18, 98.1...","[75, 27, 33, 13, 89, 17, 0, 64, 79, 16, 55, 40..."
4,4,"[98.84, 98.51, 98.25, 98.2, 98.01, 97.91, 97.7...","[116, 28, 78, 308, 37, 32, 172, 870, 415, 122,..."
...,...,...,...
19234,19234,"[97.4, 97.04, 96.96, 96.86, 96.79, 96.62, 96.5...","[19080, 19140, 19105, 19158, 19236, 19208, 191..."
19235,19235,"[96.13, 95.66, 95.61, 95.23, 95.17, 94.97, 94....","[19157, 18738, 19236, 19060, 19208, 19158, 191..."
19236,19236,"[98.39, 98.23, 97.46, 97.35, 97.32, 97.3, 97.1...","[19158, 19226, 19157, 19175, 19217, 19147, 192..."
19237,19237,"[98.48, 98.01, 97.81, 97.22, 97.15, 97.13, 97....","[19041, 19056, 19077, 19203, 18933, 19177, 190..."


In [103]:
with open(r'../similarity_matrix_v3.pickle', 'wb') as file:
        pickle.dump(sim_df_test, file)

### Method to construct list of reco dictionaries for a given player 

#### Load pickle DF

In [106]:
with open("../similarity_matrix_v3.pickle", 'rb') as file:
    sim_matrix_df = pickle.load(file)

In [107]:
sim_matrix_df.scores.head()

0    [98.56, 98.21, 98.01, 97.91, 97.91, 97.88, 97....
1    [99.14, 98.66, 98.49, 98.39, 98.36, 98.05, 97....
2    [98.66, 98.25, 98.06, 97.83, 97.73, 97.58, 97....
3    [99.06, 98.4, 98.33, 98.31, 98.19, 98.18, 98.1...
4    [98.84, 98.51, 98.25, 98.2, 98.01, 97.91, 97.7...
Name: scores, dtype: object

In [108]:
print(sim_matrix_df.scores[1])
type(sim_matrix_df.scores[1])
len(sim_matrix_df.reco_player_index[0])

[99.14 98.66 98.49 98.39 98.36 98.05 97.92 97.8  97.72 97.69 97.59 97.38
 97.13 97.01 96.96 96.92 96.85 96.78 96.73 96.71 96.69 96.49 96.48 96.46
 96.45 96.38 96.33 96.32 96.21 96.12 96.06 96.05 96.01 96.01 96.   95.86
 95.74 95.74 95.68 95.61 95.58 95.58 95.56 95.51 95.49 95.44 95.36 95.34
 95.32 95.26]


50

#### Create the list for index 0 and transform into Dataframe


In [109]:
index_search = 0 

length = len(sim_matrix_df.reco_player_index[index_search])
length
# list_res=[]
# for i in range(0,length):
#     d = {
#         'index_search' : index_search,
#         'index' : sim_matrix_df.reco_player_index[index_search][i],
#         'score': sim_matrix_df.scores[index_search][i]
#         }
#     list_res.append(d)
# list_res

50

In [19]:
my_reco = pd.DataFrame(list_res)

# my_reco['sofifa_id']=df22.iloc[my_reco['index']]['sofifa_id']


In [20]:
sofifa_index = df22[['sofifa_id']]
sofifa_index

Unnamed: 0,sofifa_id
0,158023
1,188545
2,20801
3,190871
4,192985
...,...
19234,261962
19235,262040
19236,262760
19237,262820


In [28]:
myreco= my_reco.set_index('index').head(20)

In [29]:
myreco['sofifa_id']=df22.iloc[myreco.index]['sofifa_id']

In [22]:
my_reco2.head(10)

Unnamed: 0,index_search,index,score,sofifa_id
0,0,73,97.88,158023
1,0,3,97.48,188545
2,0,16,97.36,20801
3,0,77,97.1,190871
4,0,32,96.87,192985
5,0,376,96.82,200389
6,0,40,96.81,231747
7,0,11,96.7,167495
8,0,175,96.68,192448
9,0,63,96.67,202126


In [30]:
myreco

Unnamed: 0_level_0,index_search,score,sofifa_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
73,0,97.88,177683
3,0,97.48,190871
16,0,97.36,208722
77,0,97.1,185122
32,0,96.87,177003
376,0,96.82,234742
40,0,96.81,209658
11,0,96.7,165153
175,0,96.68,213565
63,0,96.67,212218


In [65]:
df22.head(20)

Unnamed: 0,sofifa_id,player_url,short_name,long_name,player_positions,overall,potential,value_eur,wage_eur,age,...,lcb,cb,rcb,rb,gk,player_face_url,club_logo_url,club_flag_url,nation_logo_url,nation_flag_url
0,158023,https://sofifa.com/player/158023/lionel-messi/...,L. Messi,Lionel Andrés Messi Cuccittini,"RW, ST, CF",93,93,78000000.0,320000.0,34,...,50+3,50+3,50+3,61+3,19+3,https://cdn.sofifa.net/players/158/023/22_120.png,https://cdn.sofifa.net/teams/73/60.png,https://cdn.sofifa.net/flags/fr.png,https://cdn.sofifa.net/teams/1369/60.png,https://cdn.sofifa.net/flags/ar.png
1,188545,https://sofifa.com/player/188545/robert-lewand...,R. Lewandowski,Robert Lewandowski,ST,92,92,119500000.0,270000.0,32,...,60+3,60+3,60+3,61+3,19+3,https://cdn.sofifa.net/players/188/545/22_120.png,https://cdn.sofifa.net/teams/21/60.png,https://cdn.sofifa.net/flags/de.png,https://cdn.sofifa.net/teams/1353/60.png,https://cdn.sofifa.net/flags/pl.png
2,20801,https://sofifa.com/player/20801/c-ronaldo-dos-...,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,"ST, LW",91,91,45000000.0,270000.0,36,...,53+3,53+3,53+3,60+3,20+3,https://cdn.sofifa.net/players/020/801/22_120.png,https://cdn.sofifa.net/teams/11/60.png,https://cdn.sofifa.net/flags/gb-eng.png,https://cdn.sofifa.net/teams/1354/60.png,https://cdn.sofifa.net/flags/pt.png
3,190871,https://sofifa.com/player/190871/neymar-da-sil...,Neymar Jr,Neymar da Silva Santos Júnior,"LW, CAM",91,91,129000000.0,270000.0,29,...,50+3,50+3,50+3,62+3,20+3,https://cdn.sofifa.net/players/190/871/22_120.png,https://cdn.sofifa.net/teams/73/60.png,https://cdn.sofifa.net/flags/fr.png,,https://cdn.sofifa.net/flags/br.png
4,192985,https://sofifa.com/player/192985/kevin-de-bruy...,K. De Bruyne,Kevin De Bruyne,"CM, CAM",91,91,125500000.0,350000.0,30,...,69+3,69+3,69+3,75+3,21+3,https://cdn.sofifa.net/players/192/985/22_120.png,https://cdn.sofifa.net/teams/10/60.png,https://cdn.sofifa.net/flags/gb-eng.png,https://cdn.sofifa.net/teams/1325/60.png,https://cdn.sofifa.net/flags/be.png
5,200389,https://sofifa.com/player/200389/jan-oblak/220002,J. Oblak,Jan Oblak,GK,91,93,112000000.0,130000.0,28,...,33+3,33+3,33+3,32+3,89+3,https://cdn.sofifa.net/players/200/389/22_120.png,https://cdn.sofifa.net/teams/240/60.png,https://cdn.sofifa.net/flags/es.png,,https://cdn.sofifa.net/flags/si.png
6,231747,https://sofifa.com/player/231747/kylian-mbappe...,K. Mbappé,Kylian Mbappé Lottin,"ST, LW",91,95,194000000.0,230000.0,22,...,54+3,54+3,54+3,63+3,18+3,https://cdn.sofifa.net/players/231/747/22_120.png,https://cdn.sofifa.net/teams/73/60.png,https://cdn.sofifa.net/flags/fr.png,https://cdn.sofifa.net/teams/1335/60.png,https://cdn.sofifa.net/flags/fr.png
7,167495,https://sofifa.com/player/167495/manuel-neuer/...,M. Neuer,Manuel Peter Neuer,GK,90,90,13500000.0,86000.0,35,...,34+3,34+3,34+3,35+3,88+2,https://cdn.sofifa.net/players/167/495/22_120.png,https://cdn.sofifa.net/teams/21/60.png,https://cdn.sofifa.net/flags/de.png,https://cdn.sofifa.net/teams/1337/60.png,https://cdn.sofifa.net/flags/de.png
8,192448,https://sofifa.com/player/192448/marc-andre-te...,M. ter Stegen,Marc-André ter Stegen,GK,90,92,99000000.0,250000.0,29,...,33+3,33+3,33+3,31+3,88+3,https://cdn.sofifa.net/players/192/448/22_120.png,https://cdn.sofifa.net/teams/241/60.png,https://cdn.sofifa.net/flags/es.png,,https://cdn.sofifa.net/flags/de.png
9,202126,https://sofifa.com/player/202126/harry-kane/22...,H. Kane,Harry Kane,ST,90,90,129500000.0,240000.0,27,...,61+3,61+3,61+3,64+3,20+3,https://cdn.sofifa.net/players/202/126/22_120.png,https://cdn.sofifa.net/teams/18/60.png,https://cdn.sofifa.net/flags/gb-eng.png,https://cdn.sofifa.net/teams/1318/60.png,https://cdn.sofifa.net/flags/gb-eng.png


In [24]:
import opti_recruit.similarity as si

In [37]:
si.cosine_recommendation(158023, sim_matrix_df, df22)

158023


Unnamed: 0_level_0,index_search,score,sofifa_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
73,0,97.88,177683
3,0,97.48,190871
16,0,97.36,208722
77,0,97.10,185122
32,0,96.87,177003
...,...,...,...
280,0,92.40,241461
222,0,92.32,194404
591,0,92.31,212442
478,0,92.23,227928


In [10]:
df22[df22['sofifa_id']==177683]

Unnamed: 0,sofifa_id,player_url,short_name,long_name,player_positions,overall,potential,value_eur,wage_eur,age,...,lcb,cb,rcb,rb,gk,player_face_url,club_logo_url,club_flag_url,nation_logo_url,nation_flag_url
73,177683,https://sofifa.com/player/177683/yann-sommer/2...,Y. Sommer,Yann Sommer,GK,85,85,26500000.0,40000.0,32,...,29+3,29+3,29+3,29+3,83+2,https://cdn.sofifa.net/players/177/683/22_120.png,https://cdn.sofifa.net/teams/23/60.png,https://cdn.sofifa.net/flags/de.png,,https://cdn.sofifa.net/flags/ch.png


In [12]:
my_reco_df = si.cosine_recommendation(158023, sim_matrix_df, df22)

#transform reco_list into dataframe
df_22_filtered = si.filter_params(df22, 21, 29, 1, 999999999,None)

#filter my_reco_df with df_filtered
boolean_series = my_reco_df.sofifa_id.isin(df_22_filtered)
my_reco_filt = my_reco_df[boolean_series]

print(my_reco_df.shape,my_reco_filt.shape)
my_reco_df

158023
(100, 3) (55, 3)


Unnamed: 0_level_0,index_search,score,sofifa_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
73,0,97.88,177683
3,0,97.48,190871
16,0,97.36,208722
77,0,97.10,185122
32,0,96.87,177003
...,...,...,...
280,0,92.40,241461
222,0,92.32,194404
591,0,92.31,212442
478,0,92.23,227928


In [47]:
si.get_list_dict(my_reco_filt)

[{'sofifa_id': 190871, 'score': 97.48, 'index': 0},
 {'sofifa_id': 208722, 'score': 97.36, 'index': 1},
 {'sofifa_id': 234742, 'score': 96.82, 'index': 2},
 {'sofifa_id': 209658, 'score': 96.81, 'index': 3},
 {'sofifa_id': 213565, 'score': 96.68, 'index': 4},
 {'sofifa_id': 212218, 'score': 96.67, 'index': 5},
 {'sofifa_id': 201535, 'score': 96.59, 'index': 6},
 {'sofifa_id': 202556, 'score': 96.5, 'index': 7},
 {'sofifa_id': 216393, 'score': 96.37, 'index': 8},
 {'sofifa_id': 203376, 'score': 95.8, 'index': 9},
 {'sofifa_id': 231747, 'score': 95.62, 'index': 10},
 {'sofifa_id': 206517, 'score': 95.43, 'index': 11},
 {'sofifa_id': 199304, 'score': 95.41, 'index': 12},
 {'sofifa_id': 228941, 'score': 95.29, 'index': 13},
 {'sofifa_id': 216352, 'score': 95.15, 'index': 14},
 {'sofifa_id': 236401, 'score': 95.14, 'index': 15},
 {'sofifa_id': 239053, 'score': 95.12, 'index': 16},
 {'sofifa_id': 198176, 'score': 95.1, 'index': 17},
 {'sofifa_id': 207410, 'score': 94.97, 'index': 18},
 {'sof