# Finding the Best Player of a Team

### 1. Importing packages

In [1]:
import numpy as np
import pandas as pd
from gensim.models import KeyedVectors

from ipynb.fs.defs.capitals import cosine_similarity
from ipynb.fs.defs.capitals import euclidean

### 2. Loading Google News embeddings

In [2]:
# Pre-trained Google News word2vec vectors, stored in the binary word2vec format.
google_news_path = './src/GoogleNews-vectors-negative300.bin'
embeddings = KeyedVectors.load_word2vec_format(google_news_path, binary=True)

In [3]:
# gensim >= 4.0 removed `KeyedVectors.vocab` in favour of `key_to_index`;
# fall back to `.vocab` so this cell still runs on gensim 3.x.
vocabulary = getattr(embeddings, 'key_to_index', None)
if vocabulary is None:
    vocabulary = embeddings.vocab

# Plain dict of word -> embedding vector, which is what `get_player` takes.
word_embeddings = {word: embeddings[word] for word in vocabulary}

### 3. Predicting relationships among words

**Defining get player function**

**Inputs** :  
*ref_team*: a string (the team as reference)  
*ref_player*: a string (the reference team's player)  
*des_team*: a string (the desired team)  
*embeddings*: a dictionary where the keys are words and values are their embeddings  
*method*: method to use in the model (either cosine similarity "cos" or euclidean distance "d")

**Outputs** :  
*des_player*: a tuple `(word, score)` with the most likely player and its score (cosine similarity for "cos", Euclidean distance for "d")

In [4]:
def get_player(ref_team, ref_player, des_team, embeddings, method = "cos"):
    """Solve the analogy "ref_team is to ref_player as des_team is to ?".

    The query vector ``ref_player - ref_team + des_team`` is compared against
    every word in ``embeddings`` except the three input words, and the best
    match is returned.

    Args:
        ref_team: Key of the reference team in ``embeddings``.
        ref_player: Key of the reference team's player in ``embeddings``.
        des_team: Key of the team a player is wanted for in ``embeddings``.
        embeddings: Dictionary mapping each word to its embedding vector.
        method: ``"cos"`` (default) picks the word with the highest
            ``cosine_similarity`` to the query vector; ``"d"`` picks the word
            with the smallest ``euclidean`` distance to it.

    Returns:
        A ``(word, score)`` tuple for the best candidate, where ``score`` is
        the similarity (``"cos"``) or the distance (``"d"``). An empty string
        is returned when no candidate was selected, e.g. when ``embeddings``
        holds nothing besides the three input words.

    Raises:
        ValueError: If ``method`` is neither ``"cos"`` nor ``"d"``.
        KeyError: If an input word is missing from the ``embeddings`` dict.
    """
    if method not in ("cos", "d"):
        # Fail loudly instead of silently returning '' for a typo in `method`.
        raise ValueError(
            'method must be "cos" or "d", got {!r}'.format(method)
        )

    # The input words would trivially score well, so they are never candidates.
    excluded = {ref_team, ref_player, des_team}
    target = embeddings[ref_player] - embeddings[ref_team] + embeddings[des_team]

    if method == "cos":
        score = cosine_similarity
        maximise = True
        # Start below any possible score so the first candidate always wins,
        # even one whose similarity is exactly -1 (or slightly less through
        # floating-point rounding).
        best_score = float("-inf")
    else:
        score = euclidean
        maximise = False
        # No finite cap: a fixed bound would discard every candidate whenever
        # all distances happen to exceed it.
        best_score = float("inf")

    des_player = ''
    for word, word_emb in embeddings.items():
        if word in excluded:
            continue
        cur_score = score(target, word_emb)
        improved = cur_score > best_score if maximise else cur_score < best_score
        if improved:
            best_score = cur_score
            des_player = (word, best_score)

    return des_player

**Predicting**

In [5]:
# Analogy to solve: Barcelona is to Messi as Bayern is to ...?
ref_team, ref_player, des_team = 'Barcelona', 'Messi', 'Bayern'

In [6]:
# Run the same analogy query with both scoring methods.
query = (ref_team, ref_player, des_team, word_embeddings)
predict_cos = get_player(*query, method="cos")
predict_d = get_player(*query, method="d")

print(predict_cos, predict_d, sep="\n")

('Schweinsteiger', 0.788617)
('Schweinsteiger', 3.8047056)


### 4. Finding Messi-like players for other teams

**Loading data**

In [7]:
# Read the space-delimited query file and label its three columns.
column_names = ['ref_team', 'ref_player', 'des_team']
data = pd.read_csv('teams.txt', sep=' ')
data.columns = column_names

data.head()

Unnamed: 0,ref_team,ref_player,des_team
0,Barcelona,Lionel_Messi,Bayern_Munich
1,Barcelona,Lionel_Messi,Real_Madrid
2,Barcelona,Lionel_Messi,Juventus
3,Barcelona,Lionel_Messi,Chelsea
4,Barcelona,Lionel_Messi,Manchester_Utd


**Finding players**

In [8]:
# Collect one record per query and build the frame once at the end:
# `DataFrame.append` was removed in pandas 2.0, and growing a frame row by
# row copies it on every iteration.
records = []
for row in data.itertuples(index=False):
    # Attribute access keeps the cell-5 globals (ref_team, ref_player,
    # des_team) untouched, so re-running earlier cells stays predictable.
    predicted_player, _ = get_player(row.ref_team, row.ref_player, row.des_team, word_embeddings)
    records.append({'Team': row.des_team, 'Player': predicted_player})

# Explicit columns keep the expected layout even when `data` has no rows.
df = pd.DataFrame(records, columns=['Team', 'Player'])

print(df)

                   Team              Player
0         Bayern_Munich        Arjen_Robben
1           Real_Madrid   Cristiano_Ronaldo
2              Juventus  Zlatan_Ibrahimovic
3               Chelsea        Wayne_Rooney
4        Manchester_Utd        Wayne_Rooney
5       Atletico_Madrid     Fernando_Torres
6   Paris_Saint_Germain       Didier_Drogba
7              Dortmund      Lukas_Podolski
8              AC_Milan          Ronaldinho
9           Inter_Milan        Samuel_Eto'o
10              Arsenal        Wayne_Rooney
11              AS_Roma        Diego_Milito
