In [11]:
# !pip install dfply
from dfply import * 
import networkx as nx
import matplotlib.pyplot as plt
from bokeh.io import show, output_notebook
from bokeh.models import Plot, Range1d, MultiLine, Circle
from bokeh.models.graphs import from_networkx
from bokeh.transform import factor_cmap
from bokeh.transform import linear_cmap
import pandas as pd
from bokeh.palettes import Spectral11, Colorblind
from math import ceil
from math import pow
from bokeh.models import (BoxZoomTool, Circle, HoverTool,PanTool,
                          MultiLine, Plot, Range1d, ResetTool, NodesAndLinkedEdges,TapTool)
import warnings
warnings.filterwarnings("ignore")

In [12]:
# Load the cleaned transfers table, order it by date, then drop exact
# duplicate rows and any row that still has a missing value.
transfers_df = (
    pd.read_pickle("./../../Capstone/Data/Clean/Transfers_Network.pkl")
    .sort_values("date", ascending=True)
    .drop_duplicates()
    .dropna()
)

# Network's preparation functions

In [13]:
def box_cox_normalization(node_size, size=0.5, lam=0.5):
    """Compress a value into a glyph size with a Box-Cox power transform.

    Computes ``ceil(size * (node_size ** lam - 1) / lam)``. When ``lam`` is 0
    the logarithmic limit of the transform, ``log(node_size)``, is used.

    Args:
        node_size: Non-negative number to compress (callers pass a node degree).
        size: Multiplier applied to the transformed value (default 0.5).
        lam: Box-Cox exponent (default 0.5).

    Returns:
        int: The scaled value rounded up. With the defaults this is -1 for an
        input of 0 and 0 for an input of 1.
    """
    if lam == 0:
        from math import log  # only needed for the limiting case
        compressed_point = log(node_size)
    else:
        compressed_point = (pow(node_size, lam) - 1) / lam
    return ceil(size * compressed_point)


def z_score(input_v, avg , sd):
  """Standardise ``input_v`` against a mean and a standard deviation.

  Args:
    input_v: Value to standardise.
    avg: Mean of the population the value belongs to.
    sd: Standard deviation of that population.

  Returns:
    ``(input_v - avg) / sd``, or ``0.0`` when ``sd`` is zero (every value
    equals the mean, so there is no deviation to express).
  """
  if sd == 0:
    return 0.0
  return (input_v - avg) / sd


def edge_color(t_type):
  """Map a transfer-window type to the colour used for its edge.

  "summer" gives "orange", "winter" gives "black", anything else "red".
  """
  if t_type == "summer":
    return "orange"
  return "black" if t_type == "winter" else "red"

def _fee_totals(transfers_df, party, total_name):
  """Sum ``fee`` per (``party``, season) pair into a column named ``total_name``."""
  return (transfers_df
          .groupby([party, 'season'])['fee']
          .sum()
          .reset_index(name = total_name))


def transfers_detailed(transfers_df):
  """Attach per-season fee totals for clubs and leagues to every transfer row.

  Four columns are added, each merged on the party column plus ``season``:
  ``spent`` (by ``to``), ``received`` (by ``from``), ``spent_league``
  (by ``to_league``) and ``received_league`` (by ``from_league``).

  Args:
    transfers_df: DataFrame with columns ``from``, ``to``, ``from_league``,
      ``to_league``, ``season`` and ``fee``.

  Returns:
    A new DataFrame; the input frame is not modified.
  """
  totals = (
    ('to', 'spent'),
    ('from', 'received'),
    ('to_league', 'spent_league'),
    ('from_league', 'received_league'),
  )
  detailed = transfers_df
  for party, total_name in totals:
    # Totals are always computed from the original rows, never from the
    # partially merged frame.
    party_totals = _fee_totals(transfers_df, party, total_name)
    detailed = pd.merge(detailed, party_totals, on = [party, 'season'])
  return detailed

def _club_info(transfers_df, side):
  """Return one row per club on ``side`` ('from' or 'to') with side-neutral column names."""
  prefix = side + "_"
  columns = [side] + [prefix + field for field in ("country", "cont", "league", "league_class")]
  info = transfers_df[columns].drop_duplicates(side)
  return info.rename(columns = lambda name: "club" if name == side else name.replace(prefix, ""))


def _club_fee_total(transfers_df, side, total_name):
  """Sum ``fee`` per club on ``side`` into a ``club`` / ``total_name`` frame."""
  return (transfers_df
          .groupby(side)["fee"]
          .sum()
          .rename_axis("club")
          .reset_index(name = total_name))


def nodes_attributes(transfers_df):
  """Build the per-club attribute table used to decorate graph nodes.

  Only clubs that appear both as seller (``from``) and buyer (``to``) with
  identical country / continent / league / league class are kept, because
  every merge is an inner join.

  ``received`` and ``spent`` are the sums of ``fee`` over the rows where the
  club is the seller or the buyer. They are computed from ``fee`` directly:
  summing a per-season total that is repeated on every row would count that
  total once per transfer.

  Args:
    transfers_df: DataFrame with ``from``/``to`` columns, their
      ``*_country``, ``*_cont``, ``*_league``, ``*_league_class``
      counterparts, and ``fee``.

  Returns:
    DataFrame with columns ``club``, ``country``, ``cont``, ``league``,
    ``league_class``, ``received``, ``spent`` and ``profit``
    (``received - spent``).
  """
  nodes_attr = pd.merge(
    _club_info(transfers_df, "from"),
    _club_info(transfers_df, "to"),
    on = ["club", "country", "cont", "league", "league_class"],
  ).drop_duplicates()

  from_received = _club_fee_total(transfers_df, "from", "received")
  to_spent = _club_fee_total(transfers_df, "to", "spent")
  spent_received = pd.merge(from_received, to_spent, on = ["club"]).drop_duplicates()

  nodes_attr = pd.merge(nodes_attr, spent_received, on = ["club"])
  nodes_attr['profit'] = nodes_attr['received'] - nodes_attr['spent']

  return nodes_attr

def set_nodes_attributes(G, nodes_attr):
  """Decorate a club graph with the node and edge attributes used for plotting.

  Node attributes: ``continent``, ``country``, ``league``, ``league_class``,
  ``received``, ``spent``, ``profit`` (looked up in ``nodes_attr`` by club),
  ``name`` (the node itself), ``connections`` and ``degree`` (total degree),
  ``node_size`` (Box-Cox compressed degree), ``outgoing_edges`` and
  ``incoming_edges``.

  Edge attributes: ``edge_width`` (z-score of ``fee``), ``edge_alpha``
  (z-score of ``age``) and ``edge_color`` (from ``type``).

  NOTE(review): z-scores are unbounded and can be negative, yet they are used
  as line width and alpha when plotting - confirm the intended scaling.

  Args:
    G: Directed graph whose edges carry ``fee``, ``age`` and ``type``.
    nodes_attr: DataFrame with a ``club`` column plus ``cont``, ``country``,
      ``league``, ``league_class``, ``received``, ``spent`` and ``profit``.

  Returns:
    The same graph object, modified in place.
  """
  # nodes_attr column -> node attribute name it is stored under
  node_columns = (
    ("cont", 'continent'),
    ("country", 'country'),
    ("league", 'league'),
    ("received", 'received'),
    ("spent", 'spent'),
    ("profit", 'profit'),
    ("league_class", 'league_class'),
  )
  for column, attribute in node_columns:
    nx.set_node_attributes(G, dict(zip(nodes_attr.club, nodes_attr[column])), attribute)
  nx.set_node_attributes(G, {node: node for node in G.nodes}, 'name')

  degrees = dict(G.degree)
  node_sizes = {node: box_cox_normalization(degree) for node, degree in degrees.items()}
  nx.set_node_attributes(G, degrees, 'connections')
  nx.set_node_attributes(G, node_sizes, 'node_size')
  nx.set_node_attributes(G, degrees, 'degree')

  fee_by_edge = nx.get_edge_attributes(G, "fee")
  age_by_edge = nx.get_edge_attributes(G, "age")
  type_by_edge = nx.get_edge_attributes(G, "type")

  fees = np.array(list(fee_by_edge.values()))
  ages = np.array(list(age_by_edge.values()))
  avg_fee, sd_fee = np.mean(fees), np.std(fees)
  # Ages are standardised against the age distribution, not the fee one.
  avg_age, sd_age = np.mean(ages), np.std(ages)

  new_fees = {edge: z_score(fee, avg_fee, sd_fee) for edge, fee in fee_by_edge.items()}
  new_ages = {edge: z_score(age, avg_age, sd_age) for edge, age in age_by_edge.items()}
  edge_colors = {edge: edge_color(t_type) for edge, t_type in type_by_edge.items()}

  nx.set_edge_attributes(G, new_fees, 'edge_width')
  nx.set_edge_attributes(G, new_ages, 'edge_alpha')
  nx.set_edge_attributes(G, edge_colors, 'edge_color')
  nx.set_node_attributes(G, dict(G.out_degree()), "outgoing_edges")
  nx.set_node_attributes(G, dict(G.in_degree()), "incoming_edges")
  return G

In [14]:
def group_league(transfers_df):
  """Aggregate transfers into one row per (from_league, to_league, type).

  Each row carries the summed ``fee``, ``count`` (number of distinct index
  labels in the group), the mean ``age``, the first unique country / league
  class / continent of both leagues, and the sums of ``received_league`` and
  ``spent_league`` over the group's rows.

  NOTE(review): if ``received_league`` / ``spent_league`` are per-season
  league totals repeated on every transfer row, summing them here counts each
  total once per row - confirm this is intended.
  """
  by_leagues = (transfers_df >> 
  group_by(X.from_league, X.to_league, X["type"]) >>
  summarize(fee = X.fee.sum(), count = X.index.nunique(),
            age = X.age.mean(),
            from_country = X.from_country.unique()[0],
            from_league_class = X.from_league_class.unique()[0],
            from_cont = X.from_cont.unique()[0],
            from_league_received = X.received_league.sum(),
            to_country = X.to_country.unique()[0],
            to_league_class = X.to_league_class.unique()[0],
            to_cont = X.to_cont.unique()[0],
            to_league_spent = X.spent_league.sum(),                   
            ))
  return by_leagues

def _league_side_summary(by_leagues, side, total_name):
  """Summarise leagues on ``side`` ('from' or 'to'): first attributes plus summed fee."""
  prefix = side + "_"
  summary = by_leagues.groupby(prefix + "league").agg(**{
    "country": (prefix + "country", "first"),
    "cont": (prefix + "cont", "first"),
    "league_class": (prefix + "league_class", "first"),
    total_name: ("fee", "sum"),
  })
  return summary.rename_axis("league").reset_index()


def league_node_attrs(by_leagues):
  """Build the per-league attribute table used to decorate league nodes.

  ``received`` is the sum of ``fee`` over all rows where the league is the
  source and ``spent`` the sum where it is the destination. Taking the first
  value of a per-group aggregate instead would report only one arbitrary
  (from_league, to_league, type) group.

  Only leagues that occur as both source and destination with matching
  country / continent / class are kept (inner merge).

  Args:
    by_leagues: DataFrame with ``from_league``, ``to_league``, ``fee`` and
      the ``from_*`` / ``to_*`` ``country``, ``cont``, ``league_class``
      columns.

  Returns:
    DataFrame with ``league``, ``country``, ``cont``, ``league_class``,
    ``received``, ``spent`` and ``profit`` (``received - spent``).
  """
  from_league_info = _league_side_summary(by_leagues, "from", "received")
  to_league_info = _league_side_summary(by_leagues, "to", "spent")
  node_attrs = pd.merge(from_league_info, to_league_info,
                        on = ["league", "league_class", "country", "cont"])
  node_attrs["profit"] = node_attrs["received"] - node_attrs["spent"]
  return node_attrs

def league_attributes(G, node_attrs):
  """Decorate a league graph with the node and edge attributes used for plotting.

  Node attributes: ``name`` (the node itself), ``country``, ``continent``,
  ``league_class``, ``received``, ``spent``, ``profit`` (looked up in
  ``node_attrs`` by league), ``degree`` and ``connections`` (total degree),
  ``node_size`` (Box-Cox compressed degree), ``outgoing_edges`` and
  ``incoming_edges``.

  Edge attributes: ``edge_color`` (from ``type``), ``edge_width`` (z-score of
  ``fee``) and ``edge_alpha`` (z-score of ``age``).

  NOTE(review): z-scores are unbounded and can be negative, yet they are used
  as line width and alpha when plotting - confirm the intended scaling.

  Args:
    G: Directed graph whose edges carry ``fee``, ``age`` and ``type``.
    node_attrs: DataFrame with a ``league`` column plus ``country``, ``cont``,
      ``league_class``, ``received``, ``spent`` and ``profit``.

  Returns:
    The same graph object, modified in place.
  """
  # node_attrs column -> node attribute name it is stored under
  node_columns = (
    ("country", "country"),
    ("cont", "continent"),
    ("league_class", "league_class"),
    ("received", "received"),
    ("spent", "spent"),
    ("profit", "profit"),
  )
  nx.set_node_attributes(G, {node: node for node in G.nodes}, "name")
  for column, attribute in node_columns:
    nx.set_node_attributes(G, dict(zip(node_attrs.league, node_attrs[column])), attribute)

  degrees = dict(G.degree)
  node_sizes = {node: box_cox_normalization(degree) for node, degree in degrees.items()}
  nx.set_node_attributes(G, degrees, 'degree')
  nx.set_node_attributes(G, dict(G.out_degree()), "outgoing_edges")
  nx.set_node_attributes(G, dict(G.in_degree()), "incoming_edges")
  nx.set_node_attributes(G, node_sizes, 'node_size')
  nx.set_node_attributes(G, degrees, 'connections')

  type_by_edge = nx.get_edge_attributes(G, "type")
  edge_colors = {edge: edge_color(t_type) for edge, t_type in type_by_edge.items()}
  nx.set_edge_attributes(G, edge_colors, 'edge_color')

  fee_by_edge = nx.get_edge_attributes(G, "fee")
  age_by_edge = nx.get_edge_attributes(G, "age")
  fees = np.array(list(fee_by_edge.values()))
  ages = np.array(list(age_by_edge.values()))
  avg_fee, sd_fee = np.mean(fees), np.std(fees)
  # Ages are standardised against the age distribution, not the fee one.
  avg_age, sd_age = np.mean(ages), np.std(ages)

  new_fees = {edge: z_score(fee, avg_fee, sd_fee) for edge, fee in fee_by_edge.items()}
  new_ages = {edge: z_score(age, avg_age, sd_age) for edge, age in age_by_edge.items()}
  nx.set_edge_attributes(G, new_fees, 'edge_width')
  nx.set_edge_attributes(G, new_ages, 'edge_alpha')

  return G

In [15]:
def get_subgraph(G, attr, value):
  """Return the subgraph of ``G`` induced by nodes whose ``attr`` matches ``value``.

  Args:
    G: Graph to filter. The graph passed in is the one used, not a
      notebook-level global.
    attr: Name of the node attribute to test.
    value: A single value (compared by membership in a one-element tuple, so a
      string is matched whole rather than as a substring) or a list / tuple /
      set of accepted values.

  Returns:
    ``G.subgraph(...)`` over the matching nodes. Nodes that do not carry
    ``attr`` are skipped.
  """
  if isinstance(value, (list, tuple, set, frozenset)):
    wanted = value
  else:
    wanted = (value,)
  filtered = [node for node, data in G.nodes(data=True)
              if attr in data and data[attr] in wanted]
  return G.subgraph(filtered)


def plot_net(G,title,**kwargs):
  """Render ``G`` as an interactive Bokeh network plot in the notebook.

  Args:
    G: Graph whose nodes and edges already carry the attributes named below.
    title: Plot title.
    **kwargs: All required -
      ``node_color``: categorical node attribute used to colour nodes;
      ``node_size``: node attribute (or constant) for the circle size;
      ``edge_alpha``, ``edge_width``, ``edge_color``: edge attributes (or
      constants) for the line glyph;
      ``hover_data``: iterable of node attribute names shown in the tooltip;
      ``layout``: layout function handed to ``from_networkx``.

  Returns:
    None. The plot is shown via ``output_notebook()`` / ``show()``.
  """
  node_color = kwargs["node_color"]
  node_size = kwargs["node_size"]
  edge_alpha = kwargs["edge_alpha"]
  edge_width = kwargs["edge_width"]
  edge_color = kwargs["edge_color"]
  layout = kwargs["layout"]

  # Sort the factors so each category keeps the same colour on every run:
  # iteration order of a set of strings changes between interpreter sessions.
  colors = sorted(set(nx.get_node_attributes(G, node_color).values()), key = str)
  hover_data = [(attr, "@" + attr) for attr in kwargs["hover_data"]]

  plot = Plot(plot_width=700, plot_height=500,
              x_range = Range1d(-1.1, 1.1), y_range=Range1d(-1.1, 1.1))
  plot.title.text = title

  node_hover_tool = HoverTool(tooltips = hover_data)
  plot.add_tools(node_hover_tool, BoxZoomTool(), ResetTool(), PanTool(),TapTool())

  graph_renderer = from_networkx(G, layout, scale=1, center=(0, 0))
  # NOTE(review): Colorblind[7] has seven colours; confirm how Bokeh colours
  # nodes when there are more than seven categories.
  graph_renderer.node_renderer.glyph = Circle(size = node_size, fill_color = factor_cmap(node_color,Colorblind[7], colors))

  graph_renderer.edge_renderer.glyph = MultiLine(
      line_alpha = edge_alpha,
      line_width = edge_width,
      line_color = edge_color
  )
  graph_renderer.edge_renderer.selection_glyph = MultiLine(line_color='blue', line_width=edge_width)
  graph_renderer.selection_policy = NodesAndLinkedEdges()

  plot.renderers.append(graph_renderer)
  output_notebook()
  show(plot)

def call_plot(G,title,layout,node_size = "node_size", node_color = "continent", edge_color = "edge_color",
          edge_alpha = "edge_alpha", edge_width = "edge_width"):
  """Plot ``G`` through ``plot_net`` with the notebook's default styling.

  The keyword arguments name the node / edge attributes that drive the
  glyphs; the tooltip always shows the same fixed set of node attributes.
  """
  tooltip_fields = [
    "connections", "country", "continent", "league_class", "name",
    "spent", "received", "profit", "incoming_edges", "outgoing_edges",
  ]
  styling = dict(node_size = node_size, node_color = node_color, edge_color = edge_color,
                 edge_alpha = edge_alpha, edge_width = edge_width)
  plot_net(G, title, hover_data = tooltip_fields, layout = layout, **styling)

In [16]:
def network_stats(G):
  """Print density, reciprocity and assortativity coefficients of ``G``.

  Attribute assortativity is reported for the ``continent``,
  ``league_class`` and ``country`` node attributes, followed by degree
  assortativity. Each figure is printed under its own header line.
  """
  print("Network's density")
  print(nx.density(G))

  print("Network's reciprocity")
  print(nx.reciprocity(G))

  attribute_reports = [
    ("Network's assortavity based on continent", 'continent'),
    ("Network's assortavity based on league_class", 'league_class'),
    ("Network's assortavity based on country", 'country'),
  ]
  for header, attribute in attribute_reports:
    print(header)
    print(nx.attribute_assortativity_coefficient(G, attribute))

  print("Network's assortavity based on degrees")
  print(nx.degree_assortativity_coefficient(G))


def _print_ranking(header, values, column):
  """Print ``header`` and then a club / ``column`` table sorted by ``column`` descending."""
  table = pd.DataFrame(list(values.items()), columns = ['club', column])
  print(header)
  print(table.sort_values(column, ascending = False))


def network_info(G):
  """Print per-node rankings for a directed graph.

  In order: out-degree, in-degree, degree centrality, in-degree centrality,
  out-degree centrality and the ``profit`` node attribute. Every table has a
  ``club`` column holding the node and is sorted from highest to lowest.
  An empty mapping prints an empty table.
  """
  _print_ranking("outgoing edges", dict(G.out_degree), 'outgoing_count')
  _print_ranking("incoming edges", dict(G.in_degree), 'incoming_count')
  _print_ranking("degree centrality", nx.degree_centrality(G), 'centrality')
  _print_ranking("incoming degree centrality", nx.in_degree_centrality(G), 'in_centrality')
  _print_ranking("outgoing degree centrality", nx.out_degree_centrality(G), 'out_centrality')
  _print_ranking("Profit made by teams", nx.get_node_attributes(G, "profit"), 'profit')

In [17]:
# Transfer columns stored as attributes on every edge.
edges_cols = [
    'loan', 'type', 'fee', 'mv', 'name', 'continent', 'nationality',
    'main_field_position', 'field_position', 'age', 'season', 'date',
]
transfers_df = transfers_detailed(transfers_df)
nodes_attr = nodes_attributes(transfers_df)
clubs = nodes_attr.club.unique()
# Keep only transfers whose two clubs both have a row in nodes_attr.
transfers_df = transfers_df.loc[
    transfers_df["from"].isin(clubs) & transfers_df["to"].isin(clubs)
]
transfers_G = nx.from_pandas_edgelist(
    transfers_df, source='from', target='to',
    edge_attr=edges_cols, create_using=nx.MultiDiGraph,
)
transfers_G = set_nodes_attributes(transfers_G, nodes_attr)
# Drop weakly connected clubs (total degree below 18).
remove = [node for node, degree in transfers_G.degree() if degree < 18]
transfers_G.remove_nodes_from(remove)

# Soccer's Transfers Network by teams

In [18]:
# Club-level graph with the default styling and a spring layout.
# NOTE(review): nx.spring_layout receives no seed here, so node positions
# presumably differ between runs - confirm whether a fixed seed is wanted.
call_plot(transfers_G, "Soccer Transfers Network", nx.spring_layout)

As the network contains a lot of nodes, we cannot get much information from the visualization. However, we can see that the European teams dominate the market, with the majority of them connected to each other. We can also see that the majority of the transfers happen during the summer transfer window, except for some Asian teams, which make most of their deals during the winter transfer window.

In [19]:
from networkx.algorithms import community
# Girvan-Newman community detection on the club graph; the result is consumed
# one partition level at a time with next() in the following cells.
communities_generator = community.girvan_newman(transfers_G)

In [20]:
# Advance the generator four times; each call returns the next partition.
next_level_communities = next(communities_generator)
next_1_level_communities = next(communities_generator)
next_2_level_communities = next(communities_generator)
next_3_level_communities = next(communities_generator)

In [21]:
# Two further levels; the last one is the partition used for plotting below.
next_4_level_communities = next(communities_generator)
next_5_level_communities = next(communities_generator)

In [22]:
# Number of communities in the selected partition.
len(next_5_level_communities)

7

In [23]:
# Tag every node with "community<k>", numbering communities from 1.
# The loop variable is not called `community`, so the imported
# networkx `community` module stays available afterwards.
for community_number, members in enumerate(next_5_level_communities, start=1):
  for node in members:
    transfers_G.nodes[node]["community"] = "community" + str(community_number)


In [24]:
# Same club graph, coloured by the "community" node attribute set above.
call_plot(transfers_G, "Soccer Transfers Network Communities", nx.spring_layout, node_color = "community")

## Network Statistics

In [26]:
# Density, reciprocity and assortativity of the club-level graph.
network_stats(transfers_G)

Network's density
0.08127920560747663
Network's reciprocity
0.6419930530602468
Network's assortavity based on continent
0.6276592157476384
Network's assortavity based on league_class
0.3793460193657123
Network's assortavity based on country
0.5635132471197509
Network's assortavity based on degrees
0.2706955281730688


ValueError: If using all scalar values, you must pass an index

As we can see, the network's density is very low, which is logical: we have many teams, and not all of them have connections with each other. However, the reciprocity of the network is relatively high, as most of the teams that make deals with each other have transfers in the opposite direction too. The main attributes driving the assortativity of the teams are their continent and country, as it is easier for players to move to another team on the same continent where they play, and even more so within the same country. The metric is around 0.5, as most of the talented players from other continents and non-EU countries tend to move to European soccer clubs, where they have higher chances of succeeding. League class has the lowest effect on the assortativity, as most of the time players from lower-ranked leagues tend to move to higher-ranked leagues. The degree of a node also has a relatively weak connection to assortativity, as teams with a low number of connections are not always connected to teams with a lot of connections.

In [None]:
# Degree, centrality and profit rankings for the club-level graph.
network_info(transfers_G)

We can see that the most central team of the network is Chelsea, and the top 5 also contains Italian teams, while the lowest centrality is found among less popular teams. The situation is almost the same for in-degree and out-degree centrality. Finally, we can see that the most profitable teams are Benfica, Porto and Ajax, as they are famous around the world for developing and selling young talents, whereas top teams such as Man City and Barcelona are in the lowest places in terms of making a profit from transfers.

In [None]:
# Loans are the rows of the enriched transfers table flagged with loan == True.
# transfers_detailed is not re-run on the subset: transfers_df was already
# passed through it when the club graph was built.
loans_df = transfers_df.loc[transfers_df.loan.eq(True)]
nodes_attr = nodes_attributes(loans_df)
clubs = nodes_attr.club.unique()
# Keep only loans whose two clubs both have a row in nodes_attr.
loans_df = loans_df.loc[loans_df["from"].isin(clubs) & loans_df["to"].isin(clubs)]
loans_G = nx.from_pandas_edgelist(
    loans_df, source='from', target='to',
    edge_attr=edges_cols, create_using=nx.MultiDiGraph,
)
loans_G = set_nodes_attributes(loans_G, nodes_attr)
# Drop weakly connected clubs (total degree below 15).
remove = [node for node, degree in loans_G.degree() if degree < 15]
loans_G.remove_nodes_from(remove)

In [None]:
# Preview only the first rows instead of rendering the whole table.
transfers_df.head()

# Soccer's Loans Network by teams

In [None]:
# Club-level loans graph with the default styling.
call_plot(loans_G, "Soccer Loans' Network", nx.spring_layout)

## Network Statistics

The situation is almost the same in terms of the main teams of the network. However, we can see that more young players are involved in loans taking place during the winter transfer window (black color for the winter window, low opacity for young players). We can also see two teams from South Korea that are isolated.

In [None]:
# Density, reciprocity and assortativity of the club-level loans graph.
network_stats(loans_G)

The metrics are almost the same as for the transfers network, except for the nearly maximal reciprocity. This is logical: in most cases a player who is loaned to another club comes back to his club, and only in some cases does the club that took the player on loan buy him.

In [None]:
# Collapse transfers to league-to-league edges and build the league graph.
by_leagues = group_league(transfers_df)
node_attrs = league_node_attrs(by_leagues)
leagues_G = nx.from_pandas_edgelist(
    by_leagues, source='from_league', target='to_league',
    edge_attr=['fee','count','age','type'], create_using=nx.MultiDiGraph,
)
leagues_G = league_attributes(leagues_G, node_attrs)

# Soccer's Transfers Network by leagues

In [None]:
# League-level graph; node_size is passed explicitly but equals the default.
call_plot(leagues_G, "Soccer Transfers' Network by Leagues", nx.spring_layout, node_size = "node_size")

If we take the leagues as nodes, we can see that teams from the South Korean league have the weakest connection to the other leagues' teams. In general, the graph is very interconnected and almost all of the leagues have direct links to each other.

## Network Statistics

In [None]:
# Density, reciprocity and assortativity of the league-level graph.
network_stats(leagues_G)

As we can see, the density of the network is very high, as it is a multigraph, where two nodes can have more than one edge between them. As we have taken the league as a node, the reciprocity is very high compared to the graph where the nodes were the teams. In terms of assortativity, the continent of the leagues has the highest effect, and the degree of the node has the lowest effect, which is even negative.

In [None]:
# Rankings for the league-level graph; the "club" column holds league names here.
network_info(leagues_G)

The main member of the network in terms of outgoing transfers is the Netherlands' league. The Championship, the Premier League and MLS have the highest number of incoming edges. The Eredivisie has the highest centrality measures and the Championship has the highest in-degree centrality. The Bundesliga makes the most profit from transfers, while Premier League teams spend much more than they receive.

In [None]:
# League-level graph restricted to loan transfers.
leagues_loans = transfers_df.loc[transfers_df.loan]
by_leagues = group_league(leagues_loans)
node_attrs = league_node_attrs(by_leagues)
leagues_loans_G = nx.from_pandas_edgelist(
    by_leagues, source='from_league', target='to_league',
    edge_attr=['fee','count','age','type'], create_using=nx.MultiDiGraph,
)
leagues_loans_G = league_attributes(leagues_loans_G, node_attrs)

# Soccer's Loans Network by leagues

In [None]:
# League-level loans graph with the default styling.
call_plot(leagues_loans_G, "Soccer's Loans Network by leagues", nx.spring_layout, node_size = 'node_size')

The loans network is very similar to the transfers network but is less dense.

## Network Statistics

In [None]:
# Density, reciprocity and assortativity of the league-level loans graph.
network_stats(leagues_loans_G)

The main difference in the metrics, apart from the lower density and reciprocity, is that the assortativity based on country is higher, as most of the teams loan out their players to lower leagues in their own country so that they can gain match practice.

In [None]:
# Rankings for the league-level loans graph.
network_info(leagues_loans_G)

The Premier League has the highest centrality measure, while the Bundesliga and the Championship have the highest out-degree and in-degree centralities respectively. The Championship made the highest profit from loans and the Premier League the lowest.

In [None]:
# League-level graph restricted to the winter transfer window.
winter_t = transfers_df.loc[transfers_df["type"] == "winter"]
by_leagues_s = group_league(winter_t)
node_attrs = league_node_attrs(by_leagues_s)
winter_leagues_G = nx.from_pandas_edgelist(
    by_leagues_s, source='from_league', target='to_league',
    edge_attr=['fee','count','age','type'], create_using=nx.MultiDiGraph,
)
winter_leagues_G = league_attributes(winter_leagues_G, node_attrs)

# Soccer's Winter Transfers Network by leagues

In [None]:
# Winter-window league graph; node size follows the number of incoming edges.
call_plot(winter_leagues_G, "Winter Transfers Network by leagues", nx.spring_layout, node_size = "incoming_edges")

We can see that the Asian clubs are the most active members of the winter transfer window, being connected to almost all of the other leagues. The Chinese league, alongside MLS and the Spanish league, is among the most active members of the transfer market's winter window in terms of buying players.

In [None]:
# Density, reciprocity and assortativity of the winter-window league graph.
network_stats(winter_leagues_G)

The network is not very dense in comparison with transfers during all windows. The main attribute driving assortativity is the continent, and the degree of the node has a negative effect on its assortativity.

## Network Statistics

In [None]:
# Rankings for the winter-window league graph.
network_info(winter_leagues_G)

The central member in almost all measures is the Chinese Super League, as most of the clubs there got richer in a winter period and made a lot of expensive transfers in the winter transfer window.

# Network of teams inside one league

In [None]:
# This section looks at teams, so the club-level graph is the one filtered.
league_subgraph = get_subgraph(transfers_G, "country", "England")

In [None]:
# Density, reciprocity and assortativity of the England subgraph.
network_stats(league_subgraph)

In [None]:
# Rankings for the England subgraph.
network_info(league_subgraph)

In [None]:
#@title **Network by country**
country = "Italy" #@param ['Japan','Italy','Belgium','Spain','Mexico','Sweden','Saudi Arabia','Turkey','Brazil','Denmark','Netherlands','Portugal','France','Colombia','Germany','China','Norway','Argentina','Poland','England','Scotland','United States','Australia','Korea, South']
# Teams of the chosen country are taken from the club-level graph.
league_subgraph = get_subgraph(transfers_G, "country", country)
# Keep the default title in step with the default country above.
title = "Italian league transfers network" #@param string
call_plot(league_subgraph, title, nx.circular_layout, node_color = "name")

# Node2Vec

In [None]:
!pip install node2vec
from node2vec import Node2Vec
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import plotly.express as px

In [None]:
# Random-walk setup on the club graph; dimensions=16 is the embedding size.
# NOTE(review): no seed is passed, so the walks (and the embeddings fitted
# from them) presumably differ between runs - confirm.
model = Node2Vec(transfers_G, dimensions=16, p=1, q=2, num_walks=200, walk_length=6) 

In [None]:
# Fit the embedding model on the generated walks.
embeddings = model.fit()

In [None]:
# Similarity queries go through the model's keyed vectors (`.wv`); the
# model-level shortcut is not available in gensim 4 and later.
embeddings.wv.most_similar('Real Madrid')

In [None]:
def plot_sim_nodes(embeddings, nodes):
  """Scatter-plot the nearest neighbours of ``nodes`` in a 2-D PCA projection.

  For every node in ``nodes`` the most similar nodes are looked up, their
  embedding vectors are projected to two components with PCA, and the points
  are drawn with Plotly, coloured by the node they are similar to.

  Args:
    embeddings: Fitted embedding model. If it exposes a ``wv`` attribute, the
      lookups go through it; otherwise the object itself must provide
      ``most_similar`` and item access.
    nodes: Iterable of node names to query.

  Raises:
    ValueError: If no similar node is found (for example ``nodes`` is empty).
  """
  # Works for a fitted model (lookups live on `.wv`) and for bare keyed vectors.
  vectors = getattr(embeddings, "wv", embeddings)

  points = []
  sim_nodes = []
  query_nodes = []
  scores = []
  for node in nodes:
    for close_node, score in vectors.most_similar(node):
      points.append(vectors[close_node])
      sim_nodes.append(close_node)
      query_nodes.append(node)
      scores.append(score)

  if not points:
    raise ValueError("no similar nodes to plot; `nodes` must contain at least one node")

  # Stacking the vectors keeps whatever dimensionality the model was fitted with.
  Y = PCA(n_components=2).fit_transform(np.vstack(points))
  df = pd.DataFrame({'x': Y[:, 0],
                     'y': Y[:, 1],
                     'sim_node': sim_nodes,
                     'node': query_nodes,
                     'score' : scores
                     })
  fig = px.scatter(df, "x", "y", hover_data=['sim_node',"node", "score"],
                   color = 'node', text = "sim_node", labels = {"x":"", "y":""},
                   title = "Similar teams based on the network",)
  fig.update_traces(textposition='top center')
  fig.show()

In [None]:
# Neighbours of FC Porto and Benfica in embedding space.
plot_sim_nodes(embeddings, ['FC Porto','Benfica'])

In [None]:
# Names of all clubs remaining in the graph (valid inputs for plot_sim_nodes).
list(nx.get_node_attributes(transfers_G, "name").values())