In [10]:
import os
from collections import Counter

import networkx as nx
import numpy as np
import pandas as pd

In [94]:
# Load Brazil's player history and show the rows whose name contains 'Neymar',
# hiding the columns that are not needed for this spot check.
a = pd.read_csv('country_player_history/Brazil.csv')
(
    a.loc[a['name'].str.contains('Neymar')]
    .drop(columns=['current_league', 'current_team', 'shirt_number', 'market_value'])
)

Unnamed: 0,name,current_market_value,country,from_date,to_date,old_team,new_team,transfer_fee
152,Neymar,€75.00m,Brazil,"Aug 3, 2017",,Paris SG,,
153,Neymar,€75.00m,Brazil,"Jul 1, 2013","Aug 3, 2017",Barcelona,Paris SG,222000000.0
154,Neymar,€75.00m,Brazil,"Jan 1, 2009","Jul 1, 2013",Santos FC,Barcelona,88000000.0
155,Neymar,€75.00m,Brazil,,"Jan 1, 2009",Santos FC U20,Santos FC,


In [95]:
# Total number of rows across every file in country_player_history/.
sum(
    pd.read_csv(f'country_player_history/{fn}').shape[0]
    for fn in os.listdir('country_player_history')
)


6648

### 1. Parse the country colors

#### 1.1. Color the player network

In [96]:
# Country names are taken from the file names in country_player_history/
# ('Brazil.csv' -> 'Brazil').
# os.listdir returns every directory entry, so anything that is not a .csv
# file (e.g. hidden OS/editor files) is skipped instead of being counted as a
# country.
countries = [
    os.path.splitext(fn)[0]
    for fn in os.listdir('country_player_history')
    if fn.endswith('.csv')
]
len(countries)

32

In [97]:
# Country -> hex colour table; only the two columns used below are kept.
df_cntr = pd.read_excel('country_colors.xlsx')[['country', 'hex']]
# How many of the countries found on disk have a colour entry.
len(set(df_cntr['country']) & set(countries))

29

In [98]:
# Countries found on disk that have no entry in the colour table.
set(countries) - set(df_cntr['country'])

{'England', 'United States', 'Wales'}

In [99]:
# country -> hex lookup for the countries found on disk, extended with
# hand-picked colours for the three countries missing from the colour table.
colors_d = {
    **df_cntr[df_cntr['country'].isin(countries)].set_index('country')['hex'].to_dict(),
    'United States': '#3C3B6E',
    'Wales': '#CF1E26',
    'England': '#FFFFFF',
}
colors_d

{'Argentina': '#74ACDF',
 'Australia': '#00008B',
 'Belgium': '#FAE042',
 'Brazil': '#009B3A',
 'Cameroon': '#FCD116',
 'Canada': '#FF0000',
 'Costa Rica': '#002B7F',
 'Croatia': '#FF0000',
 'Denmark': '#C60C30',
 'Ecuador': '#FFDD00',
 'France': '#002395',
 'Germany': '#000000',
 'Ghana': '#006B3F',
 'Iran': '#FFFFFF',
 'Japan': '#BC002D',
 'Mexico': '#006847',
 'Morocco': '#C1272D',
 'Netherlands': '#21468B',
 'Poland': '#DC143C',
 'Portugal': '#006600',
 'Qatar': '#8D1B3D',
 'Saudi Arabia': '#006C35',
 'Senegal': '#00853F',
 'Serbia': '#C6363C',
 'South Korea': '#030303',
 'Spain': '#FFC400',
 'Switzerland': '#FF0000',
 'Tunisia': '#E70013',
 'Uruguay': '#9E830E',
 'United States': '#3C3B6E',
 'Wales': '#CF1E26',
 'England': '#FFFFFF'}

### 2. Color teams based on countries

In [11]:
# Load the player-teammate graph; the path is relative to the working
# directory, so the file must be present there for this cell to run.
G = nx.read_gexf('player_teammate_network.gexf')
# (node count, edge count)
(
    G.number_of_nodes(),
    G.number_of_edges(),
)

FileNotFoundError: [Errno 2] No such file or directory: 'player_teammate_network.gexf'

In [101]:
# Map every player name to the colour of their country.
# The country is the part of the file name before '_pla'; a country without an
# entry in colors_d raises KeyError.
names_colors = {}
for fn in os.listdir('countries_players'):
    country = fn.partition('_pla')[0]
    color = colors_d[country]
    df = pd.read_csv(f'countries_players/{fn}', index_col=0)
    # A name seen again in a later file takes that file's colour.
    names_colors.update(dict.fromkeys(df['name'].to_list(), color))

len(names_colors)

830

In [102]:
# One row per node of G, indexed by node id ('Id'), with the node's colour
# looked up in names_colors (nodes without an entry get a missing value).
df_colors = pd.DataFrame(list(G.nodes())).rename(columns={0: 'Id'})
df_colors['color'] = df_colors['Id'].map(names_colors)
df_colors = df_colors.set_index('Id')
df_colors.to_csv('node_colors_people_country.csv')
df_colors

Unnamed: 0_level_0,color
Id,Unnamed: 1_level_1
Aaron Long,#3C3B6E
Cristian Roldán,#3C3B6E
DeAndre Yedlin,#3C3B6E
Tyler Adams,#3C3B6E
Aaron Mooy,#00008B
...,...
Yu-min Cho,#030303
Munir,#C1272D
Samuel Oum Gouet,#FCD116
Pathé Ciss,#00853F


In [103]:
# Only nodes whose current_market_value is strictly above this limit keep
# their label; every other node gets an empty label.
limit = 59999999
df_nodes = pd.read_csv('node_values.csv')
labels = df_nodes['Id'].to_list()
above_limit = df_nodes['current_market_value'] > limit
# Number of nodes that keep their label.
print(int(above_limit.sum()))
df_nodes['newlabel'] = [
    label if keep else ''
    for label, keep in zip(labels, above_limit)
]
df_nodes = df_nodes.set_index('Id')
df_nodes[['newlabel']].to_csv('node_labels_people_country.csv')

51


#### 1.2. Color the team network

In [14]:
# club name -> league country, from team_league_v2.csv.
clubs_countries = (
    pd.read_csv('team_league_v2.csv')
    .set_index('current_team')['current_league_country']
    .to_dict()
)

# Club migration graph; print its node and edge counts.
G_club = nx.read_gexf('club_migration_network.gexf')
print(G_club.number_of_nodes(), G_club.number_of_edges())

# One row per club node, stored in the 'Id' column.
df_club = pd.DataFrame({'Id': list(G_club.nodes())})

643 1207


In [15]:
# Clubs that have a known league country (keys of clubs_countries).
s2 = set(clubs_countries.keys())

In [16]:
# Clubs that appear as nodes in the club network.
s1 = set(df_club['Id'])

In [17]:
# Sizes of: network clubs, clubs with a country, their overlap, and clubs with
# a country that are not in the network.
(
    len(s1),
    len(s2),
    len(s1 & s2),
    len(s2 - s1),
)

(643, 293, 239, 54)

In [22]:
# Display the club node table (built from G_club.nodes(), node ids in column 'Id').
df_club

Unnamed: 0,Id
0,Man Utd
1,Ajax
2,Werder Bremen
3,Everton
4,PSV Eindhoven
5,Inter
6,Heerenveen
7,Sparta R.
8,Barcelona
9,Willem II


In [18]:
# Clubs with a known country that are not nodes of the club network.
list(s2 - s1)

['AFC Wimbledon',
 'Asante Kotoko',
 'Alanyaspor',
 'Bilbao Athletic',
 'Los Angeles',
 'Salt Lake',
 'FC Juárez',
 'Columbus',
 'Portsmouth',
 'Kayserispor',
 'Gimcheon Sangmu',
 'MK Dons',
 'Luton',
 'Houston',
 'Cremonese',
 'CD Lugo',
 'Millonarios',
 'Fagiano Okayama',
 'Hearts of Oak',
 'Coton Sport FC',
 'Colorado 2',
 'KMSK Deinze',
 'KAS Eupen',
 'Puntarenas FC',
 'Guanacasteca',
 'Deportivo Guadalajara',
 'St. Mirren',
 'Atromitos Athen',
 'Al-Wakrah SC',
 'Al-Ahli SC',
 'Heart of Midl.',
 'Shabab Dubai',
 'Miami',
 'Grecia',
 'Sydney FC',
 'Shonan Bellmare',
 'Cádiz CF',
 'KV Mechelen',
 'St. Johnstone',
 'Ponferradina',
 'Swindon Town',
 'US Monastir',
 'FC Seoul',
 'Imbabura SC',
 'Club Nacional',
 'Nagoya Grampus',
 'Aris Saloniki',
 'SH Shenhua',
 'Omonia Nikosia',
 'Vejle BK',
 'Panetolikos',
 'Al-Fateh',
 'AD San Carlos',
 'Kuwait SC']

In [83]:
# Colour every club node by the country of its league and export the result.
#
# The country -> hex lookup is limited to the countries that occur in
# clubs_countries, so the lookup and the club -> country mapping are derived
# from the same data. Previously this cell re-read 'team_league.csv', a
# different file from the 'team_league_v2.csv' that clubs_countries is built
# from, so the two could disagree.
# NOTE: this rebinds the notebook-level colors_d.
league_countries = set(clubs_countries.values())
colors_d = df_cntr[df_cntr.country.isin(league_countries)].set_index('country').to_dict()['hex']
colors_d.update({'United States' : '#3C3B6E', 'Wales' : '#CF1E26', 'England' : '#FFFFFF'})

df_club['country'] = df_club.Id.map(clubs_countries)
# Clubs without a known country, or whose country has no colour entry, are grey.
df_club['color'] = df_club.country.map(colors_d).fillna('#808080')
df_club.set_index('Id').to_csv('club_colors.csv')
# Rows with no missing value; colour is already filled, so in practice this is
# the number of clubs with a known country.
print(len(df_club.dropna()))
df_club

239


Unnamed: 0,Id,country,color
0,Man Utd,England,#FFFFFF
1,Ajax,Netherlands,#21468B
2,Werder Bremen,Germany,#000000
3,Everton,England,#FFFFFF
4,PSV Eindhoven,Netherlands,#21468B
...,...,...,...
638,El Mokawloon,,#808080
639,Zamalek,Egypt,#000000
640,Stade Gabèsien,,#808080
641,Al-Ain FC,,#808080


In [68]:
# Node attribute table of the club migration network.
df_node_feat = pd.read_csv('club_migration_networks_v4_node_attributes.csv')
# Labels of the clubs with the highest 'Weighted Degree'.
# NOTE: despite the variable name, only the first 25 are kept.
top50 = (
    df_node_feat
    .sort_values('Weighted Degree', ascending=False)['Label']
    .head(25)
    .tolist()
)

def get_new_label(x, top50):
    """Return ``x`` unchanged if it is contained in ``top50``, otherwise ''.

    Parameters
    ----------
    x : label to test.
    top50 : container of labels that should stay visible (anything that
        supports ``in``).
    """
    return x if x in top50 else ''
    

# Keep the label only for clubs listed in top50, blank it for all others,
# then export the new labels keyed by node 'Id'.
df_node_feat['newlabel'] = [
    get_new_label(label, top50) for label in df_node_feat['Label']
]
df_node_feat = df_node_feat.set_index('Id')
df_node_feat[['newlabel']].to_csv('node_clubs_label.csv')

In [114]:
# Three most frequent colours in the exported club colour file, as
# (colour, count) pairs.
Counter(pd.read_csv('club_colors.csv').color).most_common(3)

[('#808080', 409), ('#FFFFFF', 37), ('#000000', 23)]

In [115]:
# Number of rows in the exported club colour file.
pd.read_csv('club_colors.csv').shape[0]

643

In [None]:
# TODO: correct the club network numbers in the article (original note: "CIKK CLUB NETWORK SZÁMOK ÁTJAVÍTANI")