In [1]:
from glob import glob 
from slugify import slugify
import numpy as np
import pandas as pd
import json
import os

## Countries, teams and players

In [2]:
def load_teams_players():
    """Collect teams, players and player countries from the scraped team files.

    Every file matching ``scoreboard-players/teams/*`` is parsed as JSON.
    Files are visited in sorted path order so the resulting row order is
    reproducible between runs.

    Returns:
        tuple: ``(teams, players, countries)`` where

        * ``teams`` is a list of ``[id, name, country]`` rows, one per file;
        * ``players`` is a list of ``[id, name, country_slug, kind, type,
          jersey, age, current_team]`` rows, one per squad member;
        * ``countries`` is a list of ``(slug, original_name)`` pairs, one per
          distinct country slug seen on a player.
    """
    teams = []
    players = []
    countries = {}
    # glob() makes no ordering promise; sort so the exported CSVs are stable.
    for path in sorted(glob("scoreboard-players/teams/*")):
        # JSON is UTF-8; do not depend on the platform's default encoding.
        with open(path, "r", encoding="utf-8") as fh:
            team = json.load(fh)
        # Teams without a "squad" key contribute a team row but no players.
        for player in team.get("squad", ()):
            country_slug = slugify(player["country"])
            if not country_slug:
                # Skip players whose country slugifies to an empty string.
                continue
            # Drops the first 11 characters and the last one of the link and
            # flattens the remaining slashes.
            # NOTE(review): presumably strips a fixed URL prefix and a
            # trailing "/" -- confirm against the scraper output.
            player_id = player["link"][11:-1].replace("/", "_")
            # -1 is stored as a missing value for jersey and age.
            jersey = player["jersey"] if player["jersey"] != -1 else None
            age = player["age"] if player["age"] != -1 else None
            players.append([player_id, player["name"], country_slug,
                            player["kind"], slugify(player["type"]),
                            jersey, age, team['id']])
            # Keep the first spelling encountered for each slug.
            countries.setdefault(country_slug, player["country"])
        teams.append([team["id"], team["name"], team["country"]])
    return teams, players, list(countries.items())

# Reuse the CSV exports of a previous run when all three are present;
# otherwise rebuild the frames from the raw team files.
# The guard checks exactly the files that are read below: countries are
# exported as CSV, so testing for a ".json" file would never hit the cache.
if os.path.exists("scoreboard-players/players.csv") and\
    os.path.exists("scoreboard-players/teams.csv") and\
    os.path.exists("scoreboard-players/countries.csv"):
    # index_col=0 consumes the unnamed index column written by to_csv().
    players_df = pd.read_csv("scoreboard-players/players.csv", index_col=0)
    teams_df = pd.read_csv("scoreboard-players/teams.csv", index_col=0)
    countries_df = pd.read_csv("scoreboard-players/countries.csv", index_col=0)
else:
    teams, players, countries = load_teams_players()
    players_df = pd.DataFrame(players, columns=['id', 'name', 'country',
                                              'kind', 'type', 
                                              'jersey', 'age', 'current_team'])
    teams_df = pd.DataFrame(teams, columns=['id', 'name', 'country'])
    countries_df = pd.DataFrame(countries, columns=['id', 'name'])

In [3]:
# Summarise each frame and write it to its CSV export, one frame at a time.
# DataFrame.info() prints the summary itself and returns None, so the
# surrounding print() also emits a "None" line after each summary.
for frame, csv_path in (
    (players_df, 'scoreboard-players/players.csv'),
    (teams_df, 'scoreboard-players/teams.csv'),
    (countries_df, 'scoreboard-players/countries.csv'),
):
    print(frame.info())
    frame.to_csv(csv_path)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57417 entries, 0 to 57416
Data columns (total 8 columns):
id              57417 non-null object
name            57417 non-null object
country         57417 non-null object
kind            57417 non-null object
type            57417 non-null object
jersey          54068 non-null float64
age             50530 non-null float64
current_team    57417 non-null object
dtypes: float64(2), object(6)
memory usage: 3.5+ MB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11869 entries, 0 to 11868
Data columns (total 3 columns):
id         11869 non-null object
name       11869 non-null object
country    11869 non-null object
dtypes: object(3)
memory usage: 278.3+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 199 entries, 0 to 198
Data columns (total 2 columns):
id      199 non-null object
name    199 non-null object
dtypes: object(2)
memory usage: 3.2+ KB
None


## Leagues

In [5]:
def load_leagues():
    """Collect leagues and their team memberships from the league JSON files.

    Every file matching ``scoreboard-players/*_*.json`` is parsed as JSON, in
    sorted path order so the resulting row order is reproducible. The league
    id is the file name without its directory and ``.json`` extension.

    Returns:
        tuple: ``(leagues, leagues_teams)`` where ``leagues`` is a list of
        ``[id, country, name]`` rows and ``leagues_teams`` is a list of
        ``[league_id, team]`` rows, one per entry of the file's ``"teams"``
        list.
    """
    leagues = []
    leagues_teams = []
    # glob() makes no ordering promise; sort so the exported CSVs are stable.
    for league_file in sorted(glob("scoreboard-players/*_*.json")):
        # Derive the id from the path itself instead of fixed slice offsets,
        # so it does not silently break if the directory name changes.
        id_ = os.path.splitext(os.path.basename(league_file))[0]
        # JSON is UTF-8; do not depend on the platform's default encoding.
        with open(league_file, "r", encoding="utf-8") as fh:
            league = json.load(fh)
        leagues.append([id_, league['country'], league["name"]])
        leagues_teams.extend([id_, team] for team in league["teams"])
    return leagues, leagues_teams

# Build the league frames from the raw JSON files unless both CSV exports
# from a previous run are already on disk, in which case those are reused.
if not os.path.exists("scoreboard-players/leagues.csv") or \
        not os.path.exists("scoreboard-players/leagues_teams.csv"):
    leagues, leagues_teams = load_leagues()
    leagues_df = pd.DataFrame(leagues, columns=['id', 'country', 'name'])
    leagues_teams_df = pd.DataFrame(leagues_teams, columns=['league', 'team'])
else:
    # index_col=0 consumes the unnamed index column written by to_csv().
    leagues_df = pd.read_csv("scoreboard-players/leagues.csv", index_col=0)
    leagues_teams_df = pd.read_csv(
        "scoreboard-players/leagues_teams.csv", index_col=0
    )

In [6]:
# Summarise each league frame and write it to its CSV export.
# DataFrame.info() prints the summary itself and returns None, so the
# surrounding print() also emits a "None" line after each summary.
for frame, csv_path in (
    (leagues_df, 'scoreboard-players/leagues.csv'),
    (leagues_teams_df, 'scoreboard-players/leagues_teams.csv'),
):
    print(frame.info())
    frame.to_csv(csv_path)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 957 entries, 0 to 956
Data columns (total 3 columns):
id         957 non-null object
country    957 non-null object
name       957 non-null object
dtypes: object(3)
memory usage: 22.5+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23818 entries, 0 to 23817
Data columns (total 2 columns):
league    23818 non-null object
team      23818 non-null object
dtypes: object(2)
memory usage: 372.2+ KB
None


## Scripts

### Country  

```
// Create one Country node per row of countries.csv.
LOAD CSV WITH HEADERS FROM 'file:///countries.csv' AS row
CREATE (country:Country {id: row.id, name: row.name})
```
___________________
```
// Index Country nodes on id and name.
// Each statement ends with ';' so the two can be run together as a script.
CREATE INDEX ON :Country(id);
CREATE INDEX ON :Country(name);
```

### Teams  

```
// Create one Team node per row of teams.csv and link it to the Country
// node whose id equals the row's country column.
LOAD CSV WITH HEADERS FROM 'file:///teams.csv' AS row
CREATE (t:Team {id: row.id, name: row.name})
WITH row, t
MATCH (country:Country {id: row.country})
MERGE (t)-[:LOCATED_IN]->(country)
```
_______________
```
// Index Team nodes on id and name.
// Each statement ends with ';' so the two can be run together as a script.
CREATE INDEX ON :Team(id);
CREATE INDEX ON :Team(name);
```

### Players    

```
// Create one Person node per row of players.csv; age and jersey are
// converted to integers.
LOAD CSV WITH HEADERS FROM 'file:///players.csv' as line
CREATE (p:Person{
    id:line["id"], 
    name:line["name"], 
    age:toInteger(line.age), 
    jersey:toInteger(line.jersey)})
WITH p, line

// FOREACH over a one-element or empty list acts as a conditional.
// Rows with one of the four squad types also get the :Player label, and
// p.type is the type with its last character removed
// (e.g. "goalkeepers" -> "goalkeeper").
// size() is the string-length function; length() is meant for paths.
FOREACH(n IN (CASE WHEN line.type IN ["goalkeepers", "midfielders", "defenders", "forwards"] THEN [1] else [] END) | 
    SET p:Player,
        p.type=left(line.type,size(line.type)-1)
)

// Rows of type "coach" also get the :Coach label.
FOREACH(n IN (CASE WHEN line.type IN ["coach"] THEN [1] else [] END) | 
    SET p:Coach
)

// Link the person to the Country and Team nodes matching the row's ids.
WITH p, line
MATCH (country:Country{id:line.country})
MATCH (team:Team{id:line.current_team})
MERGE (p)-[:WAS_BORN]->(country)
MERGE (p)-[:MEMBER_OF]->(team)
```
___________________
```
// Index Person nodes on id and name.
// Each statement ends with ';' so the two can be run together as a script.
CREATE INDEX ON :Person(id);
CREATE INDEX ON :Person(name);
```

### Leagues

```
// Create one League node per row of leagues.csv and link it to the Country
// node whose id equals the row's country column.
LOAD CSV WITH HEADERS FROM 'file:///leagues.csv' AS row
CREATE (league:League {id: row.id, name: row.name})
WITH row, league
MATCH (c:Country {id: row.country})
MERGE (league)-[:LOCATED_IN]->(c)
```
___________________
```
// Index League nodes on id and name.
// Each statement ends with ';' so the two can be run together as a script.
CREATE INDEX ON :League(id);
CREATE INDEX ON :League(name);
```

### Leagues - Teams

```
// For each (league, team) row of leagues_teams.csv, connect the existing
// Team node to the existing League node.
LOAD CSV WITH HEADERS FROM 'file:///leagues_teams.csv' AS row
MATCH (l:League {id: row.league})
MATCH (t:Team {id: row.team})
MERGE (t)-[:PARTICIPATES_IN]->(l)
```

#### End result:

<img src="graph.svg" height="64" alt="Graph produced by the import scripts above"/>