In [14]:
import requests
import math
import time
import json

from tqdm import tqdm

In [15]:
def get_epl_table(timeout=30):
    """Fetch the league standings entries from the pulselive standings endpoint.

    The request targets competition season 418 and sends an ``origin`` header
    set to the Premier League website.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The ``entries`` value of the first item in the payload's ``tables``
        list.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = 'https://footballapi.pulselive.com/football/standings?compSeasons=418&altIds=true&detail=2&FOOTBALL_COMPETITION=1'
    response = requests.get(
        url,
        headers={"origin": "https://www.premierleague.com"},
        timeout=timeout,
    )
    # Fail loudly on 4xx/5xx instead of tripping over a missing "tables" key.
    response.raise_for_status()
    payload = response.json()
    return payload["tables"][0]["entries"]

In [16]:
# Fetch first, then open the output file: opening with "w" truncates it, so a
# failed request must not be allowed to wipe a previously saved table.
table = get_epl_table()
with open("data/epl_table.json", "w") as f:
    json.dump(table, f)

In [None]:
def get_team(teamId: int, timeout=30):
    """Fetch the player staff listing for one team in competition season 418.

    Args:
        teamId: Team identifier interpolated into the request URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON payload of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = 'https://footballapi.pulselive.com/football/teams/%s/compseasons/418/staff?pageSize=50&compSeasons=418&altIds=true&page=0&type=player' % str(teamId)
    response = requests.get(
        url,
        headers={"origin": "https://www.premierleague.com"},
        timeout=timeout,
    )
    # Surface HTTP errors here rather than storing an error body as team data.
    response.raise_for_status()
    return response.json()

In [17]:
# One squad request per standings entry; the raw responses are saved to disk
# as a single JSON document.
all_teams = [
    get_team(standing["team"]["id"])
    for standing in table
]
with open("data/raw_team_data.json", "w") as f:
    json.dump(all_teams, f)

In [42]:
def is_invalid(raw_player):
    """Report whether a raw player record lacks the fields needed for cleaning.

    A record is invalid when its ``birth`` entry has no ``date`` or its
    ``nationalTeam`` entry has no ``country``. A record where either entry is
    missing altogether (or is ``None``) is also treated as invalid instead of
    raising ``KeyError``.

    Args:
        raw_player: Player dictionary as found in a team's ``players`` list.

    Returns:
        True if the record should be skipped, False otherwise.
    """
    birth = raw_player.get("birth") or {}
    national_team = raw_player.get("nationalTeam") or {}
    return "date" not in birth or "country" not in national_team


def clean_player(team, raw_player):
    """Flatten a raw player record into the fields kept for analysis.

    Args:
        team: Team payload; ``team["team"]["name"]`` becomes the player's team.
        raw_player: Player dictionary from the team's ``players`` list. It must
            contain ``id``, ``altIds.opta``, ``info.position``,
            ``info.shirtNum``, ``nationalTeam``, ``birth.date.millis`` and
            ``name``.

    Returns:
        A dictionary with the keys ``id`` (as int), ``optaId``, ``position``,
        ``number``, ``nation``, ``dob``, ``name`` and ``team``.

    Raises:
        KeyError: If any of the required fields is absent.
    """
    info = raw_player["info"]

    return {
        "id": int(raw_player["id"]),
        # Read from the argument, not from whatever global `player` happens
        # to exist in the notebook namespace.
        "optaId": raw_player["altIds"]["opta"],
        "position": info["position"],
        "number": info["shirtNum"],
        "nation": raw_player["nationalTeam"],
        "dob": raw_player["birth"]["date"]["millis"],
        "name": raw_player["name"],
        "team": team["team"]["name"],
    }
    
    
def get_player_stats(cleansed_player, timeout=30):
    """Fetch a player's season stats and keep only the ones of interest.

    Args:
        cleansed_player: Dictionary with an ``id`` key; the value is converted
            to int before being placed in the request URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dictionary with the keys ``goals``, ``assists``, ``appearances`` and
        ``mins_played``. Stats the API does not report are set to 0; reported
        values are converted to int.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the payload has no ``stats`` entry.
    """
    # API stat name -> key used in the returned dictionary.
    stat_name_mapping = {
        "goals": "goals",
        "goal_assist": "assists",
        "appearances": "appearances",
        "mins_played": "mins_played"
    }

    url = 'https://footballapi.pulselive.com/football/stats/player/%s?comps=1&compSeasons=418' % str(int(cleansed_player["id"]))
    response = requests.get(
        url,
        headers={"origin": "https://www.premierleague.com"},
        timeout=timeout,
    )
    # Surface HTTP errors explicitly instead of failing on a missing key below.
    response.raise_for_status()
    payload = response.json()

    # Seed every output key with zero, then overwrite with reported values.
    relevant_stats = dict.fromkeys(stat_name_mapping.values(), 0)
    for stat in payload["stats"]:
        output_name = stat_name_mapping.get(stat["name"])
        if output_name is not None:
            relevant_stats[output_name] = int(stat["value"])

    return relevant_stats

In [None]:
# Build one record per valid player: cleaned profile fields merged with the
# season stats. Players failing validation go to `invalid`; players whose
# processing raised go to `exceptions`.
augmented_players = []
invalid = []
exceptions = []

# `all_teams` is the list of squad payloads fetched earlier in the notebook.
for team in tqdm(all_teams):
    for player in team["players"]:
        try:
            if is_invalid(player):
                invalid.append(player)
                continue

            filtered = clean_player(team, player)
            stats = get_player_stats(filtered)
            # Profile fields are layered on top of the stats dictionary.
            stats.update(filtered)
            augmented_players.append(stats)
            # Short pause between stats requests.
            time.sleep(0.05)
        except Exception:
            # `Exception` (not a bare except) so Ctrl-C / kernel interrupt
            # still stops this long-running loop.
            exceptions.append(player)

with open("data/augmented_players.json", "w") as f:
    json.dump(augmented_players, f)

In [None]:
# Line by line: augmented records, invalid records, records that raised.
print(len(augmented_players), len(invalid), len(exceptions), sep="\n")

In [None]:
# Spot check: show every augmented record whose first name is "Kyle".
list(filter(lambda record: record["name"]["first"] == "Kyle", augmented_players))

In [None]:
## DOWNLOAD PHOTOS

# Fetch each valid player's photo in both sizes. One loop covers both sizes so
# the request, write and pause logic cannot drift apart between them.
# `all_teams` is the list of squad payloads fetched earlier in the notebook.
for size in ("40x40", "250x250"):
    for team in tqdm(all_teams, desc=size):
        for player in team["players"]:
            if is_invalid(player):
                continue

            opta_id = player["altIds"]["opta"]
            url = "https://resources.premierleague.com/premierleague/photos/players/%s/%s.png" % (size, opta_id)
            response = requests.get(url, timeout=30)
            # Only store successful responses; an error body saved under a
            # .png name would look like a photo but not be one.
            if response.ok:
                with open("data/photos/%s/%s.png" % (size, opta_id), "wb") as f:
                    f.write(response.content)
            # Pause after every request, whether or not it succeeded.
            time.sleep(0.2)