# FIFA API: World Cup player stats

#### Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt
import altair_stiles as altstiles
import numpy as np
import re
import requests

In [3]:
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

ThemeRegistry.enable('stiles')

In [4]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000

---

## Get data

#### Read the teams into a dataframe

In [5]:
players = pd.read_csv("data/processed/world_cup_players.csv", dtype={"player_id": str})

In [6]:
players.head()

Unnamed: 0,player_id,name,position,weight,height,country,country_id,number,picture,dob,age,country_name,position_desc
0,433710,Ilias Chair,2,64.0,164.0,MAR,43872,13,https://digitalhub.fifa.com/transform/02b330d8...,1997-10-30,25.14,Morocco,Midfielder
1,401789,Collins Fai,1,70.0,165.0,CMR,43849,19,https://digitalhub.fifa.com/transform/0373615a...,1992-08-13,30.35,Cameroon,Defender
2,430405,Yuki Soma,2,68.0,166.0,JPN,43819,24,https://digitalhub.fifa.com/transform/923ad90a...,1997-02-25,25.82,Japan,Midfielder
3,251352,Andres Guardado,2,63.0,167.0,MEX,43911,18,https://digitalhub.fifa.com/transform/9aeabab1...,1986-09-28,36.23,Mexico,Midfielder
4,403586,Uriel Antuna,3,62.0,167.0,MEX,43911,21,https://digitalhub.fifa.com/transform/fd0fef42...,1997-08-21,25.33,Mexico,Striker


#### Read results from FIFA API for each team

In [7]:
url = "https://fdh-api.fifa.com/v1/stats/season/255711/players.json"

In [8]:
response = requests.get(url)
data = response.json()

In [9]:
src_dfs = []

for key, value in zip(data.keys(), data.values()):
    src_dfs.append(pd.DataFrame(value).assign(player=key))

In [10]:
src = pd.concat(src_dfs)

In [11]:
src.columns = ["stat", "value", "drop", "player"]

In [12]:
df_merged = pd.merge(
    players[["player_id", "name", "country_name"]],
    src[["stat", "value", "player"]],
    right_on="player",
    left_on="player_id",
)

In [13]:
df_merged["stat"] = (
    df_merged["stat"]
    .astype(str)
    .str.replace(r"(?<=\w)([A-Z])", r" \1", regex=True)
    .str.strip()
    .str.capitalize()
)

In [14]:
df_merged["value"] = df_merged["value"].round(0)

#### Just those who played

In [15]:
players_played = df_merged.query("stat == 'Time played' and value > 0")[
    "player_id"
].to_list()

In [16]:
df = df_merged[df_merged["player_id"].isin(players_played)].copy()

#### Christian Pulisic

In [17]:
df.query("name.str.contains('Messi')")

Unnamed: 0,player_id,name,country_name,stat,value,player
2175,229397,Lionel Messi,Argentina,Corners,20.0,229397
2176,229397,Lionel Messi,Argentina,Attempt at goal blocked,1.0,229397
2177,229397,Lionel Messi,Argentina,Crosses completed,6.0,229397
2178,229397,Lionel Messi,Argentina,Goal kicks,0.0,229397
2179,229397,Lionel Messi,Argentina,Passes,356.0,229397
2180,229397,Lionel Messi,Argentina,Red cards,0.0,229397
2181,229397,Lionel Messi,Argentina,Attempt at goal inside the penalty area on target,14.0,229397
2182,229397,Lionel Messi,Argentina,Attempt at goal on target,18.0,229397
2183,229397,Lionel Messi,Argentina,Assists,3.0,229397
2184,229397,Lionel Messi,Argentina,Goals inside the penalty area,6.0,229397


---

In [18]:
key_metrics = [
    "Crosses",
    "Attempt at goal on target",
    "Free kicks",
    "Passes",
    "Fouls against",
    "Attempt at goal inside the penalty area",
    "Sprints",
    "Speed runs",
    "Total distance",
]

---

#### Make the data wide

In [19]:
df_pivot = df.pivot_table(
    index=["name", "player_id", "country_name"], columns="stat", values="value"
).reset_index()

In [20]:
df_pivot.columns = df_pivot.columns.str.lower().str.replace(" ", "_", regex=False)

In [21]:
df_pivot.head()

stat,name,player_id,country_name,assists,attempt_at_goal,attempt_at_goal_blocked,attempt_at_goal_from_free_kicks,attempt_at_goal_inside_the_penalty_area,attempt_at_goal_inside_the_penalty_area_on_target,attempt_at_goal_off_target,attempt_at_goal_on_target,attempt_at_goal_outside_the_penalty_area,attempt_at_goal_outside_the_penalty_area_on_target,attempted_ball_progressions,attempted_switches_of_play,clean_sheets,completed_ball_progressions,completed_switches_of_play,corners,crosses,crosses_completed,defensive_pressures_applied,direct_defensive_pressures_applied,direct_free_kicks,distance_high_speed_running,distance_high_speed_sprinting,distance_jogging,distance_low_speed_sprinting,distance_walking,distributions_completed_under_pressure,distributions_under_pressure,fouls_against,fouls_for,free_kicks,goal_kicks,goalkeeper_defensive_actions_inside_penalty_area,goalkeeper_defensive_actions_outside_penalty_area,goalkeeper_goal_preventions,goals,goals_conceded_from_attempt_at_goal_against,goals_from_direct_free_kicks,goals_inside_the_penalty_area,goals_outside_the_penalty_area,headed_attempt_at_goal,indirect_free_kicks,linebreaks_attempted,linebreaks_attempted_all_lines,linebreaks_attempted_attacking_and_midfield_line,linebreaks_attempted_attacking_line,linebreaks_attempted_attacking_line_completed,linebreaks_attempted_completed,linebreaks_attempted_defensive_line,linebreaks_attempted_defensive_line_completed,linebreaks_attempted_midfield_and_defensive_line,linebreaks_attempted_midfield_line,linebreaks_attempted_midfield_line_completed,linebreaks_attempted_under_pressure,linebreaks_completed_all_lines,linebreaks_completed_attacking_and_midfield_line,linebreaks_completed_midfield_and_defensive_line,linebreaks_completed_under_pressure,matches_played,offers_to_receive_in_behind,offers_to_receive_in_between,offers_to_receive_in_front,offers_to_receive_inside,offers_to_receive_outside,offers_to_receive_total,offsides,own_goals,passes,passes_completed,penalties,penalties_scored,received_offers_to_receive,receptions_between_midfield_and_defensive_line,receptions_under_direct_pressure,receptions_under_indirect_pressure,receptions_under_no_pressure,receptions_under_pressure,red_cards,speed_runs,sprints,substitutions_in,substitutions_out,take_ons_completed,throw_ins,time_played,top_speed,total_distance,yellow_cards
0,Aaron Mooy,312252,Australia,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,6.0,5.0,2.0,4.0,5.0,3.0,10.0,1.0,127.0,14.0,6.0,7143.0,489.0,25041.0,2046.0,13650.0,114.0,152.0,4.0,1.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,83.0,5.0,11.0,26.0,20.0,56.0,6.0,3.0,5.0,51.0,33.0,58.0,3.0,11.0,3.0,37.0,4.0,3.0,63.0,75.0,86.0,55.0,141.0,0.0,0.0,200.0,168.0,0.0,0.0,56.0,16.0,3.0,92.0,117.0,95.0,0.0,585.0,150.0,0.0,0.0,0.0,0.0,390.0,31.0,48367.0,1.0
1,Aaron Ramsey,299617,Wales,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,6.0,5.0,0.0,2.0,5.0,1.0,3.0,2.0,132.0,20.0,4.0,6352.0,426.0,12967.0,2740.0,11448.0,55.0,81.0,3.0,3.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.0,3.0,4.0,8.0,6.0,21.0,7.0,4.0,3.0,20.0,11.0,22.0,1.0,4.0,1.0,13.0,3.0,51.0,114.0,56.0,148.0,73.0,221.0,0.0,0.0,114.0,87.0,0.0,0.0,57.0,38.0,7.0,40.0,77.0,47.0,0.0,527.0,195.0,0.0,1.0,1.0,1.0,289.0,30.0,33933.0,1.0
2,Abde Ezzalzouli,441310,Morocco,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,1.0,0.0,0.0,1.0,0.0,71.0,15.0,0.0,1564.0,592.0,5092.0,875.0,4298.0,9.0,16.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,3.0,3.0,3.0,1.0,4.0,3.0,1.0,3.0,4.0,2.0,10.0,1.0,3.0,1.0,4.0,3.0,20.0,16.0,5.0,16.0,25.0,41.0,0.0,0.0,13.0,10.0,0.0,0.0,8.0,13.0,5.0,10.0,9.0,15.0,0.0,150.0,74.0,3.0,0.0,0.0,1.0,101.0,33.0,12422.0,0.0
3,Abdelhamid Sabiri,448591,Morocco,1.0,3.0,1.0,1.0,1.0,0.0,2.0,0.0,2.0,0.0,2.0,3.0,3.0,1.0,2.0,1.0,1.0,1.0,115.0,15.0,2.0,3982.0,281.0,10441.0,1618.0,7354.0,32.0,41.0,2.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.0,2.0,2.0,3.0,1.0,7.0,3.0,2.0,2.0,8.0,4.0,12.0,1.0,2.0,1.0,6.0,5.0,17.0,63.0,17.0,72.0,25.0,97.0,0.0,0.0,54.0,46.0,0.0,0.0,25.0,17.0,3.0,32.0,28.0,35.0,0.0,358.0,119.0,3.0,2.0,1.0,0.0,195.0,30.0,23675.0,1.0
4,Abdelkarim Hassan,351129,Qatar,0.0,6.0,0.0,0.0,2.0,0.0,5.0,0.0,4.0,0.0,5.0,14.0,0.0,5.0,11.0,0.0,6.0,1.0,34.0,12.0,7.0,2815.0,647.0,13263.0,1414.0,10660.0,86.0,123.0,1.0,4.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,84.0,4.0,17.0,47.0,36.0,59.0,4.0,2.0,4.0,33.0,21.0,64.0,2.0,17.0,2.0,49.0,3.0,15.0,23.0,69.0,29.0,78.0,107.0,1.0,0.0,149.0,120.0,0.0,0.0,44.0,6.0,4.0,44.0,110.0,48.0,0.0,269.0,127.0,0.0,0.0,0.0,2.0,300.0,34.0,28799.0,0.0


---

#### Who ran the most?

In [22]:
df_pivot["total_distance_km"] = (df_pivot["total_distance"] / 1000).round(1)
df_pivot["hours_played"] = (df_pivot["time_played"] / 60).round(2)
df_pivot["km_per_hour"] = (
    df_pivot["total_distance_km"] / df_pivot["hours_played"]
).round(2)

In [23]:
distance_df = (
    df_pivot.query("hours_played > 1.5")
    .sort_values("km_per_hour", ascending=False)[
        [
            "name",
            "player_id",
            "country_name",
            "speed_runs",
            "sprints",
            "total_distance_km",
            "time_played",
            "hours_played",
            "km_per_hour",
        ]
    ]
    .reset_index(drop="True")
)

In [24]:
distance_df.head()

stat,name,player_id,country_name,speed_runs,sprints,total_distance_km,time_played,hours_played,km_per_hour
0,Brenden Aaronson,419055,USA,262.0,99.0,18.1,117.0,1.95,9.28
1,Lovro Majer,448181,Croatia,350.0,125.0,21.3,148.0,2.47,8.62
2,Keanu Baccus,430446,Australia,256.0,97.0,18.2,129.0,2.15,8.47
3,Stephen Eustaquio,433635,Canada,310.0,90.0,18.3,130.0,2.17,8.43
4,Leandro Trossard,448355,Belgium,178.0,92.0,14.8,107.0,1.78,8.31


---

#### Attempts at goal

In [25]:
df_pivot.head(1)

stat,name,player_id,country_name,assists,attempt_at_goal,attempt_at_goal_blocked,attempt_at_goal_from_free_kicks,attempt_at_goal_inside_the_penalty_area,attempt_at_goal_inside_the_penalty_area_on_target,attempt_at_goal_off_target,attempt_at_goal_on_target,attempt_at_goal_outside_the_penalty_area,attempt_at_goal_outside_the_penalty_area_on_target,attempted_ball_progressions,attempted_switches_of_play,clean_sheets,completed_ball_progressions,completed_switches_of_play,corners,crosses,crosses_completed,defensive_pressures_applied,direct_defensive_pressures_applied,direct_free_kicks,distance_high_speed_running,distance_high_speed_sprinting,distance_jogging,distance_low_speed_sprinting,distance_walking,distributions_completed_under_pressure,distributions_under_pressure,fouls_against,fouls_for,free_kicks,goal_kicks,goalkeeper_defensive_actions_inside_penalty_area,goalkeeper_defensive_actions_outside_penalty_area,goalkeeper_goal_preventions,goals,goals_conceded_from_attempt_at_goal_against,goals_from_direct_free_kicks,goals_inside_the_penalty_area,goals_outside_the_penalty_area,headed_attempt_at_goal,indirect_free_kicks,linebreaks_attempted,linebreaks_attempted_all_lines,linebreaks_attempted_attacking_and_midfield_line,linebreaks_attempted_attacking_line,linebreaks_attempted_attacking_line_completed,linebreaks_attempted_completed,linebreaks_attempted_defensive_line,linebreaks_attempted_defensive_line_completed,linebreaks_attempted_midfield_and_defensive_line,linebreaks_attempted_midfield_line,linebreaks_attempted_midfield_line_completed,linebreaks_attempted_under_pressure,linebreaks_completed_all_lines,linebreaks_completed_attacking_and_midfield_line,linebreaks_completed_midfield_and_defensive_line,linebreaks_completed_under_pressure,matches_played,offers_to_receive_in_behind,offers_to_receive_in_between,offers_to_receive_in_front,offers_to_receive_inside,offers_to_receive_outside,offers_to_receive_total,offsides,own_goals,passes,passes_completed,penalties,penalties_scored,received_offers_to_receive,receptions_between_midfield_and_defensive_line,receptions_under_direct_pressure,receptions_under_indirect_pressure,receptions_under_no_pressure,receptions_under_pressure,red_cards,speed_runs,sprints,substitutions_in,substitutions_out,take_ons_completed,throw_ins,time_played,top_speed,total_distance,yellow_cards,total_distance_km,hours_played,km_per_hour
0,Aaron Mooy,312252,Australia,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,6.0,5.0,2.0,4.0,5.0,3.0,10.0,1.0,127.0,14.0,6.0,7143.0,489.0,25041.0,2046.0,13650.0,114.0,152.0,4.0,1.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,83.0,5.0,11.0,26.0,20.0,56.0,6.0,3.0,5.0,51.0,33.0,58.0,3.0,11.0,3.0,37.0,4.0,3.0,63.0,75.0,86.0,55.0,141.0,0.0,0.0,200.0,168.0,0.0,0.0,56.0,16.0,3.0,92.0,117.0,95.0,0.0,585.0,150.0,0.0,0.0,0.0,0.0,390.0,31.0,48367.0,1.0,48.4,6.5,7.45


In [26]:
goals = df_pivot[
    [
        "name",
        "player_id",
        "country_name",
        "assists",
        "attempt_at_goal",
        "attempt_at_goal_on_target",
        "penalties",
        "goals",
    ]
].sort_values(["goals"], ascending=False)

In [27]:
goals.head(8)

stat,name,player_id,country_name,assists,attempt_at_goal,attempt_at_goal_on_target,penalties,goals
380,Kylian Mbappe,389867,France,2.0,31.0,12.0,2.0,8.0
391,Lionel Messi,229397,Argentina,3.0,32.0,18.0,5.0,7.0
334,Julian Alvarez,416081,Argentina,0.0,12.0,7.0,0.0,4.0
504,Olivier Giroud,358015,France,0.0,15.0,6.0,0.0,4.0
543,Richarlison,429920,Brazil,0.0,7.0,4.0,0.0,3.0
231,Goncalo Ramos,448081,Portugal,1.0,6.0,5.0,0.0,3.0
413,Marcus Rashford,401470,England,0.0,11.0,6.0,0.0,3.0
46,Alvaro Morata,314291,Spain,1.0,8.0,5.0,0.0,3.0


---

## Exports

In [28]:
df.to_csv("data/processed/world_cup_player_stats.csv", index=False)
df.query('country_name == "USA"').to_csv(
    "data/processed/world_cup_player_stats_usa.csv", index=False
)