# FIFA API: World Cup team results

#### Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt
import altair_stiles as altstiles
import numpy as np

In [3]:
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

ThemeRegistry.enable('stiles')

In [4]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000

---

## Get data

#### Read results from FIFA API

In [5]:
results_dicts = pd.read_json(
    "https://api.fifa.com/api/v3/calendar/matches?language=en&count=500&idSeason=255711"
)["Results"]

In [6]:
match_dict = {key: value for key, value in results_dicts.items() if len(value) > 1}

In [7]:
src = pd.DataFrame(match_dict).T

In [8]:
src.columns = src.columns.str.lower()

In [9]:
src[["locale", "group_name"]] = pd.json_normalize(src["groupname"])[0].apply(
    pd.Series, dtype="object"
)

In [10]:
src[
    ["Humidity", "Temperature", "WindSpeed", "Type", "TypeLocalized"]
] = pd.json_normalize(src["weather"])

#### Wrangling json

In [11]:
src["stadium_name"] = src["stadium"].apply(pd.Series)["Name"].apply(pd.Series)

In [12]:
src[["Locale", "Description"]] = pd.json_normalize(src["stadium_name"])

In [13]:
src["weather_description"] = pd.json_normalize(src["TypeLocalized"])

In [14]:
src[["Locale", "Description"]] = pd.json_normalize(src["weather_description"])

In [15]:
src.head(1)

Unnamed: 0,idcompetition,idseason,idstage,idgroup,weather,attendance,idmatch,matchday,stagename,groupname,competitionname,seasonname,seasonshortname,date,localdate,home,away,hometeamscore,awayteamscore,aggregatehometeamscore,aggregateawayteamscore,hometeampenaltyscore,awayteampenaltyscore,lastperiodupdate,leg,ishomematch,stadium,isticketsalesallowed,matchtime,secondhalftime,firsthalftime,firsthalfextratime,secondhalfextratime,winner,matchreporturl,placeholdera,placeholderb,ballpossession,officials,matchstatus,resulttype,matchnumber,timedefined,officialitystatus,matchleginfo,properties,isupdateable,locale,group_name,Humidity,Temperature,WindSpeed,Type,TypeLocalized,stadium_name,Locale,Description,weather_description
0,17,255711,285063,285065,"{'Humidity': '61', 'Temperature': '24', 'WindS...",67372,400128082,,"[{'Locale': 'en-GB', 'Description': 'First sta...","[{'Locale': 'en-GB', 'Description': 'Group A'}]","[{'Locale': 'en-GB', 'Description': 'FIFA Worl...","[{'Locale': 'en-GB', 'Description': 'FIFA Worl...",[],2022-11-20T16:00:00Z,2022-11-20T19:00:00Z,"{'Score': 0, 'Side': None, 'IdTeam': '43834', ...","{'Score': 2, 'Side': None, 'IdTeam': '43927', ...",0,2,,,0,0,,,,"{'IdStadium': '400090319', 'Name': [{'Locale':...",,0',,,,,43927,,A1,A2,"{'Intervals': [], 'LastX': [], 'OverallHome': ...","[{'IdCountry': 'ITA', 'OfficialId': '315593', ...",0,1,1,True,1,,{'IdIFES': '128084'},,en-GB,Group A,61,24,8,10,"[{'Locale': 'en-GB', 'Description': 'Clear Nig...","{'Locale': 'en-GB', 'Description': 'Al Bayt St...",en-GB,Clear Night,"{'Locale': 'en-GB', 'Description': 'Clear Night'}"


In [16]:
src[
    [
        "home_Score",
        "home_Side",
        "home_IdTeam",
        "home_PictureUrl",
        "home_IdCountry",
        "home_Tactics",
        "home_TeamType",
        "home_AgeType",
        "home_TeamName",
        "home_Abbreviation",
        "home_ShortClubName",
        "home_FootballType",
        "home_Gender",
        "home_IdAssociation",
    ]
] = pd.json_normalize(src["home"])

In [17]:
src[
    [
        "away_Score",
        "away_Side",
        "away_IdTeam",
        "away_PictureUrl",
        "away_IdCountry",
        "away_Tactics",
        "away_TeamType",
        "away_AgeType",
        "away_TeamName",
        "away_Abbreviation",
        "away_ShortClubName",
        "away_FootballType",
        "away_Gender",
        "away_IdAssociation",
    ]
] = pd.json_normalize(src["away"])

In [18]:
src[["Intervals", "LastX", "OverallHome", "OverallAway"]] = pd.json_normalize(
    src["ballpossession"]
)

---

#### Clear out what we don't need

In [19]:
df = src.drop(
    [
        "idcompetition",
        "hometeamscore",
        "awayteamscore",
        "hometeampenaltyscore",
        "awayteampenaltyscore",
        "idseason",
        "idstage",
        "idgroup",
        "weather",
        "idmatch",
        "stagename",
        "groupname",
        "competitionname",
        "seasonname",
        "seasonshortname",
        "lastperiodupdate",
        "leg",
        "ishomematch",
        "stadium",
        "isticketsalesallowed",
        "matchtime",
        "secondhalftime",
        "firsthalftime",
        "firsthalfextratime",
        "secondhalfextratime",
        "matchreporturl",
        "placeholderb",
        "officials",
        "matchnumber",
        "timedefined",
        "officialitystatus",
        "matchleginfo",
        "properties",
        # "winner",
        "isupdateable",
        "locale",
        "Type",
        "TypeLocalized",
        "Locale",
        "Description",
        "weather_description",
        "home",
        "away",
        "matchday",
        "aggregatehometeamscore",
        "aggregateawayteamscore",
        "placeholdera",
        "ballpossession",
        "stadium_name",
        "away_PictureUrl",
        "away_TeamName",
        "away_Side",
        "away_IdCountry",
        "away_AgeType",
        "away_TeamType",
        "home_PictureUrl",
        "home_TeamName",
        "home_Side",
        "home_IdCountry",
        "home_TeamType",
        "home_AgeType",
        "away_FootballType",
        "away_IdAssociation",
        "home_FootballType",
        "home_IdAssociation",
        "LastX",
        # "OverallHome",
        # "OverallAway",
        "Intervals",
        "away_Gender",
        "home_Gender",
        "WindSpeed",
        "date",
        "away_Abbreviation",
        "home_Abbreviation",
    ],
    axis=1,
).copy()

In [20]:
df["localdate"] = pd.to_datetime(df["localdate"]).dt.date

In [21]:
df.columns = df.columns.str.lower().str.replace("shortclubname", "team", regex=False)

#### Mean weather

In [22]:
df["temperature"].fillna(np.NaN).astype(float).mean()

25.15625

In [23]:
df["humidity"].fillna(np.NaN).astype(float).mean()

58.28125

----

#### Tactics

In [24]:
df.home_tactics.value_counts()

4-3-3    29
4-5-1    11
4-4-2     8
3-5-2     7
3-4-3     6
5-3-2     2
5-4-1     1
Name: home_tactics, dtype: int64

In [25]:
df.away_tactics.value_counts()

4-3-3    26
4-5-1    11
4-4-2     9
3-4-3     8
5-3-2     5
3-5-2     4
3-6-1     1
Name: away_tactics, dtype: int64

#### USA

In [26]:
df.query('away_team == "USA" or home_team == "USA"')

Unnamed: 0,attendance,localdate,winner,matchstatus,resulttype,group_name,humidity,temperature,home_score,home_idteam,home_tactics,home_team,away_score,away_idteam,away_tactics,away_team,overallhome,overallaway
3,43418,2022-11-21,,0,1,Group B,71,23,1,43921,4-3-3,USA,1,43974,5-3-2,Wales,57.767494,42.232506
19,68463,2022-11-25,,0,1,Group B,66,25,0,43942,4-3-3,England,0,43921,4-3-3,USA,54.3105,45.689503
35,42127,2022-11-29,43921.0,0,1,Group B,75,24,0,43817,4-5-1,IR Iran,1,43921,4-3-3,USA,47.34187,52.65813
48,44846,2022-12-03,43960.0,0,1,,59,26,3,43960,3-5-2,Netherlands,1,43921,4-3-3,USA,42.413094,57.58691


In [27]:
usa_df = df.query('away_team == "USA" or home_team == "USA"')

In [28]:
df["winner_team"] = (
    df[["home_score", "away_score"]].idxmax(axis=1).str.replace("_score", "")
)
df["winner_possession"] = (
    df[["overallhome", "overallaway"]].idxmax(axis=1).str.replace("overall", "")
)

In [29]:
possession = df[
    [
        "home_team",
        "home_score",
        "away_team",
        "away_score",
        "winner_team",
        "overallhome",
        "overallaway",
        "winner_possession",
    ]
].dropna()

In [30]:
possession["winner_same"] = possession["winner_team"] == possession["winner_possession"]

In [31]:
possession.winner_same.value_counts()

True     32
False    32
Name: winner_same, dtype: int64

In [32]:
possession.tail(10)

Unnamed: 0,home_team,home_score,away_team,away_score,winner_team,overallhome,overallaway,winner_possession,winner_same
54,Morocco,0,Spain,0,home,28.400448,71.59955,away,False
55,Portugal,6,Switzerland,1,home,46.83821,53.161785,away,False
56,Croatia,1,Brazil,1,home,51.763077,48.236923,home,True
57,Netherlands,2,Argentina,2,home,53.35453,46.64547,home,True
58,Morocco,1,Portugal,0,home,31.314236,68.68577,away,False
59,England,1,France,2,away,57.90713,42.09287,home,False
60,Argentina,3,Croatia,0,home,41.335835,58.66416,away,False
61,France,2,Morocco,0,home,40.78011,59.21989,away,False
62,Croatia,2,Morocco,1,home,48.827496,51.17251,away,False
63,Argentina,3,France,3,home,51.878418,48.121582,home,True


---

## Export

In [33]:
df.to_csv("data/processed/world_cup_matches_results.csv", index=False)
usa_df.to_csv("data/processed/world_cup_matches_results_usa.csv", index=False)