In [9]:
# Imports
from pathlib import Path

import pandas as pd
import requests

In [4]:
# Official FPL API: base URL plus the endpoint path segments that are
# appended to it when building request URLs.
API_URL: str = "https://fantasy.premierleague.com/api/"
# Season-wide data (events, teams, elements, ...)
ALL_DATA_ENDPOINT: str = "bootstrap-static"
# Fixture list endpoint
FIXTURES_ENDPOINT: str = "fixtures"
# Per-player detail; the player id is appended as "/<id>"
PLAYER_DATA_ENDPOINT: str = "element-summary"

In [5]:
# Fetch all the data
# The bootstrap-static endpoint returns one JSON document, parsed into a dict.
# A timeout is set because requests waits indefinitely by default.
bootstrap_response = requests.get(API_URL + ALL_DATA_ENDPOINT, timeout=30)
# Fail loudly on 4xx/5xx instead of trying to parse an error page as data
bootstrap_response.raise_for_status()
data = bootstrap_response.json()

In [7]:
# Check the different data types in the result:
# list the top-level sections of the response (e.g. events, teams, elements)
data.keys()

dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])

In [None]:
# Build the events data frame from the "events" section of the response.
# Each entry holds the overall data for one Gameweek.
events_df = pd.DataFrame(data=data["events"])

In [16]:
# Preview the first rows of the Gameweek data to check the columns
events_df.head()

Unnamed: 0,id,name,deadline_time,average_entry_score,finished,data_checked,highest_scoring_entry,deadline_time_epoch,deadline_time_game_offset,highest_score,...,cup_leagues_created,h2h_ko_matches_created,chip_plays,most_selected,most_transferred_in,top_element,top_element_info,transfers_made,most_captained,most_vice_captained
0,1,Gameweek 1,2023-08-11T17:30:00Z,64,True,True,3383750.0,1691775000,0,127.0,...,False,False,"[{'chip_name': 'bboost', 'num_played': 163222}...",355.0,1.0,395.0,"{'id': 395, 'points': 14}",0,355.0,19.0
1,2,Gameweek 2,2023-08-18T17:15:00Z,44,True,True,3338487.0,1692378900,0,120.0,...,True,True,"[{'chip_name': 'bboost', 'num_played': 126778}...",355.0,195.0,108.0,"{'id': 108, 'points': 16}",13130353,355.0,19.0
2,3,Gameweek 3,2023-08-25T17:30:00Z,44,True,True,9368956.0,1692984600,0,128.0,...,True,True,"[{'chip_name': 'bboost', 'num_played': 124110}...",355.0,108.0,216.0,"{'id': 216, 'points': 19}",17619532,355.0,19.0
3,4,Gameweek 4,2023-09-01T17:30:00Z,72,True,True,4354697.0,1693589400,0,148.0,...,True,True,"[{'chip_name': 'bboost', 'num_played': 109196}...",355.0,216.0,516.0,"{'id': 516, 'points': 20}",16035365,355.0,19.0
4,5,Gameweek 5,2023-09-16T10:00:00Z,44,True,True,6211182.0,1694858400,0,102.0,...,True,True,"[{'chip_name': 'bboost', 'num_played': 96469},...",355.0,516.0,344.0,"{'id': 344, 'points': 13}",14363988,355.0,19.0


In [20]:
# Build the teams data frame from the "teams" section of the response
teams_df = pd.DataFrame(data=data["teams"])

In [21]:
# Preview the first rows of the teams data to check the columns
teams_df.head()

Unnamed: 0,code,draw,form,id,loss,name,played,points,position,short_name,...,team_division,unavailable,win,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id
0,3,0,,1,0,Arsenal,0,0,0,ARS,...,,False,0,1230,1285,1250,1250,1210,1320,1
1,7,0,,2,0,Aston Villa,0,0,0,AVL,...,,False,0,1115,1175,1130,1190,1100,1160,2
2,91,0,,3,0,Bournemouth,0,0,0,BOU,...,,False,0,1060,1095,1050,1100,1060,1090,127
3,94,0,,4,0,Brentford,0,0,0,BRE,...,,False,0,1125,1205,1120,1220,1130,1190,130
4,36,0,,5,0,Brighton,0,0,0,BHA,...,,False,0,1165,1210,1120,1200,1210,1240,131


In [27]:
# Build the players data frame from the "elements" section of the response.
# It carries each player's overall stats so far this season.
players_df = pd.DataFrame(data=data["elements"])

In [28]:
# Preview the first rows of the players data to check the columns
players_df.head()

Unnamed: 0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,element_type,ep_next,...,now_cost_rank,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90
0,0.0,0.0,232223,0,0,-1,1,0,4,0.0,...,524,94,547,54,611,64,305,47,0.0,0.0
1,,,58822,0,0,-1,1,0,2,0.5,...,716,227,378,118,474,158,251,99,0.0,0.0
2,100.0,100.0,153256,0,0,-1,1,0,3,0.5,...,492,281,527,193,54,30,403,130,0.0,0.0
3,0.0,0.0,438098,0,0,-1,1,0,3,0.0,...,127,80,744,322,185,85,405,131,0.76,0.38
4,100.0,100.0,226597,1,-1,-1,1,0,2,3.8,...,254,33,82,21,119,38,20,7,0.97,0.43


In [None]:
# Get detailed info for each player from players_df.
#
# For every player the element-summary endpoint is queried and three sections
# of its response are collected:
#   - "fixtures":     the upcoming fixtures
#   - "history":      past fixtures from the same season
#   - "history_past": past seasons' data
# Every per-player frame gets a "player_id" column. The frames are gathered in
# lists and concatenated once after the loop; calling pd.concat inside the loop
# re-copies all previously accumulated rows on every iteration.

# Without a timeout requests waits indefinitely on a stalled connection
REQUEST_TIMEOUT_SECONDS = 30


def _concat_or_empty(frames):
    """Concatenate the collected frames; return an empty frame if there are none.

    pd.concat raises ValueError on an empty list, which would happen when
    players_df has no rows.
    """
    return pd.concat(frames) if frames else pd.DataFrame()


# Response section name -> per-player frames collected so far
player_frames = {"fixtures": [], "history": [], "history_past": []}
total_players = len(players_df)

# One session reuses the HTTP connection across the per-player requests
with requests.Session() as session:
    player_rows = players_df[["id", "first_name", "second_name"]].itertuples(
        index=False, name=None
    )
    for i, (player_id, first_name, second_name) in enumerate(player_rows):
        # Announce before the request so a failure can be attributed to a player
        print(f"Fetching data for {first_name} {second_name}")

        # Get the individual player data
        response = session.get(
            API_URL + PLAYER_DATA_ENDPOINT + f"/{player_id}",
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        # Surface HTTP errors here rather than as a JSON/KeyError further down
        response.raise_for_status()
        player_data = response.json()

        for section, frames in player_frames.items():
            section_df = pd.DataFrame(player_data[section])
            # Add player id so the rows can be linked back to players_df
            section_df["player_id"] = player_id
            frames.append(section_df)

        if i % 20 == 0:
            print(f"Progress: {(i / total_players * 100): .2f}%")

# The index is deliberately not reset: it matches what repeated concatenation
# inside the loop produced (each player's rows keep their own 0..k index).
player_fixtures_df = _concat_or_empty(player_frames["fixtures"])
player_history_df = _concat_or_empty(player_frames["history"])
player_history_past_df = _concat_or_empty(player_frames["history_past"])


In [None]:
# Preview the first rows of the collected upcoming fixtures
player_fixtures_df.head()

In [None]:
# Preview the first rows of the collected current-season history
player_history_df.head()

In [None]:
# Preview the first rows of the collected past-season history
player_history_past_df.head()

In [None]:
# Save all the dataframes as CSV files under data/
output_dir = Path("data")
# to_csv does not create missing directories, so make sure the target exists
output_dir.mkdir(parents=True, exist_ok=True)

# File stem -> frame to write; each one ends up at data/<stem>.csv
frames_to_save = {
    "events": events_df,
    "teams": teams_df,
    "players": players_df,
    "player_fixtures": player_fixtures_df,
    "player_history": player_history_df,
    "player_history_past": player_history_past_df,
}
for file_stem, frame in frames_to_save.items():
    # The index is still written (to_csv default), keeping the file layout unchanged
    frame.to_csv(output_dir / f"{file_stem}.csv")