In [1]:
import json
import pandas as pd
import numpy as np

# Load player season history and create features

In [2]:
# Load every player's season history into a single data frame.
# bootstrap-static.json supplies the player list ('elements'); each player has
# a JSON file of their own whose 'history' records are the rows collected here.
with open('data/during-season/bootstrap-static.json', 'r') as json_file:
    data = json.load(json_file)

df_player_overview = pd.json_normalize(data, record_path='elements')

# Collect one frame per player and concatenate once after the loop.
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copies every accumulated row on each iteration.
player_history_frames = []
for _, player in df_player_overview.iterrows():
    player_id = str(player['id'])
    first_name = player['first_name']
    last_name = player['second_name']
    team_id = player['team']
    # Player files are named <id zero-padded to 3 chars>_<first>_<last>.json,
    # with spaces replaced by underscores.
    file_name = f"{player_id.rjust(3, '0')}_{first_name}_{last_name}".replace(' ', '_')

    with open(f'data/during-season/players/{file_name}.json', 'r') as json_file:
        player_data = json.load(json_file)

    df_player_history = pd.json_normalize(player_data, record_path='history')
    df_player_history['name'] = f'{first_name} {last_name}'
    df_player_history['team_id'] = team_id

    player_history_frames.append(df_player_history)

# pd.concat raises on an empty list, so fall back to an empty frame
# (the result the append-based loop produced when there were no players).
if player_history_frames:
    df_players_history = pd.concat(player_history_frames, ignore_index=True)
else:
    df_players_history = pd.DataFrame()

df_players_history.shape

(6324, 35)

In [3]:
# Load teams data, indexed by team id so it can be used as an id -> name lookup
df_teams = pd.json_normalize(data, record_path='teams').set_index('id')

# Map team ids to team names for better readability
df_players_history['team'] = df_players_history['team_id'].map(df_teams['name'])

# Keep the numeric opponent id in its own column before 'opponent_team' is
# overwritten with names. The guard makes the cell safe to re-run: without it a
# second run would copy the already-mapped names into 'opponent_team_id' and
# then map those names to missing values, since names are not team ids.
if 'opponent_team_id' not in df_players_history.columns:
    df_players_history['opponent_team_id'] = df_players_history['opponent_team']
df_players_history['opponent_team'] = df_players_history['opponent_team_id'].map(df_teams['name'])

In [11]:
# Show the last five rows of the combined history table
df_players_history.tail(n=5)

Unnamed: 0,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,minutes,...,recoveries,value,transfers_balance,selected,transfers_in,transfers_out,name,team_id,team,opponent_team_id
6319,475.0,113.0,Degerfors IF,1.0,True,2022-07-17T13:00:00Z,2.0,0.0,14.0,9.0,...,1.0,45.0,0.0,0.0,0.0,0.0,Óli Ómarsson,3,IK Sirius,16.0
6320,476.0,110.0,Hammarby,0.0,False,2022-07-17T13:00:00Z,3.0,0.0,14.0,0.0,...,0.0,65.0,2.0,2.0,2.0,0.0,Niklas Hult,5,IF Elfsborg,2.0
6321,477.0,110.0,IF Elfsborg,0.0,True,2022-07-17T13:00:00Z,3.0,0.0,14.0,0.0,...,0.0,45.0,0.0,0.0,0.0,0.0,Fredrik Hammar,2,Hammarby,5.0
6322,478.0,113.0,IK Sirius,0.0,False,2022-07-17T13:00:00Z,2.0,0.0,14.0,0.0,...,0.0,40.0,0.0,0.0,0.0,0.0,Elyas Bouzaiene,16,Degerfors IF,3.0
6323,481.0,114.0,Varbergs BoIS,0.0,False,2022-07-18T17:00:00Z,0.0,0.0,14.0,0.0,...,0.0,45.0,0.0,0.0,0.0,0.0,Anton Nilsson,19,Helsingborgs IF,13.0


In [10]:
# Count the distinct values in each column of the history table
df_players_history.nunique(axis=0)

element                             479
fixture                             114
opponent_team                        16
total_points                         25
was_home                              2
kickoff_time                         68
team_h_score                          6
team_a_score                          7
round                                14
minutes                              84
goals_scored                          4
assists                               4
clean_sheets                          2
goals_conceded                        7
penalties_saved                       2
penalties_missed                      2
yellow_cards                          2
red_cards                             2
saves                                12
own_goals                             2
attacking_bonus                       3
defending_bonus                       3
winning_goals                         2
crosses                               9
key_passes                            9


In [5]:
# Write the combined player history to CSV, leaving out the row index
history_csv_path = 'data/during-season/player_history.csv'
df_players_history.to_csv(history_csv_path, index=False)

## Create data file for player future events

In [6]:
# Load every player's upcoming fixtures into a single data frame.
# bootstrap-static.json supplies the player list ('elements'); each player has
# a JSON file of their own whose 'fixtures' records are the rows collected here.
with open('data/during-season/bootstrap-static.json', 'r') as json_file:
    data = json.load(json_file)

df_player_overview = pd.json_normalize(data, record_path='elements')

# Collect one frame per player and concatenate once after the loop.
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copies every accumulated row on each iteration.
player_fixture_frames = []
for _, player in df_player_overview.iterrows():
    player_id = str(player['id'])
    first_name = player['first_name']
    last_name = player['second_name']
    team_id = player['team']
    # Player files are named <id zero-padded to 3 chars>_<first>_<last>.json,
    # with spaces replaced by underscores.
    file_name = f"{player_id.rjust(3, '0')}_{first_name}_{last_name}".replace(' ', '_')

    with open(f'data/during-season/players/{file_name}.json', 'r') as json_file:
        player_data = json.load(json_file)

    df_player_fixtures = pd.json_normalize(player_data, record_path='fixtures')
    # The fixture records are tagged with the owning player here; the id is
    # stored in its string form, exactly as before.
    df_player_fixtures['element'] = player_id
    df_player_fixtures['name'] = f'{first_name} {last_name}'

    player_fixture_frames.append(df_player_fixtures)

# pd.concat raises on an empty list, so fall back to an empty frame
# (the result the append-based loop produced when there were no players).
if player_fixture_frames:
    df_all_player_fixtures = pd.concat(player_fixture_frames, ignore_index=True)
else:
    df_all_player_fixtures = pd.DataFrame()

In [7]:
# Derive the player's own team and the opposing team for every fixture row:
# a home fixture means the player's team is team_h, otherwise it is team_a.
is_home = df_all_player_fixtures['is_home']
home_team_ids = df_all_player_fixtures['team_h']
away_team_ids = df_all_player_fixtures['team_a']
df_all_player_fixtures['team_id'] = np.where(is_home, home_team_ids, away_team_ids)
df_all_player_fixtures['opponent_team_id'] = np.where(is_home.eq(False), home_team_ids, away_team_ids)

# Translate both id columns into readable team names via the df_teams lookup
team_names = df_teams['name']
for id_column, name_column in (('team_id', 'team'), ('opponent_team_id', 'opponent_team')):
    df_all_player_fixtures[name_column] = df_all_player_fixtures[id_column].map(team_names)

In [8]:
# Show the first five rows of the combined fixtures table
df_all_player_fixtures.head(n=5)

Unnamed: 0,id,code,team_h,team_h_score,team_a,team_a_score,event,finished,minutes,provisional_start_time,kickoff_time,event_name,is_home,element,name,team_id,opponent_team_id,team,opponent_team
0,120,2270145,1,,3,,15,False,0,False,2022-07-23T13:00:00Z,Omgång 15,True,1,Johan Dahlin,1,3,Malmö FF,IK Sirius
1,133,2270160,3,,1,,17,False,0,False,2022-08-06T13:00:00Z,Omgång 17,False,1,Johan Dahlin,1,3,Malmö FF,IK Sirius
2,144,2270171,1,,17,,18,False,0,False,2022-08-13T13:00:00Z,Omgång 18,True,1,Johan Dahlin,1,17,Malmö FF,GIF Sundsvall
3,151,2270177,14,,1,,19,False,0,False,2022-08-20T13:00:00Z,Omgång 19,False,1,Johan Dahlin,1,14,Malmö FF,Mjällby AIF
4,159,2270186,1,,10,,20,False,0,False,2022-08-28T15:30:00Z,Omgång 20,True,1,Johan Dahlin,1,10,Malmö FF,Kalmar FF


In [9]:
# Write the combined player fixtures to CSV, leaving out the row index
fixtures_csv_path = 'data/during-season/player_fixtures.csv'
df_all_player_fixtures.to_csv(fixtures_csv_path, index=False)