In [1]:
import json
import pandas as pd
import numpy as np

# Load player season history and create features

In [2]:
# Load all player season history into a single data frame
with open('data/during-season/bootstrap-static.json', 'r') as json_file:
    data = json.load(json_file)

df_player_overview = pd.json_normalize(data, record_path='elements')

# Collect one frame per player and concatenate once after the loop.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# calling it per player re-copied the whole accumulated frame on every iteration.
player_history_frames = []
for index, player in df_player_overview.iterrows():
    player_id = str(player['id'])
    first_name = player['first_name']
    last_name = player['second_name']
    team_id = player['team']
    # Per-player files are named "<zero-padded id>_<first>_<last>.json" with spaces as underscores
    file_name = f"{player_id.rjust(3, '0')}_{first_name}_{last_name}".replace(' ', '_')

    with open(f'data/during-season/players/{file_name}.json', 'r') as json_file:
        player_data = json.load(json_file)

    df_player_history = pd.json_normalize(player_data, record_path='history')
    if df_player_history.empty:
        # No history rows for this player: nothing to contribute to the result
        continue

    df_player_history['name'] = f'{first_name} {last_name}'
    df_player_history['team_id'] = team_id
    player_history_frames.append(df_player_history)

# pd.concat raises on an empty list, so fall back to an empty frame in that case
if player_history_frames:
    df_players_history = pd.concat(player_history_frames, ignore_index=True)
else:
    df_players_history = pd.DataFrame()

df_players_history.shape

(2663, 35)

In [3]:
# Load teams data, indexed by team id so it can be used as an id -> name lookup
df_teams = pd.json_normalize(data, record_path='teams').set_index('id')

# Map team ids to team names for better readability
df_players_history['team'] = df_players_history['team_id'].map(df_teams['name'])

# Keep the raw opponent ids before 'opponent_team' is overwritten with names.
# The guard makes this cell safe to re-run: without it, a second run would copy
# the already-mapped names into 'opponent_team_id' and the mapping below would
# then turn 'opponent_team' into all-NaN.
if 'opponent_team_id' not in df_players_history.columns:
    df_players_history['opponent_team_id'] = df_players_history['opponent_team']
df_players_history['opponent_team'] = df_players_history['opponent_team_id'].map(df_teams['name'])

In [4]:
# Preview the first five rows of the combined player history
df_players_history.head(n=5)

Unnamed: 0,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,minutes,...,recoveries,value,transfers_balance,selected,transfers_in,transfers_out,name,team_id,team,opponent_team_id
0,1,6,Kalmar FF,6,False,2022-04-03T15:30:00Z,0,1,1,90,...,5,60,0,4920,0,0,Johan Dahlin,1,Malmö FF,10
1,1,16,IF Elfsborg,3,True,2022-04-11T17:10:00Z,1,1,2,90,...,6,60,201,5787,284,83,Johan Dahlin,1,Malmö FF,5
2,1,24,AIK,7,True,2022-04-17T13:00:00Z,3,0,3,90,...,11,60,39,5997,127,88,Johan Dahlin,1,Malmö FF,12
3,1,32,IFK Värnamo,7,False,2022-04-21T17:00:00Z,0,0,4,90,...,5,60,241,6355,319,78,Johan Dahlin,1,Malmö FF,18
4,1,39,IFK Göteborg,6,True,2022-04-25T17:10:00Z,1,0,5,90,...,6,60,144,6527,244,100,Johan Dahlin,1,Malmö FF,4


In [5]:
# Persist the combined player history; the row index is not written
df_players_history.to_csv(
    'data/during-season/player_history.csv',
    index=False,
)

## Create data file for player future events

In [6]:
# Load all players' upcoming fixtures into a single data frame
with open('data/during-season/bootstrap-static.json', 'r') as json_file:
    data = json.load(json_file)

df_player_overview = pd.json_normalize(data, record_path='elements')

# Collect one frame per player and concatenate once after the loop.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# calling it per player re-copied the whole accumulated frame on every iteration.
player_fixture_frames = []
for index, player in df_player_overview.iterrows():
    player_id = str(player['id'])
    first_name = player['first_name']
    last_name = player['second_name']
    team_id = player['team']
    # Per-player files are named "<zero-padded id>_<first>_<last>.json" with spaces as underscores
    file_name = f"{player_id.rjust(3, '0')}_{first_name}_{last_name}".replace(' ', '_')

    with open(f'data/during-season/players/{file_name}.json', 'r') as json_file:
        player_data = json.load(json_file)

    df_player_fixtures = pd.json_normalize(player_data, record_path='fixtures')
    if df_player_fixtures.empty:
        # No fixture rows for this player: nothing to contribute to the result
        continue

    # NOTE(review): 'element' is stored as a string here, whereas the history
    # records carry it as a number - confirm before joining the two frames in memory.
    df_player_fixtures['element'] = player_id
    df_player_fixtures['name'] = f'{first_name} {last_name}'
    player_fixture_frames.append(df_player_fixtures)

# pd.concat raises on an empty list, so fall back to an empty frame in that case
if player_fixture_frames:
    df_all_player_fixtures = pd.concat(player_fixture_frames, ignore_index=True)
else:
    df_all_player_fixtures = pd.DataFrame()

In [7]:
# Work out which side of each fixture is the player's own team and which is the opponent
is_home = df_all_player_fixtures['is_home']
home_team_ids = df_all_player_fixtures['team_h']
away_team_ids = df_all_player_fixtures['team_a']

df_all_player_fixtures['team_id'] = np.where(is_home, home_team_ids, away_team_ids)
df_all_player_fixtures['opponent_team_id'] = np.where(is_home.eq(False), home_team_ids, away_team_ids)

# Add readable team names next to the ids, looked up from the teams table
team_names = df_teams['name']
for id_column, name_column in (('team_id', 'team'), ('opponent_team_id', 'opponent_team')):
    df_all_player_fixtures[name_column] = df_all_player_fixtures[id_column].map(team_names)

In [8]:
# Preview the first five rows of the combined fixtures frame
df_all_player_fixtures.head(n=5)

Unnamed: 0,id,code,team_h,team_h_score,team_a,team_a_score,event,finished,minutes,provisional_start_time,kickoff_time,event_name,is_home,element,name,team_id,opponent_team_id,team,opponent_team
0,51,2270081,1,,14,,7,False,0,False,2022-05-07T15:30:00Z,Omgång 7,True,1,Johan Dahlin,1,14,Malmö FF,Mjällby AIF
1,58,2270148,12,,1,,7,False,0,False,2022-05-11T17:00:00Z,Omgång 7,False,1,Johan Dahlin,1,12,Malmö FF,AIK
2,66,2270086,7,,1,,8,False,0,False,2022-05-16T17:10:00Z,Omgång 8,False,1,Johan Dahlin,1,7,Malmö FF,Djurgården
3,71,2270097,1,,6,,9,False,0,False,2022-05-22T15:30:00Z,Omgång 9,True,1,Johan Dahlin,1,6,Malmö FF,BK Häcken
4,75,2270101,16,,1,,10,False,0,False,2022-05-29T13:00:00Z,Omgång 10,False,1,Johan Dahlin,1,16,Malmö FF,Degerfors IF


In [9]:
# Persist the combined fixtures; the row index is not written
df_all_player_fixtures.to_csv(
    'data/during-season/player_fixtures.csv',
    index=False,
)