# All players data to a dataframe

This notebook extracts the per-fixture stats from the `history` section of every player's element-summary file and appends them to a single table.

#### Import Packages

In [1]:
# Import Packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import json
import time
import collections

# Show every column when a dataframe is displayed (no truncation of wide tables)
pd.options.display.max_columns = None

#### Initialise loop parameters

In [2]:
# Master container: every fixture of every player is stored here, keyed by a
# running integer. An ordered dictionary keeps the fixtures in the order in
# which they were read.
data_dict = collections.OrderedDict()

# Running fixture counter, used as the next key of data_dict.
# It goes up by one for each player's fixture.
count = 0

# "Get column names" flag: stays True until the column headers have been
# captured once inside the loop, then it is switched to False.
gcn = True

# Total number of players (i.e. number of element-summary files).
# Hardcoded because it is fixed, which saves the time of counting the files.
nPlayers = 624

#### Begin loop

This reads all the "element-summary" files (each one belongs to a different player), extracts all the stats from each fixture, and appends them to one large table.

In [3]:
# Start timer so the total load time can be reported at the end of the cell
start_time = time.time()

# Reset the accumulators so that re-running this cell does not store the same
# fixtures a second time under new keys (keeps the cell idempotent).
data_dict.clear()
count = 0

# Loop through all players; the element-summary files are numbered 1..nPlayers
for i in range(1, nPlayers + 1):

    # Open this player's json file
    with open('../../data/json/element-summary/' + str(i) + '.json') as f:
        es = json.load(f)

    # Each entry of "history" holds the stats of one fixture for this player
    history = es["history"]

    # Get column names. Only do this once, and only from a player that has at
    # least one fixture: indexing [0] on an empty history raises IndexError.
    if gcn and history:
        cn = history[0].keys()
        gcn = False

    # Store every fixture under a running integer key; that key becomes the
    # row index of the final table.
    for fixture_stats in history:
        data_dict[count] = fixture_stats
        count += 1

# Build the table in a single step: one row per fixture, one column per stat
data = pd.DataFrame.from_dict(data_dict, orient="index")
print("Time Taken: %s seconds" % (time.time() - start_time))

Time Taken: 5.920746564865112 seconds


This method reads each fixture's statistics as dictionary values and appends them to a master dictionary. Converting each fixture to a dataframe and then appending to a larger dataframe is much slower, as a copy of the dataframe is stored in memory every time something is appended.

#### Clean table
- Restart indexing from 0 for the `element`, `fixture` and `opponent_team` id columns

In [4]:
# Subtract 1 from the three id columns so that their numbering restarts from 0.
# NOTE: this modifies `data` in place, so running the cell a second time would
# shift the ids again; re-run the loading cell first if a re-run is needed.
id_columns = ['element', 'fixture', 'opponent_team']
data[id_columns] = data[id_columns] - 1

# Preview the final table
data.head()

Unnamed: 0,id,kickoff_time,kickoff_time_formatted,team_h_score,team_a_score,was_home,round,total_points,value,transfers_balance,selected,transfers_in,transfers_out,loaned_in,loaned_out,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,ea_index,open_play_crosses,big_chances_created,clearances_blocks_interceptions,recoveries,key_passes,tackles,winning_goals,attempted_passes,completed_passes,penalties_conceded,big_chances_missed,errors_leading_to_goal,errors_leading_to_goal_attempt,tackled,offside,target_missed,fouls,dribbles,element,fixture,opponent_team
0,1,2018-08-12T15:00:00Z,12 Aug 16:00,0,2,True,1,3,50,0,70767,0,0,0,0,90,0,0,0,2,0,0,0,0,0,6,0,24,47.0,0.0,0.0,4.7,0,0,0,2,9,0,0,0,42,31,0,0,0,0,0,0,0,0,0,0,0,12
1,526,2018-08-18T16:30:00Z,18 Aug 17:30,3,2,False,2,3,50,188,90038,6059,5871,0,0,90,0,0,0,3,0,0,0,0,0,8,0,26,53.0,0.0,0.0,5.3,0,0,0,1,8,0,0,0,35,25,0,0,0,0,0,0,0,0,0,0,13,5
2,1053,2018-08-25T14:00:00Z,25 Aug 15:00,3,1,True,3,3,50,8540,107301,19550,11010,0,0,90,0,0,0,1,0,0,0,0,0,4,0,17,27.8,0.0,0.0,2.8,0,0,0,0,10,0,0,0,24,20,0,0,0,0,0,0,0,0,0,0,20,18
3,1584,2018-09-02T12:30:00Z,02 Sep 13:30,2,3,False,4,1,50,9582,123566,19332,9750,0,0,90,0,0,0,2,0,0,0,0,0,1,0,8,2.4,0.0,0.0,0.2,0,0,0,0,8,0,0,0,38,24,0,0,0,2,0,0,0,0,0,0,32,4
4,2121,2018-09-15T14:00:00Z,15 Sep 15:00,1,2,False,5,2,50,-3297,123310,8837,12134,0,0,90,0,0,0,1,0,0,0,0,0,1,0,13,14.2,0.0,0.0,1.4,0,0,0,2,13,0,0,0,33,23,0,0,0,0,0,0,0,0,0,0,45,14


In [5]:
# Write the combined table to a csv file; the row index is left out of the file
csv_path = r'../../data/csv/allplayers.csv'
data.to_csv(csv_path, index=False, index_label=False)