# Developing features

In [1]:
# import packages
import pandas as pd
import numpy as np
import json
pd.set_option('display.max_columns', None)

In [2]:
# Read an element summary file.
# JSON text is UTF-8, so decode it explicitly rather than relying on the
# platform's default encoding (which is not UTF-8 everywhere).
fn_es = '../data/json/element-summary/1.json'
with open(fn_es, 'r', encoding='utf-8') as f:
    element_summary = json.load(f)

In [3]:
# Read elements file.
# The file contains non-ASCII names, so decode it explicitly as UTF-8 instead
# of relying on the platform's default encoding.
fn_e = '../data/json/elements/elements.json'
with open(fn_e, 'r', encoding='utf-8') as f:
    elements = json.load(f)

In [4]:
# Read team file.
# JSON text is UTF-8, so decode it explicitly rather than relying on the
# platform's default encoding.
fn_t = '../data/json/teams/teams.json'
with open(fn_t, 'r', encoding='utf-8') as f:
    teams = json.load(f)

In [5]:
# Read fixtures file.
# JSON text is UTF-8, so decode it explicitly rather than relying on the
# platform's default encoding.
fn_f = '../data/json/fixtures/fixtures.json'
with open(fn_f, 'r', encoding='utf-8') as f:
    fixtures = json.load(f)

In [6]:
# Build the history DataFrame: keep the four columns used later and subtract 1
# from the element and fixture ids so they can be used as zero-based Python indices.
player = (
    pd.DataFrame(element_summary['history'])
    .loc[:, ['element', 'was_home', 'fixture', 'total_points']]
    .assign(
        element=lambda history: history['element'] - 1,
        fixture=lambda history: history['fixture'] - 1,
    )
)
player.head()

Unnamed: 0,element,was_home,fixture,total_points
0,0,True,0,3
1,0,False,13,3
2,0,True,20,3
3,0,False,32,1
4,0,False,45,2


In [7]:
# Build the elements table ordered by id with a fresh 0..n-1 index.
# Later lookups use the zero-based element index (id - 1) as a row label, which
# only lands on the right row when the rows are in id order; the fixtures table
# is sorted the same way before it is indexed.
# NOTE(review): this still assumes ids are contiguous and start at 1 - confirm.
elements_df = pd.DataFrame(elements).sort_values(by=['id']).reset_index(drop=True)
elements_df.head()

Unnamed: 0,assists,bonus,bps,chance_of_playing_next_round,chance_of_playing_this_round,clean_sheets,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,creativity,dreamteam_count,ea_index,element_type,ep_next,ep_this,event_points,first_name,form,goals_conceded,goals_scored,ict_index,id,in_dreamteam,influence,loaned_in,loaned_out,loans_in,loans_out,minutes,news,news_added,now_cost,own_goals,penalties_missed,penalties_saved,photo,points_per_game,red_cards,saves,second_name,selected_by_percent,special,squad_number,status,team,team_code,threat,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,web_name,yellow_cards
0,0,3,130,100.0,100.0,1,11334,0,0,-3,3,0.0,0,0,1,0.5,0.5,0,Petr,0.0,9,0,20.4,1,False,205.0,0,0,0,0,585,,2018-09-29T17:31:14Z,47,0,0,0,11334.jpg,3.4,0,27,Cech,1.1,False,1.0,a,1,3,0.0,24,83497,0,136211,0,0.0,5.1,Cech,0
1,0,5,568,,,6,80201,0,0,-1,1,0.0,1,0,1,3.7,3.1,3,Bernd,2.6,42,0,80.5,2,False,807.2,0,0,0,0,2835,,,49,0,0,0,80201.jpg,3.3,0,105,Leno,4.0,False,19.0,a,1,3,0.0,106,339095,0,250834,0,0.5,21.6,Leno,0
2,0,8,319,100.0,100.0,3,51507,0,0,-1,1,29.5,1,0,2,2.7,1.1,1,Laurent,0.6,23,3,59.1,3,False,456.4,0,0,0,0,1329,,2019-05-03T08:31:19Z,54,0,0,0,51507.jpg,3.6,0,0,Koscielny,0.9,False,6.0,a,1,3,105.0,62,128478,0,92187,0,0.1,11.5,Koscielny,1
3,5,5,304,0.0,0.0,4,98745,0,0,-2,2,197.8,0,0,2,0.0,0.0,0,Héctor,0.0,21,0,73.7,4,False,261.6,0,0,0,0,1532,Knee injury - Unknown return date,2019-01-19T20:01:19Z,53,1,0,0,98745.jpg,3.2,0,0,Bellerín,4.5,False,2.0,i,1,3,280.0,60,567084,0,1143684,0,0.0,11.3,Bellerín,3
4,4,7,392,100.0,100.0,5,38411,0,0,-1,1,196.6,1,0,2,3.3,2.1,2,Nacho,1.6,24,1,83.4,5,False,413.2,0,0,0,0,1860,,2019-04-28T10:31:25Z,54,0,0,0,38411.jpg,3.5,0,0,Monreal,1.3,False,18.0,a,1,3,224.0,77,298216,0,290921,0,0.3,14.3,Monreal,5


In [8]:
# Attach each row's element type, shifted down by one.
# The zero-based element index is used as a row label into elements_df.
element_type_lookup = elements_df['element_type'].loc[player['element']]
player['element_type'] = element_type_lookup.reset_index(drop=True) - 1
player.head()

Unnamed: 0,element,was_home,fixture,total_points,element_type
0,0,True,0,3,0
1,0,False,13,3,0
2,0,True,20,3,0
3,0,False,32,1,0
4,0,False,45,2,0


In [9]:
# Fixtures table: order by fixture id, keep the kickoff/difficulty/team columns,
# and subtract 1 from both team ids so they can be used as zero-based indices.
fixtures_df = (
    pd.DataFrame(fixtures)
    .sort_values(by=['id'])
    .reset_index(drop=True)
    .loc[:, ['kickoff_time', 'team_h_difficulty', 'team_a_difficulty', 'team_h', 'team_a']]
    .assign(
        team_h=lambda fx: fx['team_h'] - 1,
        team_a=lambda fx: fx['team_a'] - 1,
    )
)
fixtures_df.head()

Unnamed: 0,kickoff_time,team_h_difficulty,team_a_difficulty,team_h,team_a
0,2018-08-12T15:00:00Z,4,4,0,12
1,2018-08-11T14:00:00Z,2,3,1,4
2,2018-08-11T14:00:00Z,2,2,8,6
3,2018-08-11T14:00:00Z,4,2,9,5
4,2018-08-12T12:30:00Z,3,5,11,18


In [10]:
# Teams table reduced to the full name and the short code.
teams_df = pd.DataFrame(teams).loc[:, ['name', 'short_name']]
teams_df.head()

Unnamed: 0,name,short_name
0,Arsenal,ARS
1,Bournemouth,BOU
2,Brighton,BHA
3,Burnley,BUR
4,Cardiff,CAR


In [11]:
# Add the home and away team short names to the fixtures table.
# Team indices are used as row labels into teams_df.
short_names = teams_df['short_name']
for index_col, name_col in (('team_h', 'team_h_name'), ('team_a', 'team_a_name')):
    fixtures_df[name_col] = short_names.loc[fixtures_df[index_col]].reset_index(drop=True)
fixtures_df.head()

Unnamed: 0,kickoff_time,team_h_difficulty,team_a_difficulty,team_h,team_a,team_h_name,team_a_name
0,2018-08-12T15:00:00Z,4,4,0,12,ARS,MCI
1,2018-08-11T14:00:00Z,2,3,1,4,BOU,CAR
2,2018-08-11T14:00:00Z,2,2,8,6,FUL,CRY
3,2018-08-11T14:00:00Z,4,2,9,5,HUD,CHE
4,2018-08-12T12:30:00Z,3,5,11,18,LIV,WHU


In [12]:
# Copy the fixture details onto each player row, selecting fixture rows by position.
cols = ['team_h', 'team_a', 'team_h_name', 'team_a_name', 'kickoff_time']
fixture_rows = fixtures_df[cols].iloc[player['fixture'].to_numpy()]
# Renumber 0..n-1 so the rows line up with the player frame on assignment.
player[cols] = fixture_rows.reset_index(drop=True)
player.head()

Unnamed: 0,element,was_home,fixture,total_points,element_type,team_h,team_a,team_h_name,team_a_name,kickoff_time
0,0,True,0,3,0,0,12,ARS,MCI,2018-08-12T15:00:00Z
1,0,False,13,3,0,5,0,CHE,ARS,2018-08-18T16:30:00Z
2,0,True,20,3,0,0,18,ARS,WHU,2018-08-25T14:00:00Z
3,0,False,32,1,0,4,0,CAR,ARS,2018-09-02T12:30:00Z
4,0,False,45,2,0,14,0,NEW,ARS,2018-09-15T14:00:00Z


In [13]:
# Parse the kickoff timestamp strings into timezone-aware UTC datetimes.
kickoff_utc = pd.to_datetime(player['kickoff_time'], utc=True)
player['kickoff_time'] = kickoff_utc
player.head()

Unnamed: 0,element,was_home,fixture,total_points,element_type,team_h,team_a,team_h_name,team_a_name,kickoff_time
0,0,True,0,3,0,0,12,ARS,MCI,2018-08-12 15:00:00+00:00
1,0,False,13,3,0,5,0,CHE,ARS,2018-08-18 16:30:00+00:00
2,0,True,20,3,0,0,18,ARS,WHU,2018-08-25 14:00:00+00:00
3,0,False,32,1,0,4,0,CAR,ARS,2018-09-02 12:30:00+00:00
4,0,False,45,2,0,14,0,NEW,ARS,2018-09-15 14:00:00+00:00


In [14]:
# Resolve the home/away pair into "the player's team" and "the opposition".
# Series.append was removed in pandas 2.0, so choose per row with Series.where
# instead of appending the two masked halves and re-sorting by index.
is_home = player['was_home']

# Home games: own team is team_h; away games: own team is team_a.
player['team'] = player['team_h'].where(is_home, player['team_a'])

# The opposition is whichever side the player's team is not.
# NOTE: the 'oppsition' spelling is kept on purpose; a later lookup reads the
# column by exactly this name.
player['oppsition'] = player['team_a'].where(is_home, player['team_h'])

In [15]:
# Translate the team and opposition indices into team short names.
team_short_names = teams_df['short_name']
player['team_name'] = team_short_names.loc[player['team']].reset_index(drop=True)
player['opposition_name'] = team_short_names.loc[player['oppsition']].reset_index(drop=True)

player.head()

Unnamed: 0,element,was_home,fixture,total_points,element_type,team_h,team_a,team_h_name,team_a_name,kickoff_time,team,oppsition,team_name,opposition_name
0,0,True,0,3,0,0,12,ARS,MCI,2018-08-12 15:00:00+00:00,0,12,ARS,MCI
1,0,False,13,3,0,5,0,CHE,ARS,2018-08-18 16:30:00+00:00,0,5,ARS,CHE
2,0,True,20,3,0,0,18,ARS,WHU,2018-08-25 14:00:00+00:00,0,18,ARS,WHU
3,0,False,32,1,0,4,0,CAR,ARS,2018-09-02 12:30:00+00:00,0,4,ARS,CAR
4,0,False,45,2,0,14,0,NEW,ARS,2018-09-15 14:00:00+00:00,0,14,ARS,NEW


In [16]:
# Subset of the player frame: element, venue flag, team names and points.
player_test = player.loc[:, ['element', 'was_home', 'team_name', 'opposition_name', 'total_points']]
player_test.head()

Unnamed: 0,element,was_home,team_name,opposition_name,total_points
0,0,True,ARS,MCI,3
1,0,False,ARS,CHE,3
2,0,True,ARS,WHU,3
3,0,False,ARS,CAR,1
4,0,False,ARS,NEW,2


In [20]:
# Trailing-window features: for each game, the mean and median of total_points
# over the (up to) `wsz` games that came before it. The current game is not
# part of its own window.
# NOTE(review): the window is 3 games but the columns are named "last_5_games".
# The names are left unchanged so existing outputs still match - confirm which
# of the two was intended.
wsz = 3

# shift(1) moves every score down one row, so the window ending at row n covers
# rows n-wsz .. n-1; min_periods=1 lets the early rows use however many earlier
# games exist.
prior_points = player['total_points'].shift(1).rolling(window=wsz, min_periods=1)

# The first game has no earlier games, so its window is empty (NaN) and is
# filled with 0.
player['mean_last_5_games'] = prior_points.mean().fillna(0)
player['median_last_5_games'] = prior_points.median().fillna(0)

In [21]:
# Show the first ten rows, including the new rolling columns.
player.head(n=10)

Unnamed: 0,element,was_home,fixture,total_points,element_type,team_h,team_a,team_h_name,team_a_name,kickoff_time,team,oppsition,team_name,opposition_name,mean_last_5_games,median_last_5_games
0,0,True,0,3,0,0,12,ARS,MCI,2018-08-12 15:00:00+00:00,0,12,ARS,MCI,0.0,0.0
1,0,False,13,3,0,5,0,CHE,ARS,2018-08-18 16:30:00+00:00,0,5,ARS,CHE,3.0,3.0
2,0,True,20,3,0,0,18,ARS,WHU,2018-08-25 14:00:00+00:00,0,18,ARS,WHU,3.0,3.0
3,0,False,32,1,0,4,0,CAR,ARS,2018-09-02 12:30:00+00:00,0,4,ARS,CAR,3.0,3.0
4,0,False,45,2,0,14,0,NEW,ARS,2018-09-15 14:00:00+00:00,0,14,ARS,NEW,2.333333,3.0
5,0,True,50,11,0,0,7,ARS,EVE,2018-09-23 15:00:00+00:00,0,7,ARS,EVE,2.0,2.0
6,0,True,60,1,0,0,17,ARS,WAT,2018-09-29 14:00:00+00:00,0,17,ARS,WAT,4.666667,2.0
7,0,False,73,0,0,8,0,FUL,ARS,2018-10-07 11:00:00+00:00,0,8,ARS,FUL,4.666667,2.0
8,0,True,80,0,0,0,10,ARS,LEI,2018-10-22 19:00:00+00:00,0,10,ARS,LEI,4.0,1.0
9,0,False,92,0,0,6,0,CRY,ARS,2018-10-28 13:30:00+00:00,0,6,ARS,CRY,0.333333,0.0
