# Set up

In [78]:
import sys
sys.path.append('../../optimiser/')

In [79]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
import optimiser

# Getting data

In [3]:
# Parse the weekly picks JSON file into `d`.
with open('../csvs/top_50_weekly_data.json') as weekly_file:
    d = json.load(weekly_file)

In [4]:
# Number of top-level entries in the loaded JSON.
len(d)

1900

In [161]:
# Feature table read from CSV. Later cells rely on it having at least the
# columns: event, element, element_type, total_points, value, team, minutes,
# safe_web_name.
element_df = pd.read_csv('../csvs/element_gameweek_features_v06.csv')

# Analysis

## Top managers

In [82]:
# One list of element ids per entry of `d`, keeping only picks whose
# `position` is at most 11 and whose `event` is at most 20.
# Iterating over `d` directly (instead of a hard-coded range(0, 1900)) keeps
# this cell correct if the JSON file grows or shrinks.
# Entries with no pick passing the filter yield an empty list, so the result
# stays aligned one-to-one with `d`.
picks_arr = [
    [
        pick['element']
        for pick in entry['picks']
        if pick['position'] <= 11 and pick['event'] <= 20
    ]
    for entry in d
]

In [83]:
# Distinct element ids that appear in any of the filtered pick lists.
element_set = {element_id for starters in picks_arr for element_id in starters}

In [84]:
# Lookup from element id to element_type, restricted to elements that were
# actually picked. Rows are de-duplicated on (element, element_type) first.
position_dict = (
    element_df
    .loc[element_df['element'].isin(element_set), ['element', 'element_type']]
    .drop_duplicates()
    .set_index('element')['element_type']
    .to_dict()
)

In [85]:
# Replace every element id in each pick list with its element_type code.
picks_position_arr = [
    [position_dict[element_id] for element_id in starters]
    for starters in picks_arr
]

In [86]:
def get_formation(pick):
    """Return the formation label for a list of element_type codes.

    Parameters
    ----------
    pick : list or tuple of int
        One element_type code (1-4) per selected player, in any order.
        NOTE(review): presumably 1=goalkeeper, 2=defender, 3=midfielder,
        4=forward -- confirm against the data source.

    Returns
    -------
    str or None
        A label such as '1-3-4-3' giving how many players carry code 1, 2, 3
        and 4 respectively. None is returned when the counts do not form one
        of the eight recognised formations (this includes an empty list, a
        list containing any other code, and non-list/tuple input).
    """
    # The eight recognised shapes as (count of 1s, 2s, 3s, 4s).
    valid_shapes = {
        (1, 3, 4, 3),
        (1, 4, 4, 2),
        (1, 5, 4, 1),
        (1, 4, 5, 1),
        (1, 4, 3, 3),
        (1, 3, 5, 2),
        (1, 5, 3, 2),
        (1, 5, 2, 3),
    }

    if not isinstance(pick, (list, tuple)):
        return None

    codes = list(pick)
    # Counting per code makes the result independent of the order of `pick`.
    shape = tuple(codes.count(code) for code in (1, 2, 3, 4))

    # If the counts do not add up to the list length, some value outside 1-4
    # is present and the pick cannot be a recognised formation.
    if sum(shape) != len(codes) or shape not in valid_shapes:
        return None

    return '-'.join(str(count) for count in shape)
    

In [87]:
# Formation label (or None) per entry, in the same order as picks_position_arr.
formation_arr = list(map(get_formation, picks_position_arr))

In [88]:
# Tally how often each formation label occurs. The `None` key counts entries
# for which get_formation returned no label.
Counter(formation_arr)

Counter({'1-3-4-3': 414,
         '1-3-5-2': 172,
         '1-4-3-3': 117,
         '1-4-4-2': 221,
         None: 900,
         '1-4-5-1': 21,
         '1-5-4-1': 15,
         '1-5-3-2': 33,
         '1-5-2-3': 7})

## Optimiser

In [172]:
# Copy the realised `total_points` into `predicted_total_points`, so anything
# reading the prediction column sees the actual scores.
# NOTE(review): presumably the optimiser selects on `predicted_total_points`,
# which would make the results below a hindsight upper bound -- confirm in the
# optimiser module. This also mutates `element_df` in place.
element_df['predicted_total_points'] = element_df['total_points'] 

In [173]:
# Columns handed on to the optimiser cells below.
player_columns = [
    'event',
    'element',
    'predicted_total_points',
    'value',
    'element_type',
    'team',
    'minutes',
    'safe_web_name',
    'total_points',
]

# Keep only those columns and drop every row with a missing value in any of them.
players_df = element_df[player_columns].dropna()

In [174]:
# Formations to evaluate, one four-number list each. They mirror the eight
# labels produced by get_formation ('1-3-4-3', '1-4-4-2', ...).
# NOTE(review): presumably each list is the player count per element_type
# 1..4 -- confirm how optimiser.construct_optimal_team_from_scratch reads it.
formations = [
    [1, 3, 4, 3],
    [1, 4, 4, 2],
    [1, 5, 4, 1],
    [1, 4, 5, 1],
    [1, 4, 3, 3],
    [1, 3, 5, 2],
    [1, 5, 3, 2],
    [1, 5, 2, 3],
]

In [176]:
# Collects one (event, formation label, points) tuple per event/formation pair.
event_formation_points_arr = []

for e in range(1, 21):
    print('event', e)

    # Rows of players_df for this event, as a list of per-row dicts.
    event_players = players_df[players_df['event'] == e].to_dict('records')

    for f in formations:
        # Two values come back from the optimiser; going by the names they are
        # the selected players and the bench for formation `f`.
        (
            event_formation_players,
            event_formation_bench,
        ) = optimiser.construct_optimal_team_from_scratch(
            event_players,
            formation=f,
        )

        # Three values are returned; only the first one is kept.
        event_formation_points, _, _ = optimiser.calculate_team_total_points(
            players_df,
            event_formation_players,
            event_formation_bench,
            e,
        )

        print(f, event_formation_points)

        # e.g. [1, 3, 4, 3] -> '1-3-4-3'
        formation_label = '-'.join(map(str, f))
        event_formation_points_arr.append(
            (e, formation_label, event_formation_points)
        )

event 1
[1, 3, 4, 3] 144
[1, 4, 4, 2] 149
[1, 5, 4, 1] 155
[1, 4, 5, 1] 154
[1, 4, 3, 3] 143
[1, 3, 5, 2] 149
[1, 5, 3, 2] 149
[1, 5, 2, 3] 141
event 2
[1, 3, 4, 3] 161
[1, 4, 4, 2] 164
[1, 5, 4, 1] 160
[1, 4, 5, 1] 162
[1, 4, 3, 3] 162
[1, 3, 5, 2] 163
[1, 5, 3, 2] 162
[1, 5, 2, 3] 159
event 3
[1, 3, 4, 3] 133
[1, 4, 4, 2] 134
[1, 5, 4, 1] 134
[1, 4, 5, 1] 132
[1, 4, 3, 3] 135
[1, 3, 5, 2] 132
[1, 5, 3, 2] 136
[1, 5, 2, 3] 136
event 4
[1, 3, 4, 3] 131
[1, 4, 4, 2] 129
[1, 5, 4, 1] 126
[1, 4, 5, 1] 126
[1, 4, 3, 3] 132
[1, 3, 5, 2] 128
[1, 5, 3, 2] 129
[1, 5, 2, 3] 131
event 5
[1, 3, 4, 3] 156
[1, 4, 4, 2] 155
[1, 5, 4, 1] 152
[1, 4, 5, 1] 154
[1, 4, 3, 3] 154
[1, 3, 5, 2] 156
[1, 5, 3, 2] 152
[1, 5, 2, 3] 149
event 6
[1, 3, 4, 3] 148
[1, 4, 4, 2] 145
[1, 5, 4, 1] 140
[1, 4, 5, 1] 143
[1, 4, 3, 3] 144
[1, 3, 5, 2] 147
[1, 5, 3, 2] 140
[1, 5, 2, 3] 138
event 7
[1, 3, 4, 3] 150
[1, 4, 4, 2] 153
[1, 5, 4, 1] 155
[1, 4, 5, 1] 155
[1, 4, 3, 3] 151
[1, 3, 5, 2] 152
[1, 5, 3, 2] 153
[1, 5, 2,

In [177]:
# Tabulate the (event, formation, points) tuples collected in the loop above,
# one row per tuple.
formation_df = pd.DataFrame(event_formation_points_arr,
             columns=['event', 'formation', 'points']
            )

In [178]:
# Summary statistics of `points` per formation across all events, rounded to
# one decimal place.
formation_df.groupby('formation')['points'].describe().round(1)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
formation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1-3-4-3,20.0,148.8,12.2,126.0,141.5,149.5,159.5,168.0
1-3-5-2,20.0,150.4,13.7,124.0,143.2,149.5,163.0,172.0
1-4-3-3,20.0,146.8,11.7,122.0,137.0,146.5,157.5,163.0
1-4-4-2,20.0,149.1,13.2,121.0,140.2,148.0,161.0,167.0
1-4-5-1,20.0,148.8,14.7,119.0,138.2,150.0,159.8,169.0
1-5-2-3,20.0,142.8,10.7,118.0,135.2,142.0,152.0,159.0
1-5-3-2,20.0,146.4,12.5,117.0,136.0,147.0,156.2,163.0
1-5-4-1,20.0,146.7,14.2,115.0,136.0,148.0,157.0,166.0
