# Set up

In [78]:
import sys
# Add ../../optimiser/ (resolved against the current working directory) to the
# module search path so that the `import optimiser` in the next cell can succeed.
sys.path.append('../../optimiser/')

In [79]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
import optimiser

# Getting data

In [3]:
# Parse the weekly picks JSON straight from the file handle into `d`.
with open('../csvs/top_50_weekly_data.json') as f:
    d = json.load(f)

In [4]:
# Sanity check: number of top-level records loaded from the JSON file.
len(d)

1900

In [161]:
# Feature table read from CSV; later cells use its 'element', 'element_type',
# 'event' and points/value columns.
element_df = pd.read_csv('../csvs/element_gameweek_features_v06.csv')

# Analysis

## Top managers

In [82]:
# One list of element ids per record in `d`, keeping only picks whose
# 'position' is at most 11 and whose 'event' is at most 20.
# Iterating over `d` directly (rather than a hard-coded range(0, 1900)) keeps
# the cell correct if the input file grows or shrinks.
# Records whose picks are all filtered out yield an empty list, which
# get_formation() later maps to None.
picks_arr = [
    [
        pick['element']
        for pick in entry['picks']
        if pick['position'] <= 11 and pick['event'] <= 20
    ]
    for entry in d
]

In [83]:
# Distinct element ids appearing in any of the filtered pick lists.
element_set = {
    element_id
    for starting_picks in picks_arr
    for element_id in starting_picks
}

In [84]:
# Map each picked element id to its element_type, using the distinct
# (element, element_type) pairs found in element_df.
position_dict = (
    element_df
    .loc[element_df['element'].isin(element_set), ['element', 'element_type']]
    .drop_duplicates()
    .set_index('element')['element_type']
    .to_dict()
)

In [85]:
# Translate every pick list from element ids to element types, preserving
# the order of the picks within each list.
picks_position_arr = [
    [position_dict[element_id] for element_id in starting_picks]
    for starting_picks in picks_arr
]

In [86]:
def get_formation(pick):
    """Return the formation label for a list of eleven element types.

    Parameters
    ----------
    pick : iterable of int
        Element types (values 1 to 4) of the selected players. The order of
        the entries does not matter.

    Returns
    -------
    str or None
        A label of the form '1-3-4-3', giving the number of players of
        element type 1, 2, 3 and 4 respectively, when the counts match one of
        the eight supported formations. None for anything else, including an
        empty list, a list that is not exactly eleven entries long, or counts
        that match no supported formation.
    """
    # Counts of element types 1..4 for each supported formation.
    valid_counts = {
        (1, 3, 4, 3),
        (1, 4, 4, 2),
        (1, 5, 4, 1),
        (1, 4, 5, 1),
        (1, 4, 3, 3),
        (1, 3, 5, 2),
        (1, 5, 3, 2),
        (1, 5, 2, 3),
    }

    if pick is None:
        return None

    positions = list(pick)
    # Counting makes the result independent of the order of the picks.
    counts = tuple(positions.count(element_type) for element_type in (1, 2, 3, 4))

    # The length check rejects lists carrying extra, unrecognised values
    # alongside an otherwise valid set of counts.
    if len(positions) != 11 or counts not in valid_counts:
        return None

    return '-'.join(str(count) for count in counts)
    

In [87]:
# Formation label (or None) for every pick list, in the same order.
formation_arr = list(map(get_formation, picks_position_arr))

In [88]:
# Frequency of each formation label. The None key counts pick lists for which
# get_formation() found no matching formation (this includes empty lists).
Counter(formation_arr)

Counter({'1-3-4-3': 414,
         '1-3-5-2': 172,
         '1-4-3-3': 117,
         '1-4-4-2': 221,
         None: 900,
         '1-4-5-1': 21,
         '1-5-4-1': 15,
         '1-5-3-2': 33,
         '1-5-2-3': 7})

## Optimiser

In [165]:
# Copy the opposition-adjusted expectation into 'predicted_total_points', the
# column selected into players_df below.
# NOTE(review): presumably this is the column name the optimiser reads — confirm.
element_df['predicted_total_points'] = element_df['expected_total_points_against_opposition_team'] 

In [166]:
# Keep only the columns used in the simulation below and discard any row with
# a missing value in one of them.
players_df = element_df.loc[
    :,
    [
        'event',
        'element',
        'predicted_total_points',
        'value',
        'element_type',
        'team',
        'minutes',
        'safe_web_name',
        'total_points',
    ],
].dropna()

In [167]:
# Formations passed to the optimiser, one four-number list each.
# NOTE(review): the numbers presumably count players of element_type 1, 2, 3
# and 4 in that order, mirroring the labels returned by get_formation —
# confirm against optimiser.construct_optimal_team_from_scratch.
formations = [
    [1, 3, 4, 3],
    [1, 4, 4, 2],
    [1, 5, 4, 1],
    [1, 4, 5, 1],
    [1, 4, 3, 3],
    [1, 3, 5, 2],
    [1, 5, 3, 2],
    [1, 5, 2, 3],
]

In [168]:
# Collect (event, formation label, points) for every event 2..20 and every
# candidate formation.
event_formation_points_arr = []

for event in range(2, 21):
    # Player records available for this event, as a list of dicts.
    candidates = players_df[players_df['event'] == event].to_dict('records')

    for formation in formations:
        # Build a team and bench for this formation from this event's players.
        team, bench = optimiser.construct_optimal_team_from_scratch(
            candidates,
            formation=formation,
        )

        # Only the first of the three returned values is kept.
        points, _, _ = optimiser.calculate_team_total_points(
            players_df,
            team,
            bench,
            event,
        )

        label = '-'.join(map(str, formation))
        event_formation_points_arr.append((event, label, points))

In [169]:
# Tabulate the collected (event, formation, points) tuples, one row each.
formation_df = pd.DataFrame(
    data=event_formation_points_arr,
    columns=['event', 'formation', 'points'],
)

In [170]:
# Summary statistics of points per formation across the simulated events,
# rounded to one decimal place.
formation_df.groupby('formation')['points'].describe().round(1)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
formation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1-3-4-3,19.0,56.1,15.9,27.0,47.0,55.0,64.5,90.0
1-3-5-2,19.0,55.6,16.0,29.0,44.0,53.0,66.5,94.0
1-4-3-3,19.0,55.9,14.0,35.0,47.0,53.0,63.5,86.0
1-4-4-2,19.0,55.5,16.0,19.0,48.0,59.0,63.0,89.0
1-4-5-1,19.0,55.6,15.4,35.0,44.5,55.0,64.5,94.0
1-5-2-3,19.0,54.2,15.1,31.0,40.0,53.0,66.5,85.0
1-5-3-2,19.0,56.2,15.5,29.0,45.0,59.0,64.5,83.0
1-5-4-1,19.0,57.8,18.6,19.0,44.5,60.0,69.5,92.0
