# Set up

In [1]:
import pandas as pd
import json
import matplotlib.pyplot as plt
import numpy as np
import requests

# Getting data

In [2]:
# Download the season history of randomly sampled entry ids.
# NOTE: the sample size below means up to 1,000,000 sequential HTTP requests.
entry_history_arr = []
entry_history_skipped = 0  # requests that failed or returned an unusable payload

# One Session lets requests reuse the connection instead of reconnecting per entry.
with requests.Session() as session:
    for i in np.random.choice(range(1, 3000000), 1000000, replace=False):
        try:
            entry_history_request = session.get(
                f'https://fantasy.premierleague.com/api/entry/{i}/history/',
                timeout=10,  # never hang forever on a single stalled request
            )
            # A non-2xx response may still carry a JSON body; without this check it
            # would be stored as if it were a history and break the 'past' lookup later.
            entry_history_request.raise_for_status()
            entry_history_data = entry_history_request.json()
        except (requests.RequestException, ValueError):
            # Best effort: network/HTTP errors and undecodable bodies skip this entry only.
            entry_history_skipped += 1
            continue

        if not isinstance(entry_history_data, dict):
            entry_history_skipped += 1
            continue

        entry_history_data['entry'] = i
        entry_history_arr.append(entry_history_data)

print(len(entry_history_arr))
print(f'skipped: {entry_history_skipped}')

KeyboardInterrupt: 

In [None]:
# Build one row per entry holding '<season>_total_points' and '<season>_rank'
# for every season listed under that entry's 'past' key.
entry_past_records = []
for i in entry_history_arr:
    # A payload without 'past' has no season data to tabulate; skip it rather than fail.
    if 'past' not in i:
        continue
    record = {'entry': i['entry']}
    for j in i['past']:
        record[j['season_name'] + '_total_points'] = j['total_points']
        record[j['season_name'] + '_rank'] = j['rank']
    entry_past_records.append(record)

entry_past_df = pd.DataFrame(entry_past_records)

# Put 'entry' first, then the season columns in reverse name order (for season names
# shaped like '2018/19' that is most recent season first).
# The order is derived from the column names, not their positions: whether pandas
# sorts or preserves the keys of a list of dicts depends on the pandas version, and
# slicing by position duplicates 'entry' and drops a season column whenever 'entry'
# is not the last column.
entry_past_cols = ['entry'] + sorted(
    (col for col in entry_past_df.columns if col != 'entry'),
    reverse=True,
)

# reindex (rather than []) also yields a well-formed empty frame when nothing was downloaded.
entry_past_df = entry_past_df.reindex(columns=entry_past_cols)

entry_past_df.head()

In [None]:
# Entry ids whose rank was below 30000 in each of the three seasons checked here.
under_cutoff_2018 = entry_past_df['2018/19_rank'] < 30000
under_cutoff_2017 = entry_past_df['2017/18_rank'] < 30000
under_cutoff_2016 = entry_past_df['2016/17_rank'] < 30000

consistently_top = entry_past_df[under_cutoff_2018 & under_cutoff_2017 & under_cutoff_2016]
top_entries = consistently_top['entry'].values

len(top_entries)

In [None]:
# Fetch the bootstrap-static payload; the cells below read its 'events' and 'elements' lists.
bootstrap_request = requests.get(
    'https://fantasy.premierleague.com/api/bootstrap-static/',
    timeout=30,  # fail instead of hanging if the API does not answer
)
# Stop here on an HTTP error rather than parsing an error body as game data.
bootstrap_request.raise_for_status()
bootstrap_data = bootstrap_request.json()

# Id of the event flagged as current. Fail with a clear message when no event carries
# the flag, instead of an opaque IndexError.
current_event_ids = [i['id'] for i in bootstrap_data['events'] if i['is_current']]
if not current_event_ids:
    raise ValueError("bootstrap-static lists no event with 'is_current' set")

current_event = current_event_ids[0]
current_event

In [None]:
# Lookup table of player display names: keep only 'id' and 'web_name' from the
# bootstrap 'elements' list, with 'id' exposed under the name 'element'.
element_df = (
    pd.DataFrame(bootstrap_data['elements'])[['id', 'web_name']]
    .rename(columns={'id': 'element'})
)

In [None]:
# Download the picks of every top entry for each event from 1 up to the current one.
entry_season_arr = []
entry_season_skipped = 0  # requests that failed or returned an unusable payload

# One Session lets requests reuse the connection across the many calls below.
with requests.Session() as session:
    for i in top_entries:
        for j in range(1, current_event + 1):
            try:
                entry_season_request = session.get(
                    f'https://fantasy.premierleague.com/api/entry/{i}/event/{j}/picks/',
                    timeout=10,  # never hang forever on a single stalled request
                )
                # A non-2xx response may still carry a JSON body; without this check it
                # would be stored and later break the 'picks' / 'active_chip' lookups.
                entry_season_request.raise_for_status()
                entry_season_data = entry_season_request.json()
            except (requests.RequestException, ValueError):
                # Best effort: skip only this entry/event pair.
                entry_season_skipped += 1
                continue

            if not isinstance(entry_season_data, dict):
                entry_season_skipped += 1
                continue

            entry_season_data['entry'] = i
            entry_season_data['event'] = j
            entry_season_arr.append(entry_season_data)

print(len(entry_season_arr))
print(f'skipped: {entry_season_skipped}')

In [3]:
# Keep only the downloaded pick payloads that belong to the current event.
current_event_arr = [
    season_entry
    for season_entry in entry_season_arr
    if season_entry['event'] == current_event
]

NameError: name 'entry_season_arr' is not defined

In [None]:
# Flatten every entry's 'picks' list into one long table, one row per pick.
flattened_picks = [pick for payload in current_event_arr for pick in payload['picks']]
current_event_picks_df = pd.DataFrame(flattened_picks)

# 1/0 indicator columns, so the per-player sum below turns them into counts:
# 'is_first_team' marks picks whose position is 11 or lower, 'is_owned' marks every pick.
current_event_picks_df['is_first_team'] = current_event_picks_df['position'].apply(
    lambda slot: 1 if slot <= 11 else 0
)
current_event_picks_df['is_owned'] = 1

# Sum per player, attach the display name, and order by ownership (highest first).
per_player_totals = current_event_picks_df.groupby('element', as_index=False).sum().round(2)
named_totals = per_player_totals.join(element_df.set_index('element'), on='element')
ranked_totals = named_totals.sort_values('is_owned', ascending=False)

summary_columns = ['element', 'web_name', 'is_owned', 'is_first_team', 'is_captain', 'is_vice_captain']
current_event_picks_df = ranked_totals[summary_columns]

# Analysing data

## Player selections

In [None]:
# Bar chart of the first 30 rows of the picks summary. This relies on the frame
# already being sorted by 'is_owned' in descending order.
most_owned = current_event_picks_df.iloc[:30]

plt.figure(figsize=(12, 8))
plt.bar(most_owned['web_name'], most_owned['is_owned'])
plt.xticks(rotation='vertical')
# Labels let the figure be read on its own when the notebook is skimmed.
plt.xlabel('Player')
plt.ylabel("Number of sampled entries owning the player ('is_owned')")
plt.title('30 most-owned players in the current event')
plt.grid()
plt.show()

## Wildcards

In [None]:
# One row per downloaded entry/event payload whose active chip is the wildcard.
# - columns=['event'] keeps the 'event' column even when there are no matching rows,
#   so the groupby below returns an empty result instead of raising KeyError.
# - .get() tolerates payloads that lack an 'active_chip' key.
wildcard_df = pd.DataFrame(
    [{'event': i['event']} for i in entry_season_arr if i.get('active_chip') == 'wildcard'],
    columns=['event'],
)

# Wildcards played per event, as a fraction of the number of top entries.
(wildcard_df.groupby('event').size() / len(top_entries)).round(2)

## Current performance

In [None]:
# Cumulative, normalised histogram of the 'overall_rank' reported in each current-event payload.
current_overall_ranks = pd.Series(
    [i['entry_history']['overall_rank'] for i in current_event_arr]
)

plt.figure(figsize=(10, 8))
plt.hist(
    current_overall_ranks,
    bins=20,
    cumulative=True,
    density=True
)
# Labels let the figure be read on its own when the notebook is skimmed.
plt.xlabel('Overall rank in the current event')
plt.ylabel('Cumulative share of entries')
plt.title('Current overall rank of the selected top entries')
plt.grid()
plt.show()

## Historic performance

In [None]:
# Rank history restricted to entries that have a rank in all three seasons
# (rows with a missing value in any selected column are dropped).
rank_history_columns = ['entry', '2018/19_rank', '2017/18_rank', '2016/17_rank']
hist_perf_df = entry_past_df[rank_history_columns].dropna()

In [None]:
# For entries ranked below 20000 in 2018/19, overlay the cumulative distribution of
# their rank in each of the two preceding seasons.
top_2018_19 = hist_perf_df[hist_perf_df['2018/19_rank'] < 20000]

plt.figure(figsize=(10, 8))
for season_rank_col in ['2017/18_rank', '2016/17_rank']:
    plt.hist(
        top_2018_19[season_rank_col],
        bins=50,
        alpha=0.5,
        density=True,
        cumulative=True,
        label=season_rank_col,  # without a label the two overlaid series are indistinguishable
    )
plt.xlim(0, 1000000)
plt.xlabel('Rank in earlier season')
plt.ylabel('Cumulative share of entries')
plt.title('Earlier-season ranks of entries ranked below 20000 in 2018/19')
plt.legend()
plt.grid()
plt.show()

In [None]:
# Mean of the 2017/18 and 2016/17 ranks. assign() builds a new frame and rebinds the
# name, so the frame created in the earlier cell is not modified in place.
hist_perf_df = hist_perf_df.assign(
    avg_rank=(hist_perf_df['2017/18_rank'] + hist_perf_df['2016/17_rank']) / 2
)

# Filter once and reuse it for both axes.
low_avg_rank = hist_perf_df[hist_perf_df['avg_rank'] < 100000]

plt.figure(figsize=(10, 8))
plt.scatter(
    low_avg_rank['2018/19_rank'],
    low_avg_rank['avg_rank'],
    alpha=0.25
)
# Labels let the figure be read on its own when the notebook is skimmed.
plt.xlabel('2018/19 rank')
plt.ylabel('Average of 2017/18 and 2016/17 rank')
plt.title('2018/19 rank vs. average rank of the two preceding seasons (average below 100000)')
plt.grid()
plt.show()

In [None]:
# Correlation matrix between the 2018/19 rank and the two-season average rank,
# restricted to entries whose average rank is below 100000.
below_avg_cutoff = hist_perf_df[hist_perf_df['avg_rank'] < 100000]

np.corrcoef(below_avg_cutoff['2018/19_rank'], below_avg_cutoff['avg_rank'])