### Scripts to collect and process data

#### Setup

In [1]:
import json
import numpy as np
import os
import pandas as pd
import requests

In [2]:
LEAGUE_CODE = 284342 
LEAGUE_STANDINGS_URL = 'https://fantasy.premierleague.com/api/leagues-classic/{}/standings/'.format(LEAGUE_CODE)

#### GW1, GW2, and Overview Data

GW1 data was scrapped using Postman after GW1 ended - during GW2. This data will be used to extract initial player data, and GW1 results.

In [3]:
gw2_file = open(os.getcwd() + '/Raw_Data/gw2.json')
gw2_raw = json.load(gw2_file)
gw2_file.close()

Get player data from GW1 data:

In [4]:
players = pd.DataFrame(gw2_raw['standings']['results']).drop(columns = ['id', 'event_total', 'rank', 'last_rank', 'rank_sort', 'total']).set_index('entry').sort_values(by = 'player_name')
players.head()

Unnamed: 0_level_0,player_name,entry_name
entry,Unnamed: 1_level_1,Unnamed: 2_level_1
6411475,Aditya Balaji,love cesc dhoka
80353,Anish Bharatrajan,Hoeland
2522365,Kunal Agrawal,Expected Toulouse
1460606,Mayank Aggarwal,FalconRaiders
1478259,Moyukh Banerjee,Silvo's Berluscrony


In [5]:
players.to_csv(os.getcwd() + '/Data/player_data.csv')

Get GW1 data:

In [6]:
gw1_df = pd.DataFrame(gw2_raw['standings']['results']).drop(columns = ['id', 'rank', 'last_rank', 'rank_sort']).set_index('entry')

gw1_df['event_total'] = gw1_df['total'] - gw1_df['event_total']  # gw1 score = total score - gw2 score
gw1_df['total'] = gw1_df['event_total']  # re-assign total scores to 'current' score

# sort values to get ranks
gw1_df = gw1_df.sort_values(by = 'event_total', ascending = False)
gw1_df['rank'] = np.arange(len(gw1_df)) + 1
# fix ranks for tied points
gw1_df.loc[3402797, 'rank'] = 7
gw1_df.loc[53549, 'rank'] = 14

gw1_df.head()

Unnamed: 0_level_0,event_total,player_name,total,entry_name,rank
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
80353,112,Anish Bharatrajan,112,Hoeland,1
6499681,108,Ranjib Rudra,108,ShawbertoSanchoddas,2
1478259,99,Moyukh Banerjee,99,Silvo's Berluscrony,3
5728251,97,dev maheshwari,97,The Mancunian way,4
8971,94,Pavan R,94,groÃŸ misconduct,5


In [7]:
gw1_df.to_csv(os.getcwd() + '/Data/gw1.csv')

Get GW2 data:

In [8]:
gw2_df = pd.DataFrame(gw2_raw['standings']['results']).drop(columns = ['id', 'last_rank', 'rank_sort']).set_index('entry')
gw2_df.head()

Unnamed: 0_level_0,event_total,player_name,rank,total,entry_name
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
80353,88,Anish Bharatrajan,1,200,Hoeland
8971,94,Pavan R,2,188,groÃŸ misconduct
5728251,74,dev maheshwari,3,171,The Mancunian way
1478259,69,Moyukh Banerjee,4,168,Silvo's Berluscrony
6499681,57,Ranjib Rudra,5,165,ShawbertoSanchoddas


In [9]:
gw2_df.to_csv(os.getcwd() + '/Data/gw2.csv')

For the overview data, each column is a GW, rows are players (indexed by entry), and attribute values are corresponding GW scores.

In [10]:
overview_init = pd.DataFrame(gw2_raw['standings']['results']).drop(columns = ['id', 'rank', 'last_rank', 'rank_sort', 'player_name', 'entry_name']).set_index('entry')

overview_init['event_total'] = overview_init['total'] - overview_init['event_total']
overview_init['total'] = overview_init['event_total']

overview_init = overview_init.drop(columns = ['total']).rename(columns = {'event_total': 'GW1'}).sort_values(by = 'GW1', ascending = False).join(gw2_df).rename(columns = {'event_total': 'GW2'}).drop(columns = ['player_name', 'rank', 'total', 'entry_name'])

overview_init.head()

Unnamed: 0_level_0,GW1,GW2
entry,Unnamed: 1_level_1,Unnamed: 2_level_1
80353,112,88
6499681,108,57
1478259,99,69
5728251,97,74
8971,94,94


In [11]:
overview_init.to_csv(os.getcwd() + '/Data/overview.csv')

In [12]:
# overview_init.join(players)

#### Get data for current GW:

In [3]:
def get_data():
    try:
        res = requests.get(LEAGUE_STANDINGS_URL)
        
        df = pd.DataFrame(res.json()['standings']['results']).drop(columns = ['id', 'last_rank', 'rank_sort']).set_index('entry')
        
        return res.status_code, df
    
    except Exception as e:
        return res.status_code, None        

In [4]:
status, curr_gw_df = get_data()
status

200

In [5]:
curr_gw_df.head()

Unnamed: 0_level_0,event_total,player_name,rank,total,entry_name
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
8971,73,Pavan R,1,334,groß misconduct
80353,74,Anish Bharatrajan,2,325,Hoeland
5728251,75,dev maheshwari,3,311,The Mancunian way
6499681,55,Ranjib Rudra,4,280,ShawbertoSanchoddas
3309014,62,Sudhanshu Srivastava,5,279,Finding Timo


In [6]:
curr_gw_df.to_csv(os.getcwd() + '/Data/gw4.csv')

Update overview file:

In [7]:
overview = pd.read_csv(os.getcwd() + '/Data/overview.csv').set_index('entry')
overview.head()

Unnamed: 0_level_0,GW1,GW2,GW3
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
80353,112,88,55
6499681,108,57,64
1478259,99,69,49
5728251,97,74,65
8971,94,94,73


In [8]:
overview.join(curr_gw_df).drop(columns = ['player_name', 'rank', 'total', 'entry_name']).rename(columns = {'event_total': 'GW4'}).to_csv(os.getcwd() + '/Data/overview.csv')