In [74]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import re
from time import time


def get_data(soup):
    """Split the raw bootstrap payload into player, team and abbreviation records.

    The payload is processed as plain text: ``str(soup)`` is split on ``"{"``
    and every resulting chunk is classified by a marker substring.

    * chunks containing ``"web_name"`` (with quotes) become player records;
    * otherwise, chunks containing ``opponent`` become team records;
    * otherwise, chunks containing ``abbr`` become abbreviation records.

    Args:
        soup: Any object whose ``str()`` is the raw response text (the caller
            passes a ``BeautifulSoup`` document).

    Returns:
        A ``(players, teams, abbrevs)`` tuple. Each item is a list of records
        and each record is a list of raw comma-separated text fragments
        (expected to look like ``"key":value``). Team records start with a
        synthetic ``"team_code":N`` fragment, numbered from 1 in order of
        appearance. All three lists are empty when no chunk matches.
    """
    # Matches exactly what the original '[[{!@#$}]]' pattern matched (one of
    # [ { ! @ # $ } immediately followed by a literal ']'), but with the
    # brackets escaped so Python 3.7+ no longer emits
    # "FutureWarning: Possible nested set".
    special_chars = re.compile(r'[\[{!@#$}]\]')

    players = []
    teams = []
    abbrevs = []
    team_id = 1
    for element in str(soup).split("{"):
        if '"web_name"' in element:
            # Drop the two trailing characters (normally the "}," that closes
            # the object) before splitting into fragments.
            players.append(element[:-2].split(","))
        elif 'opponent' in element:
            # Skip the first six fragments and the last one; only the middle
            # fragments are kept for the team record.
            fields = element[:-2].split(",")[6:-1]
            team = ['"team_code":%i' % team_id]
            team_id += 1
            team.extend(special_chars.sub('', part) for part in fields)
            teams.append(team)
        elif 'abbr' in element:
            # Keep every second fragment, starting with the second one.
            fields = element[:-2].split(",")[1::2]
            abbrevs.append([special_chars.sub('', part) for part in fields])

    # Tidy up the last record of each kind, which carries trailing residue.
    # Guarded so that a payload without any matching chunk (e.g. an error
    # page) returns empty lists instead of raising IndexError.
    if players and players[-1]:
        last_player = players[-1]
        del last_player[-1]
        if last_player:
            last_player[-1] = last_player[-1][:-2]
    if teams and teams[-1]:
        del teams[-1][-1]
    if abbrevs and abbrevs[-1]:
        del abbrevs[-1][-1]
    return players, teams, abbrevs


def get_headings(subject):
    """Derive column headings from the first record of ``subject``.

    For every fragment of ``subject[0]`` the heading is the text before the
    first ``:``, with all double quotes removed. A fragment without a colon
    is used in full (minus quotes).

    Args:
        subject: Non-empty sequence of records; only the first one is read.

    Returns:
        List of heading strings, one per fragment of the first record.
    """
    first_record = subject[0]
    return [
        str(fragment).split(':', 1)[0].replace('"', '')
        for fragment in first_record
    ]


def get_values(subject):
    """Extract the value part of every ``key:value`` fragment in one record.

    The value is everything after the *first* ``:`` of the fragment, with all
    double quotes removed. Later colons belong to the value and are kept
    (previously they were silently dropped, turning e.g. ``18:00:00`` into
    ``180000``). A fragment without a colon yields an empty string.

    Args:
        subject: Iterable of fragments (converted with ``str()``).

    Returns:
        List of value strings, one per fragment, in the same order.
    """
    values = []
    for element in subject:
        # partition() splits on the first colon only; the tail is '' when
        # there is no colon at all.
        _, _, raw_value = str(element).partition(':')
        values.append(raw_value.replace('"', ''))
    return values


### start of code, general initialisation ###
start_time = time()
bootstrap_static = "https://fantasy.premierleague.com/drf/bootstrap-static"
# Parse inside a context manager so the HTTP response is closed as soon as
# it has been read, instead of being left open for the life of the kernel.
with urlopen(bootstrap_static) as page:
    soup = BeautifulSoup(page, "html.parser")
players, teams, heading_abbrevs = get_data(soup)
player_headings = get_headings(players)
team_headings = get_headings(teams)


def _to_numeric_where_possible(column):
    """Return ``column`` converted by ``pd.to_numeric``, or unchanged if that fails.

    Equivalent to ``pd.to_numeric(column, errors='ignore')`` without relying
    on the ``errors='ignore'`` option, which newer pandas releases deprecate.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# make player data into dataframe
player_values = [get_values(player) for player in players]

df_players_full = pd.DataFrame(player_values, columns=player_headings)
df_players_full = df_players_full.apply(_to_numeric_where_possible)
# presumably the feed reports prices in tenths of the displayed unit -- TODO confirm
df_players_full['now_cost'] /= 10
# Points per 90 minutes played. NOTE: rows with minutes == 0 produce inf/NaN
# here; the query cell filters on a minimum number of minutes.
df_players_full['PP90'] = (90 * df_players_full['total_points'] / df_players_full['minutes'])
# Points per 90 minutes per unit of cost.
df_players_full['PP90PM'] = df_players_full['PP90'] / df_players_full['now_cost']
keep_player_headings = ['web_name', 'total_points', 'PP90', 'PP90PM', 'now_cost', 'goals_scored', 'assists', 
                        'clean_sheets', 'bonus', 'bps', 'minutes', 
                        'saves', 'element_type', 'team']
# .copy() makes the subset an independent frame, so later edits to it cannot
# trigger SettingWithCopyWarning. (The Styler expression that used to follow
# was discarded mid-cell and never displayed, so it has been removed.)
df_players = df_players_full[keep_player_headings].copy()

# make team data into dataframe
team_values = [get_values(team) for team in teams]

df_teams_full = pd.DataFrame(team_values, columns=team_headings)
df_teams_full = df_teams_full.apply(_to_numeric_where_possible)
keep_team_headings = ['team_code', 'name', 'short_name', 'strength', 'position', 'played', 'win', 'loss', 'draw', 
                      'points', 'form', 'strength_overall_home', 'strength_overall_away', 'strength_attack_home', 
                      'strength_attack_away', 'strength_defence_home', 'strength_defence_away']
df_teams = df_teams_full[keep_team_headings].copy()

# Lookup tables used by the query cell to translate readable names into the
# numeric codes stored in the 'team' and 'element_type' player columns.
# NOTE(review): team_codes is hard-coded for one season's alphabetical order;
# presumably it mirrors df_teams['team_code'] -- verify when the season changes.
team_codes = {'Arsenal': 1, 'Bournemouth': 2, 'Brighton': 3, 'Burnley': 4, 'Cardiff': 5, 'Chelsea': 6, 'Crystal Palace': 7,
             'Everton': 8, 'Fulham': 9, 'Huddersfield': 10, 'Leicester': 11, 'Liverpool': 12, 'Man City': 13, 'Man Utd': 14,
             'Newcastle': 15, 'Southampton': 16, 'Spurs': 17, 'Watford': 18, 'West Ham': 19, 'Wolves': 20}
position_codes = {'G': 1, 'D': 2, 'M': 3, 'F': 4}



print("Time: %.3f s" % (time()-start_time))

Time: 2.506 s


In [80]:
# Restart the timer for this cell; it is only read by the commented-out
# timing print at the end of the cell.
start_time = time()

#function to sort and filter players
def sort_by(df, field, position, team, cost, minutes, team_codes, position_codes):
    """Rank players by ``field`` (descending) and apply the requested filters.

    The cost and minutes limits always apply. The position and team filters
    are independent of each other: either, both or neither may be given.
    (Previously a team given without a position was silently ignored.)

    Args:
        df: Player frame with at least the columns ``field``, ``now_cost``,
            ``minutes``, ``element_type`` and ``team``.
        field: Name of the column to sort by, highest value first.
        position: Key of ``position_codes`` to keep, or ``False`` (any falsy
            value) for all positions.
        team: Key of ``team_codes`` to keep, or ``False`` (any falsy value)
            for all teams.
        cost: Maximum ``now_cost`` (inclusive).
        minutes: Minimum ``minutes`` (inclusive).
        team_codes: Mapping of team name -> value stored in ``df['team']``.
        position_codes: Mapping of position letter -> value stored in
            ``df['element_type']``.

    Returns:
        A new DataFrame with the matching rows, sorted by ``field`` descending.

    Raises:
        KeyError: If ``team`` / ``position`` is given but missing from its
            lookup table, or a required column is absent.
    """
    df_sorted = df.sort_values(by=[field], ascending=False)
    # Build the row mask on the already-sorted frame so the result keeps the
    # sort order.
    keep = (df_sorted['now_cost'] <= cost) & (df_sorted['minutes'] >= minutes)
    if team:
        keep &= df_sorted['team'] == team_codes[team]
    if position:
        keep &= df_sorted['element_type'] == position_codes[position]
    return df_sorted.loc[keep]

    
# Query parameters for the table displayed by this cell.
field = 'PP90PM'       # column to rank by, highest first
position = 'M'         # key of position_codes, or False for every position
team = False           # key of team_codes, or False for every team
max_cost = 150         # upper bound on now_cost (inclusive)
min_minutes = 1000     # lower bound on minutes played (inclusive)

df_players_filtered = sort_by(
    df=df_players,
    field=field,
    position=position,
    team=team,
    cost=max_cost,
    minutes=min_minutes,
    team_codes=team_codes,
    position_codes=position_codes,
)
# Last expression of the cell: render the result with fixed decimal places.
df_players_filtered.style.format({'now_cost': "{:.1f}", 'PP90': '{:.2f}', 'PP90PM': '{:.2f}'})



#print("Time: %.3f s" % (time()-start_time))

Unnamed: 0,web_name,total_points,PP90,PP90PM,now_cost,goals_scored,assists,clean_sheets,bonus,bps,minutes,saves,element_type,team
343,Martial,125,7.16,0.95,7.5,9,6,9,7,403,1572,0,3,14
342,Lingard,128,6.38,0.91,7.0,8,6,9,10,450,1807,0,3,14
371,Kenedy,51,4.49,0.9,5.0,2,2,6,5,171,1023,0,3,15
316,Gündogan,82,4.91,0.89,5.5,4,2,6,9,376,1504,0,3,13
318,Bernardo Silva,112,6.66,0.89,7.5,6,5,9,8,440,1514,0,3,13
36,Stanislas,75,5.25,0.88,6.0,5,3,2,7,285,1285,0,3,2
12,Ramsey,130,6.34,0.85,7.5,7,10,9,12,503,1844,0,3,1
384,Davis,71,4.22,0.84,5.0,3,2,4,6,337,1515,0,3,16
136,Willian,130,6.24,0.83,7.5,6,8,9,13,522,1874,0,3,6
417,Son,178,6.99,0.82,8.5,12,8,14,16,544,2292,0,3,17
