This notebook provides an example of how data from the Fantasy Football Premier League can be retrieved, explored and used to predict game results. Most of the code for the initial structuring of the player data, and for the lp_solve computation used for team selection, is from http://billmill.org/fantasypl/.

# 1. Retrieve player data from the Premier League API

In [2]:
# Retrieve data from the web API and store player data in a pickle file

import requests, cPickle, shutil, time

# Note: the max number of players expected might need to be changed depending on season
MAX_NUMBER_PLAYERS = 700

# URL template for a single player record; it never changes, so build it once.
playerurl = "http://fantasy.premierleague.com/web/api/elements/%s/"

# Maps the numeric id used in the URL to the decoded JSON record.
# (Deliberately not called `all`, which would shadow the builtin for the rest
# of the kernel session.)
all_players = {}

for i in range(MAX_NUMBER_PLAYERS):
    r = requests.get(playerurl % i)

    # skip non-existent players
    if r.status_code != 200: continue

    try:
        all_players[i] = r.json()
    except ValueError:
        # the body was not valid JSON; treat it like a missing player
        continue

# Open the output only after the download has finished, so a failed run does
# not truncate an existing pickle. Binary mode matches the "rb" used when the
# file is loaded again and is what pickle expects on every platform.
with open("players.data.pickle", "wb") as outfile:
    cPickle.dump(all_players, outfile)

In [3]:
# Load player data from the pickle file

import cPickle

# NOTE: unpickling can execute arbitrary code; only load a pickle file that
# was produced by this notebook.
# The `with` block closes the file handle as soon as loading is done.
with open("players.data.pickle", "rb") as infile:
    players = cPickle.load(infile)
#players[1]

{u'assists': 0,
 u'bonus': 0,
 u'bps': 0,
 u'chance_of_playing_next_round': 0,
 u'chance_of_playing_this_round': 0,
 u'clean_sheets': 0,
 u'code': 59936,
 u'cost_change_event': 0,
 u'cost_change_event_fall': 0,
 u'cost_change_start': 0,
 u'cost_change_start_fall': 0,
 u'current_fixture': u'Watford (H)',
 u'dreamteam_count': 0,
 u'ea_index': 0,
 u'element_type': 1,
 u'ep_next': u'0.0',
 u'ep_this': u'0.0',
 u'event_explain': [[u'Minutes played', 0, 0]],
 u'event_points': 0,
 u'event_total': 0,
 u'first_name': u'Wojciech',
 u'fixture_history': {u'all': [[u'09 Aug 13:30',
    1,
    u'WHU(H) 0-2',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    50,
    0],
   [u'16 Aug 13:30',
    2,
    u'CRY(A) 2-1',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -304,
    50,
    0],
   [u'24 Aug 20:00',
    3,
    u'LIV(H) 0-0',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
   

In [17]:
# Create classes for players for convenience of retrieving the data

# Note: the abbreviations might need to be updated based on season
#
# Maps a team name to its three-letter code. The keys are looked up with
# player_json["team_name"] and with the opponent names parsed out of the
# upcoming-fixture strings, so a name missing here raises KeyError when the
# Player objects are built.
#
# Caveat: 'Stoke City' and 'Stoke' both map to 'STK', so a reverse lookup
# (code -> name) is ambiguous for that code.

team_abbreviations = {
    'Norwich': 'NOR',
    'Cardiff City': 'CAR',
    'Man City': 'MCI',
    'Newcastle': 'NEW',
    'West Brom': 'WBA',
    'West Ham': 'WHU',
    'Southampton': 'SOU',
    'Sunderland': 'SUN',
    'Stoke City': 'STK',
    'Crystal Palace': 'CRY',
    'Arsenal': 'ARS',
    'Swansea': 'SWA',
    'Liverpool': 'LIV',
    'Hull City': 'HUL',
    'Man Utd': 'MUN',
    'Everton': 'EVE',
    'Fulham': 'FUL',
    # 'Tottenham' is disabled; the 'Spurs' entry below provides 'TOT' instead.
    #'Tottenham': 'TOT',
    'Aston Villa': 'AVL',
    'Chelsea': 'CHE',
    'Bournemouth' : 'BOU',
    'Watford': 'WAT',
    'Stoke': 'STK',
    'Spurs': 'TOT',
    'Leicester' : 'LEI'
    
}

class Game(object):
    """One entry of a player's fixture history, reduced to the fields used here.

    Attributes:
        opp     -- first three characters of the result string (opponent code)
        loc     -- fifth character of the result string: "A" away, "H" home
        points  -- element 19 of the history row
        minutes -- element 3 of the history row
    """

    def __init__(self, game_json):
        # Element 2 is a result string such as u'WHU(H) 0-2'.
        result = game_json[2]
        self.minutes = game_json[3]
        self.points = game_json[19]
        self.opp = result[:3]
        self.loc = result[4] # "A" for away, "H" for home

    def __repr__(self):
        summary = (self.opp, self.loc, self.points)
        return "Game vs. %s %s: %s pts" % summary
    
class Player(object):
    """Wrapper around one player's raw JSON record, exposing the fields this notebook uses.

    Attributes set by the constructor:
        raw       -- the unmodified JSON dict
        games     -- list of Game objects built from player_json["fixture_history"]["all"]
        name      -- u"<first_name> <second_name>"
        cost      -- player_json["now_cost"]
        position  -- player_json["type_name"]
        team      -- abbreviation looked up in team_abbreviations (KeyError if unknown)
        idn       -- player_json["id"]
        news      -- player_json["news"]
        pos       -- short position code, see shortname()
        upcoming  -- list of (week, opponent abbreviation, location) tuples
    """

    # Full position name -> short prefix. Built once for the class rather than
    # on every shortname() call.
    _POS_ABBREVIATIONS = {
        "Goalkeeper": "gk",
        "Defender": "d",
        "Midfielder": "m",
        "Forward": "f"
    }

    def __init__(self, player_json):
        self.raw = player_json
        self.games = [Game(g) for g in player_json["fixture_history"]["all"]]
        self.name = u"{first_name} {second_name}".format(**player_json)
        self.cost = player_json["now_cost"]
        self.position = player_json["type_name"]
        self.team = team_abbreviations[player_json["team_name"]]
        self.idn = player_json["id"]
        self.news = player_json["news"]
        #self.news_return = player_json["news_return"]
        self.pos = self.shortname(self.position)
        self.upcoming = self.get_upcoming_fixtures(player_json["fixtures"]["all"])

    def get_upcoming_fixtures(self, fixtures):
        """Convert raw fixture rows into (week, opponent abbreviation, location) tuples.

        Each row is unpacked as (ignored, gameweek, opponent). The week number is
        the last whitespace-separated token of `gameweek`. Rows whose opponent is
        "-" are skipped. Otherwise `opponent` is split at "(": the part before
        it is looked up in team_abbreviations and the first character after it
        is kept as the location.

        Raises KeyError if the opponent name is not in team_abbreviations.
        """
        upcoming = []
        for _, gameweek, opponent in fixtures:
            week = int(gameweek.split()[-1])
            if opponent == "-":
                continue
            opp, loc = opponent.split('(')
            opp = team_abbreviations[opp.strip()]
            loc = loc[0]
            upcoming.append((week, opp, loc))
        return upcoming

    def shortname(self, position):
        """Return the short code for a full position name; KeyError if unknown."""
        return self._POS_ABBREVIATIONS[position]

    def __repr__(self):
        # Non-ASCII characters are dropped from the name so that the result is
        # a plain byte string.
        return "#%s %s %s £%s %s" % (self.idn, self.team, self.name.encode("ascii", "ignore"), self.cost, self.pos)

    def __unicode__(self):
        # The template must be a unicode literal: on Python 2, formatting a
        # byte string that contains "£" with a unicode name forces an ASCII
        # decode of the template and raises UnicodeDecodeError.
        return u"#%s %s £%s %s" % (self.idn, self.name, self.cost, self.pos)

# One Player wrapper per raw record loaded from the pickle file.
player_objs = [Player(record) for record in players.itervalues()]

def find_player(needle):
    """Return every player whose name contains `needle`, ignoring case.

    The result is a (possibly empty) list in the order of player_objs.
    """
    matches = []
    for candidate in player_objs:
        if needle.lower() in candidate.name.lower():
            matches.append(candidate)
    return matches


# Test that the new class works:

p = find_player('Van Persie')[0]
print p.name
print p.position
print p.cost
print p.idn
print p.upcoming[:3]  # upcoming games
print p.games[:3]     # games he's already played
print p

Robin van Persie
Forward
95
248
[(33, 'TOT', u'A'), (34, 'AVL', u'H'), (34, 'CRY', u'H')]
[Game vs. TOT H: 0 pts, Game vs. AVL A: 0 pts, Game vs. NEW H: 0 pts]
#248 MUN Robin van Persie £95 f


# 2. Compute prediction factors and select a team by solving a linear optimisation problem

In [18]:
# Compute opponent factor

from collections import OrderedDict

# opponents: three-letter opponent code -> [total points, number of appearances]
opponents = {}
for record in players.itervalues():
    for row in record["fixture_history"]["all"]:
        # only count games in which the player was on the pitch
        if row[3] != 0:
            code = row[2][:3]
            scored = row[19]
            tally = opponents.setdefault(code, [0, 0])
            tally[0] += scored
            tally[1] += 1

# avgs: opponent code -> average points a player scored against that opponent
avgs = dict((code, total / float(count))
            for code, (total, count) in opponents.iteritems())

# same data, ordered from lowest to highest average
sorted_avgs = OrderedDict(sorted(avgs.items(), key=lambda pair: pair[1]))

# mean of the per-opponent averages
avg_opponent = sum(avgs.values()) / float(len(avgs))


In [19]:
# Compute homefield advantage

homeaway = {"A": 0, "H": 0}
n = 0.
for player in player_objs:
    #only consider full games to eliminate minute bias
    for game in [p for p in player.games if p.minutes == 90]:
        homeaway[game.loc] += game.points
        n += 1

homeaway["A"] /= n
homeaway["H"] /= n

homefield = homeaway["H"] - homeaway["A"]
print homefield

0.231981549106


In [20]:
# Compute a player's expected value / points
#
# Factor in:
#   - The opponent 
#   - Whether the game is away or home can be factored in, but not as important as opponent
#   - Previous points obtained by the player


# TO-DO: factor in the news?

def adjusted_score(game):
    """Return a game's points with venue and opponent effects removed.

    Half of `homefield` is added for an away game and subtracted otherwise;
    then the gap between the mean opponent average and this opponent's
    average is added.
    """
    venue_shift = homefield/2 if game.loc == "A" else -homefield/2
    opponent_shift = avg_opponent - avgs[game.opp]
    return game.points + venue_shift + opponent_shift
    
def adjusted_average(player):
    """Return the mean adjusted_score over all of the player's games.

    A player with no games yields 0.0 instead of raising ZeroDivisionError.
    """
    games = player.games
    if not games:
        return 0.0
    return sum(adjusted_score(g) for g in games) / len(games)

def game_value(game):
    """Return the points adjustment for an upcoming fixture.

    `game` is indexed as (week, opponent code, location). Half of `homefield`
    is added for a home fixture and subtracted otherwise; then this opponent's
    average minus the mean opponent average is added.
    """
    opponent_code, location = game[1], game[2]
    venue_bonus = homefield/2 if location == "H" else -homefield/2
    opponent_bonus = avgs[opponent_code] - avg_opponent
    # sum() starts from 0 and adds left to right, like the running total it replaces
    return sum((venue_bonus, opponent_bonus))
    
def expected_points(player, n=5):
    """Return the expected points per game over the player's next n fixtures.

    For each of the first n upcoming fixtures, the player's adjusted average
    is added to that fixture's game_value. The total is divided by n, even
    when fewer than n fixtures are available.
    """
    baseline = adjusted_average(player)
    total = sum((baseline + game_value(fixture) for fixture in player.upcoming[:n]), 0.)
    return total/n

print expected_points(find_player(u"Mutch")[0])
print expected_points(find_player(u"Sanogo")[0])
print expected_points(find_player(u"Kane")[0])

0.774685166029
0.17515313289
0.871853934793


In [21]:
# Add expected points to the players

# Attach the module-level function as a method: the instance is passed as
# its `player` argument.
setattr(Player, "expected_points", expected_points)
player_objs = [Player(record) for record in players.itervalues()]
player_objs[1].expected_points()

0.7557982941802533

In [77]:
# Solve the linear optimisation problem:
# Given the constraints:
#   Total player cost <= 100 (passed as 1000 in the code below; player costs appear to be stored in tenths)
#   2 goalkeepers
#   5 defenders
#   5 midfielders
#   3 forwards
#   Maximize expected team value

# TO-DO: instead of 100, use the total player cost that is current (if the web api can return this)
# TO-DO: limitation on the number of players allowed from the same team (max players: 3)
# TO-DO: there are actually two situations: (1) choosing full team (2) making 1 or 2 transfers


In [26]:
# Solving the problem using lp-solve
# Download from: http://lpsolve.sourceforge.net/5.5/

def objective_function():
    """Return the "max:" line of the LP model.

    Each player contributes one term: expected points times the player's
    variable, which is named <pos><idn>.
    """
    terms = []
    for p in player_objs:
        terms.append("{ev} {p.pos}{p.idn}".format(p=p, ev=p.expected_points()))
    return "max: " + " + ".join(terms) + ";\n"

def cost_constraint(max_price):
    """Return the LP constraint line that caps the summed cost of the selected players at max_price."""
    terms = ["{p.cost} {p.pos}{p.idn}".format(p=p) for p in player_objs]
    total_cost = " + ".join(terms)
    return "cost_constraint: " + total_cost + " <= %s;\n" % max_price

def position_constraints():
    """Return the four LP constraint lines that fix the squad size per position.

    Produces, in this order: 2 goalkeepers, 5 defenders, 5 midfielders and
    3 forwards. Each line sums the variables of all players in that position.
    """
    # (full position name, variable template, constraint label, right-hand side)
    rules = (
        ("Goalkeeper", "gk{p.idn}", "gk_limit: ", " = 2;\n"),
        ("Defender", "d{p.idn}", "d_limit: ", " = 5;\n"),
        ("Midfielder", "m{p.idn}", "m_limit: ", " = 5;\n"),
        ("Forward", "f{p.idn}", "f_limit: ", " = 3;\n"),
    )

    lines = []
    for position, template, label, rhs in rules:
        eligible = [p for p in player_objs if p.position == position]
        variables = " + ".join(template.format(p=p) for p in eligible)
        lines.append(label + variables + rhs)

    return "".join(lines)

import StringIO
# Buffer that accumulates the text of the LP model, section by section.
buf = StringIO.StringIO()
# Each section is generated and written in turn: the objective, then the cost
# cap (1000), then the per-position squad sizes.
for build_section in (objective_function,
                      lambda: cost_constraint(1000),
                      position_constraints):
    buf.write(build_section())

# Declaration of all player variables, appended to the model after the constraints.
def all_player_variables():
    """Return the "bin ...;" line listing every player's variable name (<pos><idn>)."""
    names = ["{p.pos}{p.idn}".format(p=p) for p in player_objs]
    return "bin %s;\n" % ", ".join(names)

buf.write(all_player_variables())

import subprocess, re

def get_player(idn):
    """Return the first player in player_objs whose idn equals `idn`.

    Raises ValueError if no player has that id.
    """
    candidates = (p for p in player_objs if p.idn == idn)
    for found in candidates:
        return found
    raise ValueError("Unable to find player")
    
def return_team(lp):
    """Run ./lp_solve on the model text `lp` and return the selected players.

    The model is written to the solver's standard input. Every output line
    that consists of a variable name ending in digits, followed by the value
    1, is taken as a selected player; the digits are the player id.

    Returns a list of Player objects in the order the solver printed them.
    Raises subprocess.CalledProcessError if lp_solve exits with a non-zero
    status, and ValueError (from get_player) for an unknown id.
    """
    # Pipe the model straight into the solver. Building an "echo '...' |"
    # shell command breaks on quotes in the model and is limited by the
    # maximum command-line length.
    proc = subprocess.Popen(["./lp_solve"],
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    output, _ = proc.communicate(lp)
    if proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, "./lp_solve", output)

    # One anchored pattern both selects the "... 1" lines and extracts the id,
    # so a line that ends in " 1" but carries no id is skipped instead of
    # raising AttributeError.
    selected = re.compile(r"^\w+?(\d+)\s+1$")
    team_ids = []
    for line in output.splitlines():
        match = selected.match(line)
        if match:
            team_ids.append(int(match.group(1)))
    return [get_player(idn) for idn in team_ids]

return_team(buf.getvalue())

[#3 ARS Petr Cech £58 gk,
 #7 ARS Hctor Bellern £64 d,
 #15 ARS Mesut zil £97 m,
 #60 BOU Charlie Daniels £50 d,
 #138 EVE Ross Barkley £71 m,
 #149 EVE Romelu Lukaku £90 f,
 #155 LEI Christian Fuchs £51 d,
 #163 LEI Riyad Mahrez £74 m,
 #170 LEI Jamie Vardy £78 f,
 #306 TOT Toby Alderweireld £65 d,
 #368 STK Marko Arnautovic £66 m,
 #421 WAT Heurelho Gomes £50 gk,
 #442 WAT Odion Ighalo £57 f,
 #497 NEW Georginio Wijnaldum £67 m,
 #586 SOU Virgil van Dijk £57 d]

# 3. Current team

In [73]:
# Name or full name of current team members 
# Each entry is matched as a case-insensitive substring of the player's full
# name (see find_player); the first match is used, so an entry should be
# specific enough to identify a single player.
my_team = ['Butland','Huth','Targett','Kolarov','van Dijk',
           'Alderweireld','Arnautovic','Scott Sinclair',
           'Mahrez','Vardy','Harry Kane','Myhill','Mata','Mesut','Lukaku']

# In addition to a full team, how much money is there left in the bank:
# (added directly to the players' cost values, so it must be in the same units)
BANK = 29

# Number of transfers (can be max 2):
TRANSFERS = 2

In [69]:
# Check if all teams are playing in the next gameweek, and if there are any news on any of the players

# my_players holds one Player object per entry of my_team, in the same order.
my_players = []
for member in my_team:
    # First substring match wins; raises IndexError if nothing matches.
    p = find_player(member)[0]
    my_players.append(p)
    # Columns: name | team | position | next fixture | cost | news | expected points
    print p.name, '\t|', p.team, '\t|', p.position, '\t|', p.upcoming[:1], '\t|', p.cost, '\t|', p.news, '\t|', p.expected_points()

Jack Butland 	| STK 	| Goalkeeper 	| [(33, 'LIV', u'A')] 	| 52 	| Ankle injury - Unknown return date 	| 3.9988822398
Robert Huth 	| LEI 	| Defender 	| [(33, 'SUN', u'A')] 	| 49 	|  	| 3.75090817657
Matt Targett 	| SOU 	| Defender 	| [(33, 'NEW', u'H')] 	| 38 	|  	| 2.00669296436
Aleksandar Kolarov 	| MCI 	| Defender 	| [(33, 'WBA', u'H')] 	| 59 	|  	| 3.10894606865
Virgil van Dijk 	| SOU 	| Defender 	| [(33, 'NEW', u'H')] 	| 57 	|  	| 4.43592659357
Toby Alderweireld 	| TOT 	| Defender 	| [(33, 'MUN', u'H')] 	| 65 	|  	| 4.31730794976
Marko Arnautovic 	| STK 	| Midfielder 	| [(33, 'LIV', u'A')] 	| 66 	|  	| 4.4051322398
Scott Sinclair 	| AVL 	| Midfielder 	| [(33, 'BOU', u'H')] 	| 41 	|  	| 1.64403806863
Riyad Mahrez 	| LEI 	| Midfielder 	| [(33, 'SUN', u'A')] 	| 74 	|  	| 6.71965817657
Jamie Vardy 	| LEI 	| Forward 	| [(33, 'SUN', u'A')] 	| 78 	|  	| 5.62590817657
Harry Kane 	| TOT 	| Forward 	| [(33, 'MUN', u'H')] 	| 104 	|  	| 5.66105794976
Boaz Myhill 	| WBA 	| Goalkeeper 	| [(33, '

In [66]:
def get_team_fullname(abbrv):
    return list(team_abbreviations.keys())[list(team_abbreviations.values()).index(abbrv)] + ' ('+abbrv+')'

print get_team_fullname('MCI')

Man City (MCI)


In [67]:
# How many players from each team?

from pprint import pprint

# teams: "<team name> (<code>)" -> number of my players from that team
teams = {}
for player in my_players:
    label = get_team_fullname(player.team)
    teams[label] = teams.get(label, 0) + 1

pprint(teams)

{'Arsenal (ARS)': 1,
 'Aston Villa (AVL)': 1,
 'Everton (EVE)': 1,
 'Leicester (LEI)': 3,
 'Man City (MCI)': 1,
 'Man Utd (MUN)': 1,
 'Southampton (SOU)': 2,
 'Stoke (STK)': 2,
 'Tottenham (TOT)': 2,
 'West Brom (WBA)': 1}


In [68]:
# Games next week

for player in my_players:
    print get_team_fullname(player.team),'-',get_team_fullname(player.upcoming[:1][0][1])

Stoke (STK) - Liverpool (LIV)
Leicester (LEI) - Sunderland (SUN)
Southampton (SOU) - Newcastle (NEW)
Man City (MCI) - West Brom (WBA)
Southampton (SOU) - Newcastle (NEW)
Tottenham (TOT) - Man Utd (MUN)
Stoke (STK) - Liverpool (LIV)
Aston Villa (AVL) - Bournemouth (BOU)
Leicester (LEI) - Sunderland (SUN)
Leicester (LEI) - Sunderland (SUN)
Tottenham (TOT) - Man Utd (MUN)
West Brom (WBA) - Man City (MCI)
Man Utd (MUN) - Tottenham (TOT)
Arsenal (ARS) - West Ham (WHU)
Everton (EVE) - Watford (WAT)


In [75]:
# Compute total team value

budget = BANK
for player in my_players: budget += player.cost

print budget

1023


In [76]:
# Transfers still available; starts at the configured TRANSFERS value.
transfers_left = TRANSFERS

# TODO: if there are any injuries, remove those players first.
    
# TODO: then, if there are transfers left, compute the best transfer(s) given the budget.

# Open question: should this only consider the budget available this week, or also the moves available next week?
# During each gameweek one transfer is allowed, and an unused transfer is carried over to the next week (but there can't
# be more than 2 transfers in total per week).

# There may also be an optimisation for head-to-head play, given that the opponent's team is known
# (except for the 1 or 2 transfers the opponent is allowed to make, which are not visible before the game).