This notebook provides an example of how the data from the Fantasy Football Premier League can be retrieved, explored and used to predict game results. Most of the code for the initial data structuring of the players data and the lp_solve computation used for the team selection is from http://billmill.org/fantasypl/.

# 1. Retrieve player data from the Premier League API

In [None]:
# Retrieve data from the web API and store player data in a pickle file

import requests, cPickle, shutil, time

# Note: the max number of players expected might need to be changed depending on season
MAX_NUMBER_PLAYERS = 700

# URL template of the per-player endpoint; %s is replaced by the candidate id
playerurl = "http://fantasy.premierleague.com/web/api/elements/%s/"

# Maps candidate id -> decoded JSON record of that player.
# Deliberately not named `all`: that would shadow the builtin all() for the
# rest of the notebook session.
all_players = {}

for i in range(MAX_NUMBER_PLAYERS):
    r = requests.get(playerurl % i)

    # skip non-existent players
    if r.status_code != 200: continue

    try:
        all_players[i] = r.json()
    except ValueError:
        # the response body was not valid JSON; ignore this id
        continue

# The file is opened only once the download has finished, so a failed run does
# not leave a truncated pickle behind. Binary mode ("wb") matches the "rb" used
# when the file is loaded again, and the with-block guarantees it is closed.
with open("players.data.pickle", "wb") as outfile:
    cPickle.dump(all_players, outfile)

In [None]:
# Load player data from the pickle file

import cPickle
# Read the pickled {id: player record} dict back into memory. The with-block
# closes the file handle, which the bare open() call previously leaked.
# NOTE: unpickling can execute arbitrary code, so only load a pickle file that
# was produced locally.
with open("players.data.pickle", "rb") as infile:
    players = cPickle.load(infile)
#players[1]

In [None]:
# Create classes for players for convenience of retrieving the data

# Note: the abbreviations might need to be updated based on season

# Team name as it appears in the player data -> three-letter abbreviation.
# Two spellings may share one abbreviation (e.g. "Stoke City" and "Stoke").
team_abbreviations = {
    "Norwich": "NOR",
    "Cardiff City": "CAR",
    "Man City": "MCI",
    "Newcastle": "NEW",
    "West Brom": "WBA",
    "West Ham": "WHU",
    "Southampton": "SOU",
    "Sunderland": "SUN",
    "Stoke City": "STK",
    "Crystal Palace": "CRY",
    "Arsenal": "ARS",
    "Swansea": "SWA",
    "Liverpool": "LIV",
    "Hull City": "HUL",
    "Man Utd": "MUN",
    "Everton": "EVE",
    "Fulham": "FUL",
    # "Tottenham": "TOT",   (disabled; the "Spurs" entry below maps to TOT)
    "Aston Villa": "AVL",
    "Chelsea": "CHE",
    "Bournemouth": "BOU",
    "Watford": "WAT",
    "Stoke": "STK",
    "Spurs": "TOT",
    "Leicester": "LEI",
}

class Game(object):
    """One row of a player's fixture history.

    The row is a positional sequence; only three of its fields are used:
      field 2  -- result text; its first three characters become ``opp`` and
                  the character at index 4 becomes ``loc``
      field 3  -- stored as ``minutes``
      field 19 -- stored as ``points``
    """

    def __init__(self, game_json):
        result = game_json[2]
        self.opp = result[:3]
        self.loc = result[4]  # "A" for away, "H" for home
        self.points = game_json[19]
        self.minutes = game_json[3]

    def __repr__(self):
        summary = (self.opp, self.loc, self.points)
        return "Game vs. %s %s: %s pts" % summary
    
class Player(object):
    """Convenience wrapper around one decoded player record.

    Attributes set by the constructor:
      raw       -- the record exactly as passed in
      games     -- list of Game objects built from fixture_history/all
      name      -- "<first_name> <second_name>" as a unicode string
      cost      -- the record's "now_cost" value
      position  -- the record's "type_name" value (e.g. "Goalkeeper")
      pos       -- short form of position ("gk", "d", "m" or "f")
      team      -- abbreviation looked up in team_abbreviations
      idn       -- the record's "id" value
      news      -- the record's "news" value
      upcoming  -- list of (week, opponent abbreviation, location) tuples

    Raises KeyError when the record's team name or position is not in the
    corresponding lookup table.
    """

    # Position name -> short prefix. Built once for the class instead of being
    # re-created on every shortname() call.
    _POS_ABBREVIATIONS = {
        "Goalkeeper": "gk",
        "Defender": "d",
        "Midfielder": "m",
        "Forward": "f",
    }

    def __init__(self, player_json):
        self.raw = player_json
        self.games = [Game(g) for g in player_json["fixture_history"]["all"]]
        self.name = u"{first_name} {second_name}".format(**player_json)
        self.cost = player_json["now_cost"]
        self.position = player_json["type_name"]
        self.team = team_abbreviations[player_json["team_name"]]
        self.idn = player_json["id"]
        self.news = player_json["news"]
        #self.news_return = player_json["news_return"]
        self.pos = self.shortname(self.position)
        self.upcoming = self.get_upcoming_fixtures(player_json["fixtures"]["all"])

    def get_upcoming_fixtures(self, fixtures):
        """Convert raw fixture rows into (week, opponent, location) tuples.

        Each row is a 3-item sequence whose first item is ignored. The last
        whitespace-separated token of the second item is parsed as the week
        number. The third item is the opponent text, split at "(": the part
        before it is looked up in team_abbreviations and the first character
        after it is kept as the location. Rows whose opponent is "-" are
        skipped.
        """
        upcoming = []
        for _, gameweek, opponent in fixtures:
            week = int(gameweek.split()[-1])
            if opponent == "-":
                continue
            opp, loc = opponent.split('(')
            opp = team_abbreviations[opp.strip()]
            loc = loc[0]
            upcoming.append((week, opp, loc))
        return upcoming

    def shortname(self, position):
        """Return the short prefix for a position name; KeyError if unknown."""
        return self._POS_ABBREVIATIONS[position]

    def __repr__(self):
        # The name is reduced to ASCII so the result is a plain byte string.
        return "#%s %s %s £%s %s" % (self.idn, self.team, self.name.encode("ascii", "ignore"), self.cost, self.pos)

    def __unicode__(self):
        # The template must itself be unicode: formatting a byte string that
        # contains a non-ASCII pound sign with a unicode name forces an ASCII
        # decode of the template and raises UnicodeDecodeError on Python 2.
        # The escape keeps the literal independent of the source encoding.
        return u"#%s %s \u00a3%s %s" % (self.idn, self.name, self.cost, self.pos)

# One Player object per downloaded record
player_objs = [Player(p) for p in players.itervalues()]

def find_player(needle):
    """Return every player whose name contains needle, ignoring case.

    The result is a list in player_objs order; it is empty when nothing matches.
    """
    matches = []
    for candidate in player_objs:
        if needle.lower() in candidate.name.lower():
            matches.append(candidate)
    return matches


# Test that the new class works:

p = find_player('Van Persie')[0]
print p.name
print p.position
print p.cost
print p.idn
print p.upcoming[:3]  # upcoming games
print p.games[:3]     # games he's already played
print p

# 2. Compute prediction factors and select a team by solving a linear optimisation problem

In [None]:
# Compute opponent factor

from collections import OrderedDict

# opponent abbreviation -> [points summed over all player appearances against
# that opponent, number of such appearances]
opponents = {}
for player in players.itervalues():
    for game in player["fixture_history"]["all"]:
        #skip games where the player played 0 minutes
        if game[3] == 0: continue
        opp = game[2][:3]
        pts = game[19]
        tally = opponents.setdefault(opp, [0, 0])
        tally[0] += pts
        tally[1] += 1

# opponent abbreviation -> average points per appearance against that opponent
avgs = {}
for opponent, (score, n) in opponents.iteritems():
    avgs[opponent] = score / float(n)

# the same averages ordered from lowest to highest
sorted_avgs = OrderedDict(sorted(avgs.items(), key=lambda pair: pair[1]))

# mean of the per-opponent averages, used as the neutral baseline
avg_opponent = sum(avgs.values()) / float(len(avgs))


In [None]:
# Compute homefield advantage

homeaway = {"A": 0, "H": 0}
n = 0.
for player in player_objs:
    #only consider full games to eliminate minute bias
    for game in [p for p in player.games if p.minutes == 90]:
        homeaway[game.loc] += game.points
        n += 1

homeaway["A"] /= n
homeaway["H"] /= n

homefield = homeaway["H"] - homeaway["A"]
print homefield

In [None]:
# Compute a player's expected value / points
#
# Factor in:
#   - The opponent 
#   - Whether the game is away or home can be factored in, but not as important as opponent
#   - Previous points obtained by the player


# TO-DO: factor in the news?

def adjusted_score(game):
    """Return a played game's points with location and opponent evened out.

    Half of homefield is added for an away game ("A") and subtracted
    otherwise; then the gap between the overall opponent average and this
    opponent's average is added. Raises KeyError if game.opp is not in avgs.
    """
    if game.loc == "A":
        location_adj = homefield/2
    else:
        location_adj = -homefield/2
    opponent_adj = avg_opponent - avgs[game.opp]
    return game.points + location_adj + opponent_adj
    
def adjusted_average(player):
    """Return the mean adjusted_score over all of the player's games.

    A player with no games yet gets 0.0 rather than a ZeroDivisionError, so a
    single player without fixture history cannot abort a computation that
    covers every player. The result is always a float.
    """
    games = player.games
    if not games:
        return 0.
    return sum(adjusted_score(g) for g in games) / float(len(games))

def game_value(game):
    """Return the points adjustment for one upcoming fixture.

    game is indexed positionally: item 1 is the opponent abbreviation and
    item 2 the location. Half of homefield is added for a home game ("H") and
    subtracted otherwise; then the gap between this opponent's average and
    the overall opponent average is added. Raises KeyError if the opponent is
    not in avgs.
    """
    location = game[2]
    adj = homefield/2 if location == "H" else -homefield/2
    return adj + (avgs[game[1]] - avg_opponent)
    
def expected_points(player, n=5):
    """return the expected points per game over the next n games

    For each of the player's first n upcoming fixtures the adjusted average
    is combined with that fixture's game_value; the total is divided by n
    (by n itself, even when fewer than n fixtures are available).
    """
    baseline = adjusted_average(player)
    total = sum((baseline + game_value(fixture)
                 for fixture in player.upcoming[:n]), 0.)
    return total/n

print expected_points(find_player(u"Mutch")[0])
print expected_points(find_player(u"Sanogo")[0])
print expected_points(find_player(u"Kane")[0])

In [None]:
# Add expected points to the players

# Attach the module-level function as a method, so p.expected_points() works
setattr(Player, "expected_points", expected_points)
# Rebuild the player list from the raw records
player_objs = [Player(record) for record in players.itervalues()]
# Quick check that the method is callable on an instance
player_objs[1].expected_points()

In [None]:
# Solve the linear optimisation problem:
# Given the constraints:
#   Total player cost <= 100 (the code below passes 1000 as the limit, presumably because player costs are stored in tenths)
#   2 goalkeepers
#   5 defenders
#   5 midfielders
#   3 forwards
#   Maximize expected team value

# TO-DO: instead of 100, use the total player cost that is current (if the web api can return this)
# TO-DO: limitation on the number of players allowed from the same team (max players: 3)
# TO-DO: there are actually two situations: (1) choosing full team (2) making 1 or 2 transfers


In [None]:
# Solving the problem using lp-solve
# Download from: http://lpsolve.sourceforge.net/5.5/

def objective_function():
    """Return the "max:" line of the LP model.

    There is one term per player: the player's expected points as the
    coefficient, and the variable name made of the position prefix followed
    by the player id.
    """
    terms = []
    for p in player_objs:
        terms.append("{ev} {p.pos}{p.idn}".format(p=p, ev=p.expected_points()))

    return "max: " + " + ".join(terms) + ";\n"

def cost_constraint(max_price):
    """Return the LP line limiting the summed cost of the selected players.

    Each player contributes "<cost> <variable>"; the sum must be <= max_price.
    """
    terms = ["{p.cost} {p.pos}{p.idn}".format(p=p) for p in player_objs]
    lhs = " + ".join(terms)

    return "cost_constraint: " + lhs + " <= %s;\n" % max_price

def position_constraints():
    """Return the four LP lines fixing how many players each position gets.

    One line per position, in the order goalkeepers (2), defenders (5),
    midfielders (5), forwards (3). Each line sums the variables of all players
    whose position matches and requires the sum to equal the quota.
    """
    # (variable prefix, position name to match, required number of players)
    quotas = (
        ("gk", "Goalkeeper", 2),
        ("d", "Defender", 5),
        ("m", "Midfielder", 5),
        ("f", "Forward", 3),
    )

    lines = []
    for prefix, position, quota in quotas:
        variables = " + ".join("{prefix}{p.idn}".format(prefix=prefix, p=p)
                               for p in player_objs
                               if p.position == position)
        lines.append("%s_limit: %s = %d;\n" % (prefix, variables, quota))

    return "".join(lines)

import StringIO

# Declaring every variable name is not very exciting, but it is what marks
# each player variable as binary (selected or not) for lp_solve.
def all_player_variables():
    """Return the "bin" line listing one variable per player."""
    names = ["{p.pos}{p.idn}".format(p=p) for p in player_objs]
    return "bin %s;\n" % ", ".join(names)

#create a buffer to hold all the constraints
# (objective first, then the constraints, then the variable declaration)
buf = StringIO.StringIO()
buf.write(objective_function())
buf.write(cost_constraint(max_price=1000))
buf.write(position_constraints())
buf.write(all_player_variables())

import subprocess, re

def get_player(idn):
    """given an id, return a player

    The first player in player_objs with a matching idn is returned;
    ValueError is raised when there is none.
    """
    missing = object()
    found = next((p for p in player_objs if p.idn == idn), missing)
    if found is missing:
        raise ValueError("Unable to find player")
    return found
    
def return_team(lp):
    """run lp_solve and return a list of player objects

    lp is the complete model text. It is written to the standard input of
    ./lp_solve directly instead of being spliced into an `echo '...' |` shell
    command, so the result no longer depends on shell quoting or on how the
    shell's echo treats its argument.

    Every output line ending in " 1" is taken as a selected variable; the
    digits following the leading letters of that line are the player id.

    Raises subprocess.CalledProcessError when lp_solve cannot be started or
    exits with a non-zero status, and ValueError (from get_player) when an id
    in the output is unknown.
    """
    cmd = ["./lp_solve"]
    try:
        proc = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    except OSError as err:
        # The shell pipeline reported a missing binary as exit status 127;
        # keep raising the same exception type for callers.
        raise subprocess.CalledProcessError(127, cmd, str(err))
    output, _ = proc.communicate(lp)
    if proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, cmd, output)

    selected = re.compile(r" 1$")
    variable_id = re.compile(r"^\w+?(\d+)")
    team_ids = [int(variable_id.search(l).group(1))
                for l in output.split('\n') if selected.search(l)]
    return [get_player(idn) for idn in team_ids]

return_team(buf.getvalue())

# 3. Current team

In [None]:
# Name or full name of current team members
# (each entry is later matched against player names with find_player)
my_team = [
    'Butland',
    'Huth',
    'Targett',
    'Kolarov',
    'van Dijk',
    'Alderweireld',
    'Arnautovic',
    'Scott Sinclair',
    'Mahrez',
    'Vardy',
    'Harry Kane',
    'Myhill',
    'Mata',
    'Mesut',
    'Lukaku',
]

# In addition to a full team, how much money is there left in the bank:
BANK = 29

# Number of transfers (can be max 2):
TRANSFERS = 2

In [None]:
# Check if all teams are playing in the next gameweek, and if there are any news on any of the players

# Player objects for the current squad, in the same order as my_team
my_players = []
for member in my_team:
    # find_player does a case-insensitive substring match on the name; the
    # first hit is used.
    # NOTE(review): this raises IndexError when nothing matches, and a short
    # entry may match an unintended player first -- check the printed names.
    p = find_player(member)[0]
    my_players.append(p)
    # one row per player: name | team | position | next fixture | cost | news | expected points
    print p.name, '\t|', p.team, '\t|', p.position, '\t|', p.upcoming[:1], '\t|', p.cost, '\t|', p.news, '\t|', p.expected_points()

In [None]:
def get_team_fullname(abbrv):
    return list(team_abbreviations.keys())[list(team_abbreviations.values()).index(abbrv)] + ' ('+abbrv+')'

print get_team_fullname('MCI')

In [None]:
# How many players from each team?

from pprint import pprint

# "<team name> (<abbreviation>)" -> number of squad members from that team
teams = {}
for player in my_players:
    label = get_team_fullname(player.team)
    teams[label] = teams.get(label, 0) + 1

pprint(teams)

In [None]:
# Games next week

for player in my_players:
    print get_team_fullname(player.team),'-',get_team_fullname(player.upcoming[:1][0][1])

In [None]:
# Compute total team value

budget = BANK
for player in my_players: budget += player.cost

print budget

In [None]:
# Transfers still available; starts at the configured TRANSFERS value.
transfers_left = TRANSFERS

# TO-DO (nothing below is implemented yet):

# If there are any injuries, remove those first

# Then if there are transfers left, compute the best transfer(s) given the budget

# But, should it only take into consideration the budget available this week, or maybe also consider the moves available next week?
# During each gameweek, one transfer is allowed, and if a transfer is not used, it is carried over to the next week (but there can't
# be more than 2 transfers in total per week)

# Also, maybe there is an optimisation that can be done when playing head2head, given that we know the opponent's team
# (except 1 or 2 transfers that the opponent is allowed to make and will not be visible before the game)

In [None]:
# Note: we need to take into consideration the next min. 3 gameweeks
# and which teams play, as there are some weeks when only some teams play
# and the team should be maximised towards that

# Note: another strategy towards making the team could be to speculate
# on players for which the price will increase over time - what a bigger 
# budget would allow is more transfers and exchanging players of different values

# Note: the opponent has a max of 2 *free* transfers, but if the budget allows,
# one can buy transfers (think it is around 4 mil. but need to check)

# Note: the order of the players on the bench does matter!

# Note: take into consideration whether the players play in other championships
# as well and their game and participation might be affected by that.