In [1]:
import pandas as pd
import requests
import csv
import re
import math
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
from plotly.graph_objs import *
import plotly.figure_factory as ff
import datetime
import numpy as np
init_notebook_mode(connected=True)

In [2]:
# Read the API token from a local file so the secret itself never appears in
# the notebook; surrounding whitespace/newlines are stripped.
with open('token.txt') as token_file:
    token = token_file.read().strip()

In [3]:
# Credentials and endpoint roots for the football-data.org REST API.
API_TOKEN = token
# Use HTTPS: requests built from this root carry API_TOKEN in the
# X-Auth-Token header, and plain HTTP would expose that secret in transit.
URL_ROOT = 'https://api.football-data.org/'
# Path prefix under which the per-competition resources live.
API_ROOT = 'v1/competitions/'

In [4]:
# Headers sent with every API request: the auth token plus a response-control
# hint. NOTE(review): 'minified' presumably requests a compact payload —
# confirm against the API documentation.
headers = {'X-Auth-Token': API_TOKEN, 'X-Response-Control': 'minified'}

In [5]:
# Competition ids (each with a trailing slash so resource names can be
# appended directly).
PREMIERSHIP_ID = '445/'
CHAMPIONSHIP_ID = '446/'
LEAGUE1_ID = '447/'
LEAGUE2_ID = '448/'

# Fully-qualified base URL of each competition: host + API path + id.
PREMIERSHIP_ROOT = '{}{}{}'.format(URL_ROOT, API_ROOT, PREMIERSHIP_ID)
CHAMPIONSHIP_ROOT = '{}{}{}'.format(URL_ROOT, API_ROOT, CHAMPIONSHIP_ID)
LEAGUE1_ROOT = '{}{}{}'.format(URL_ROOT, API_ROOT, LEAGUE1_ID)
LEAGUE2_ROOT = '{}{}{}'.format(URL_ROOT, API_ROOT, LEAGUE2_ID)

# Index 0 is the Premiership, followed by the Championship, League 1, League 2.
ROOTS = [
    PREMIERSHIP_ROOT,
    CHAMPIONSHIP_ROOT,
    LEAGUE1_ROOT,
    LEAGUE2_ROOT,
]

In [6]:
# Fetch the team list for the first competition in ROOTS (the Premiership).
# The timeout (seconds) stops a stalled connection from hanging the kernel, and
# raise_for_status() reports auth / rate-limit failures here rather than as a
# confusing KeyError when the JSON body is read in a later cell.
rt = requests.get(ROOTS[0] + 'teams/', headers=headers, timeout=10)
rt.raise_for_status()

In [7]:
# Fetch the fixture list for the first competition in ROOTS (the Premiership).
# The timeout (seconds) stops a stalled connection from hanging the kernel, and
# raise_for_status() reports auth / rate-limit failures here rather than as a
# confusing error when the JSON body is read in a later cell.
rf = requests.get(ROOTS[0] + 'fixtures/', headers=headers, timeout=10)
rf.raise_for_status()

In [8]:
# Decode the fixtures response body from JSON; later cells read its
# 'fixtures' entry.
rff = rf.json()

In [9]:
# Decode the teams response body from JSON; later cells read its 'teams' entry.
rft = rt.json()

In [10]:
# Names of all teams in the competition, in the order the API returned them.
team_names = [
    team_entry['name']
    for team_entry in rft['teams']
]

In [11]:
def date_reader(date_string):
    """Return the leading ``YYYY-MM-DD`` part of ``date_string``.

    The string must start with four digits, a dash, two digits, a dash and
    two more digits; anything after that (e.g. a time component) is dropped.
    """
    leading_date = re.compile(r'\d{4}-\d{2}-\d{2}')
    return leading_date.match(date_string).group(0)

list_of_fixtures = rff.get('fixtures')

# Dump every finished fixture to scoresheet.csv, one row per match.
# newline='' is what the csv module requires of its file objects (otherwise
# Windows gets a blank line after every row), and an explicit UTF-8 encoding
# matches the default that pd.read_csv uses when the file is loaded back.
with open('scoresheet.csv', 'wt', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(('Date', 'Home Team', 'Home Score', 'Away Score', 'Away Team'))
    for fixture in list_of_fixtures:
        # Only completed matches contribute a score line.
        if fixture['status'] != 'FINISHED':
            continue
        scores = fixture['result']
        writer.writerow((
            date_reader(fixture['date']),
            fixture['homeTeamName'],
            scores['goalsHomeTeam'],
            scores['goalsAwayTeam'],
            fixture['awayTeamName'],
        ))

In [12]:
# Load the finished-match results written to scoresheet.csv into a DataFrame.
df = pd.read_csv('scoresheet.csv')

In [13]:
# Convert the 'Date' column from text to pandas datetimes (overwrites the
# column in place on df).
df['Date'] = pd.to_datetime(df['Date'])

In [14]:
def _relative_strength(matches, goals_column, league_average):
    """Return a team's goals-per-game in ``goals_column`` relative to the league.

    ``matches`` is the slice of ``df`` for one team at one venue. The result is
    a neutral 1 when the team has no such matches or its goal total is zero;
    otherwise it is (goals / matches played) / ``league_average``.
    """
    if matches.empty:
        return 1
    goal_total = matches[goals_column].sum()
    if not goal_total:
        return 1
    return (goal_total / matches.shape[0]) / league_average


total_games = len(df.index)

# League-wide goals per game. What home sides concede is exactly what away
# sides score (and vice versa), hence the two aliases.
league_average_home_goals_scored = df['Home Score'].sum() / total_games
league_average_away_goals_scored = df['Away Score'].sum() / total_games
league_average_goals_conceded_at_home = league_average_away_goals_scored
league_average_goals_conceded_at_away = league_average_home_goals_scored

# team name -> attack/defence ratios for home and away fixtures. A defence
# value above 1 means the team concedes more per game than the league average.
team_strengths = {}

for team in team_names:
    home_fixtures = df[df['Home Team'] == team]
    away_fixtures = df[df['Away Team'] == team]

    team_strengths[team] = {
        'home_attack': _relative_strength(
            home_fixtures, 'Home Score', league_average_home_goals_scored
        ),
        'home_defence': _relative_strength(
            home_fixtures, 'Away Score', league_average_goals_conceded_at_home
        ),
        'away_attack': _relative_strength(
            away_fixtures, 'Away Score', league_average_away_goals_scored
        ),
        'away_defence': _relative_strength(
            away_fixtures, 'Home Score', league_average_goals_conceded_at_away
        ),
    }

In [15]:
def poisson_goals(goals, mu):
    """Return the Poisson probability of exactly ``goals`` events with mean ``mu``.

    Computes ``mu**goals * exp(-mu) / goals!``.

    Args:
        goals: Non-negative integer event count (``math.factorial`` rejects
            negative or non-integral values).
        mu: Expected number of events.

    Returns:
        The probability mass at ``goals``.
    """
    # math.exp is the accurate way to evaluate e**x; raising the rounded
    # constant math.e to a power loses precision.
    return (mu ** goals * math.exp(-mu)) / math.factorial(goals)

In [16]:
def teams_data_tuple(teamHome, teamAway):
    """Look up the strength ratings relevant to a single fixture.

    Reads the module-level ``team_strengths`` mapping and returns, in order:
    the home side's home attack, the away side's away attack, the home side's
    home defence and the away side's away defence.
    """
    home_ratings = team_strengths[teamHome]
    away_ratings = team_strengths[teamAway]
    return (
        home_ratings['home_attack'],
        away_ratings['away_attack'],
        home_ratings['home_defence'],
        away_ratings['away_defence'],
    )

In [17]:
def table_strengths(strengths_dict):
    """Display the league averages and each team's strength ratings as a table.

    ``strengths_dict`` maps a team name to a dict with the keys
    ``home_attack``, ``home_defence``, ``away_attack`` and ``away_defence``.
    The first data row holds the module-level league averages; every value is
    rounded to five decimal places. The table is rendered with ``iplot`` and
    nothing is returned.
    """
    rating_keys = ('home_attack', 'home_defence', 'away_attack', 'away_defence')

    header_row = ['Team', 'Home Attack', 'Home Defence', 'Away Attack', 'Away Defence']
    league_row = ['League Average'] + [
        round(average, 5)
        for average in (
            league_average_home_goals_scored,
            league_average_goals_conceded_at_home,
            league_average_away_goals_scored,
            league_average_goals_conceded_at_away,
        )
    ]
    team_rows = [
        [team] + [round(ratings[key], 5) for key in rating_keys]
        for team, ratings in strengths_dict.items()
    ]

    iplot(ff.create_table([header_row, league_row] + team_rows))

In [18]:
def match_prediction(home_team, away_team, max_goals=10):
    """Display win/draw/loss probabilities and the likeliest score for a fixture.

    Each side's goal count is modelled as an independent Poisson variable whose
    mean is attack strength x opposing defence strength x league average, so
    the probability of a scoreline is the product of the two marginals.

    Args:
        home_team: Home side's name (a key of ``team_strengths``).
        away_team: Away side's name (a key of ``team_strengths``).
        max_goals: Largest goal count considered per side; scorelines from
            0-0 up to ``max_goals``-``max_goals`` are evaluated. Defaults to 10.

    Returns:
        None. The result table is rendered with ``iplot``.
    """
    home_at, away_at, home_def, away_def = teams_data_tuple(home_team, away_team)

    home_team_goals = home_at * away_def * league_average_home_goals_scored
    away_team_goals = away_at * home_def * league_average_away_goals_scored

    # Evaluate each side's distribution once rather than once per scoreline.
    goal_range = range(0, max_goals + 1)
    home_probs = [poisson_goals(goals, home_team_goals) for goals in goal_range]
    away_probs = [poisson_goals(goals, away_team_goals) for goals in goal_range]

    home_win = 0
    away_win = 0
    draw = 0
    most_likely = None
    for home_goals, home_prob in enumerate(home_probs):
        for away_goals, away_prob in enumerate(away_probs):
            probability = home_prob * away_prob
            if home_goals > away_goals:
                home_win += probability
            elif home_goals < away_goals:
                away_win += probability
            else:
                draw += probability
            # Strict '>' keeps the first scoreline on ties.
            if most_likely is None or probability > most_likely[2]:
                most_likely = [home_goals, away_goals, probability]

    # Label rows with the first word of each name, unless both names start with
    # the same word (e.g. two "Manchester ..." clubs) — then the short labels
    # would be indistinguishable, so fall back to the full names.
    short_home = home_team.split()[0]
    short_away = away_team.split()[0]
    if short_home == short_away:
        short_home, short_away = home_team, away_team

    table_wld = [
        ["Result", "Probability"],
        ["{} Win".format(short_home), home_win],
        ["{} Win".format(short_away), away_win],
        ["Draw", draw],
        ["Most Probable Score: {} - {}".format(
            most_likely[0], most_likely[1]
            ), most_likely[2]
        ]
    ]

    table_wld = ff.create_table(table_wld)
    iplot(table_wld)

In [19]:
def match_plot(teamA, teamB):
    """Plot both sides' goal-count probability curves for a fixture.

    ``teamA`` is treated as the home side and ``teamB`` as the away side. Each
    side's expected goals is attack strength x opposing defence strength x
    league average; the probability of scoring 0 through 10 goals is drawn as
    one line per team, and the figure is rendered with ``iplot``.
    """
    home_at, away_at, home_def, away_def = teams_data_tuple(teamA, teamB)
    goal_counts = np.arange(0, 11)

    expected_goals = (
        (teamA, home_at * away_def * league_average_home_goals_scored),
        (teamB, away_at * home_def * league_average_away_goals_scored),
    )

    traces = []
    for team_label, mean_goals in expected_goals:
        probabilities = np.array(
            [poisson_goals(count, mean_goals) for count in goal_counts]
        )
        traces.append(
            Scatter(x=goal_counts, y=probabilities, mode='lines', name=team_label)
        )

    iplot(traces)

In [20]:
def implied_probability(numerator, denominator):
    """Return ``denominator / (numerator + denominator)``.

    For fractional odds written ``numerator/denominator`` (e.g. 3/1) this is
    the probability implied by those odds (0.25 for 3/1).
    """
    total = numerator + denominator
    return denominator / total

In [21]:
# Inspect the last rows of the results table (up to 20) as a sanity check.
df.tail(20)

Unnamed: 0,Date,Home Team,Home Score,Away Score,Away Team
350,2018-04-28,Swansea City FC,0,1,Chelsea FC
351,2018-04-29,West Ham United FC,1,4,Manchester City FC
352,2018-04-29,Manchester United FC,2,1,Arsenal FC
353,2018-04-30,Tottenham Hotspur FC,2,0,Watford FC
354,2018-05-04,Brighton & Hove Albion,1,0,Manchester United FC
355,2018-05-05,Stoke City FC,1,2,Crystal Palace FC
356,2018-05-05,AFC Bournemouth,1,0,Swansea City FC
357,2018-05-05,Watford FC,2,1,Newcastle United FC
358,2018-05-05,West Bromwich Albion FC,1,0,Tottenham Hotspur FC
359,2018-05-05,Leicester City FC,0,2,West Ham United FC


In [22]:
# Render the league averages and every team's strength ratings as a table.
table_strengths(team_strengths)

In [42]:
def matchday(day=None, month=None, year=None):
    """Show the prediction table and probability plot for every fixture on a date.

    Args:
        day, month, year: Components of the target date. Each may be an int or
            a numeric string, zero-padded or not (``5``, ``'5'`` and ``'05'``
            are equivalent). Any component left as ``None`` is taken from
            today's date, so calling with no arguments selects today.

    Raises:
        ValueError: If the components are not numeric or do not form a valid
            calendar date.
    """
    today = datetime.date.today()
    # Build a real date so the comparison string is always zero-padded
    # YYYY-MM-DD, the format date_reader extracts from each fixture.
    target_date = datetime.date(
        today.year if year is None else int(year),
        today.month if month is None else int(month),
        today.day if day is None else int(day),
    )
    date_string = target_date.isoformat()

    for fixture in rff['fixtures']:
        if date_reader(fixture['date']) == date_string:
            match_prediction(fixture['homeTeamName'], fixture['awayTeamName'])
            match_plot(fixture['homeTeamName'], fixture['awayTeamName'])

In [41]:
# Show a prediction table and a probability plot for every fixture dated
# 2018-05-13.
matchday(day='13', month='05', year='2018')

HHHHHH


HHHHHH


HHHHHH


HHHHHH


HHHHHH


HHHHHH


HHHHHH


HHHHHH


HHHHHH


HHHHHH
