### Euro 2020 Prediction Game

For the euro 2020 prediction game my idea is to do all the score keeping in python. Read the excel data into a python class. The class would contain some nicely organised data structure to store the predictions. 


Idea:

- class Bracket:
    - load from excel; phase I and phase II are two seperate sheets
    - group_stage dict {mid:score}
    - knockout_phase:
        - phase I:
            - l16: list of tuples ('Team', 'rank')
            - sf: list
            - f: list
            - bonus: list
        - phase II:
            - qf: list
            - sf: list
            - f: list


In [191]:
from collections import namedtuple
import pandas as pd
import os

class Score():
    def __init__(self, mid, score, teams=None):
        self.mid = mid
        self.home = None
        self.away = None
        self.score = None
        self.teams = None
        
        if teams:
            self.teams = tuple(teams)
        if isinstance(score, str):
            if '?' in score:
                # handling livescore future game score
                print(f'No score yet for {self.teams}')
                return
            else:
                score = score.split('-')
                
        if isinstance(score, (list, tuple)) and len(score)==2:
            self.score = tuple(score)
            self.home = int(score[0])
            self.away = int(score[1])
        else:
            raise TypeError('unknown score format')
            
        # 1 - home_win; 0 - draw; 2 - away_win
        self.outcome = (self.home != self.away) + (self.away>self.home)
        return 
    
    def __str__(self):
        if self.score:
            if self.teams:
                return f"{self.teams[0]} {'-'.join(self.score)} {self.teams[1]}"
            else:
                return f"{self.mid}: {'-'.join(self.score)}"
        else:
            if self.teams:
                return f'{self.teams[0]} ? - ? {self.teams[1]}'
            else:
                return f'{self.mid}: ? - ?'


def score_compare(a, b, outcome=10, result=15):
    '''
    compare scores a & b for group stage
    '''
    if hasattr(a, 'teams') and hasattr(b, 'teams'):
        if a.teams != b.teams:
            return 0
    if a.score == b.score:
        return result
    elif a.outcome == b.outcome:
        return outcome
    else:
        return 0

def team_compare(a, b, qualified, ordering=0):
    '''
    compare teams in a to teams in b and score points accordingly
    
    a and b must be sets of teams
    '''
    pts = 0
    correct_qualified = len(a.intersection(b))
    
    if ordering and a and isinstance(a, tuple):
        correct_ordering = correct_qualified
        a_teams = set([t[0] for t in a])
        b_teams = set([t[0] for t in b])
        correct_qualified = len(a_teams.intersection(b_teams))
        pts += correct_ordering * ordering
        
    pts += correct_qualified * qualified
    
    return pts
        
        
class Stage():
    def __init__(self, name, matches=None, teams=None, outcome=None, result=None, qualified=None, ordering=None):
        '''
        matches - a dict of matches and the corresponding scores
        teams - a list of teams who qualified for this stage
        '''
        self.name = name
        if matches:
            self.matches = matches
            self.outcome = outcome or 0
            self.result = result or 0
        if teams:
            self.teams = set(teams)
            self.qualified = qualified or 0
            self.ordering = ordering or 0
            
    def compute(self, other):
        points = 0
        if self.matches:
            missing_matches = set(self.matches.keys()) - set(other.matches.keys())
            if missing_matches:
                print(f'Warning missing matches! {missing_matches}')
            for mid, match in self.matches.items():
                points += score_compare(match, other[mid])
        
        if self.teams:
            points += team_compare(self.teams, other.teams)
            
        return points
                
        
            
class Bracket():
    
    def __init__(self, name, workdir=None):
        '''
        load bracket from excel or pkl
        
        maybe specify name and dir or something along those lines
        '''
        self.dat = {}
        pkl_file_1 = os.path.join(workdir,'phase_I', name + '.pkl')
        xlsx_file_1 = os.path.join(workdir,'phase_I','CxFPoolsEuro2020_PhaseI_'+ name + '.pkl')
        if os.path.exists(pkl_file_1):
            self.dat['phase_1'] = pkl_load(pkl_file_1)
        elif os.path.exists(xlsx_file_1):
            phase_1 = {}
            dat = pd.read_excel(xlsx_file_1, sheet_name='INTERNAL_USE_ONLY').iloc[:,0].values
            phase_1['group_stage'] = Stage(name='Group Stage',matches={i+1:m for i,m in enumerate(dat[1:37])})
            phase_1['last_16'] = Stage(name='Last 16', teams=list(zip(dat[38:54], [1,2]*4 + [3]*4)))
            phase_1['semi_final']= Stage(name='Semi Final', teams=list(dat[55:59]))
            phase_1['final'] = Stage(name='Final', teams=list(dat[60:62]))
            phase_1['winner'] = Stage(name='Final', teams=dat[63:64])
            self.dat['phase_1'] = phase_1
        else:
            print(f'No valid Phase 1 file found for {name}')
        
        pkl_file_2 = os.path.join(workdir,'PhaseII_'+ name + '.pkl')
        xlsx_file_2 = os.path.join(workdir,'CxFPoolsEuro2020_PhaseII_'+ name + '.pkl')
        if os.path.exists(pkl_file_2):
            self.dat['phase_2'] = pkl_load(pkl_file_2)
        elif os.path.exists(xlsx_file_1):
            phase_2 = {}
            dat = pd.read_excel(xlsx_file_2, sheet_name='INTERNAL_USE_ONLY').iloc[:,0].values
            # TODO add phase 2 once sheet is complete
        else:
            print(f'No valid Phase 2 file found for {name}')
        return self
            
class Tournament():
    def __init__(self, workdir):
        with open(os.path.join(workdir,'metadata.yml')) as f:
            config = yaml.safe_load(f)
        self.participants = config['participants']
        self.scoring = config['scoring']
        self.workdir = workdir
        self.brackets = {}
        for participant in self.participants:
            b = Bracket(participant, self.workdir)
        
        
        

In [16]:
dat = pd.read_excel('CxFPoolsEuro2020_PhaseI_LukeAarohi.xlsx',sheet_name='INTERNAL_USE_ONLY').iloc[:,0].values

livescores.com structuring:


example url: https://www.livescores.com/soccer/euro-2020/group-a/results/all/
url structure:
    tournament base: https://www.livescores.com/soccer/euro-2020


In [338]:
from bs4 import BeautifulSoup
import requests, os, re
from urllib import parse
import random

gen_score = lambda : f'{random.randint(0,3)} - {random.randint(0,3)}'

def from_livescore(x):
    x = x.replace('-',' ').title()
    if x.startswith('Group'):
        return 'Group Stage'
    return x

euro_url = 'https://www.livescores.com/soccer/euro-2020/'

ls_id_map = {80596: 1,
             80595: 2,
             80737: 3,
             80736: 4,
             80742: 5,
             81035: 6,
             81036: 7,
             80743: 8,
             80748: 9,
             80749: 10,
             80612: 11,
             80611: 12,
             80738: 13,
             80598: 14,
             80597: 15,
             81037: 16,
             80739: 17,
             81038: 18,
             80750: 19,
             80745: 20,
             80744: 21,
             80613: 22,
             80614: 23,
             80751: 24,
             80599: 25,
             80600: 26,
             81039: 27,
             81040: 28,
             80741: 29,
             80740: 30,
             80747: 31,
             80746: 32,
             80753: 33,
             80752: 34,
             80615: 35,
             80616: 36}


def fetch_beautiful_markup(url):
    print('fetching markup from ' + url)
    
    # try catching all possible http errors
    try :
        livescore_html = requests.get(url)
    except Exception as e :
        return print('An error occured as: ', e)

    parsed_markup = BeautifulSoup(livescore_html.text, 'html.parser')
    
    return parsed_markup

def extract_scores(parsed_markup, stage=None):
    # dictionary to contain score
    scores = {}

    # scrape needed data from the parsed markup
    for element in parsed_markup.find_all("div", "row-gray") :
        
        match_name_element = element.find(attrs={"class": "scorelink"})
        ls_id = int(element.get('data-eid'))
        mid = ls_id_map.get(ls_id, ls_id)

        if match_name_element is not None :
            # this means the match is about to be played
            match_stage, matchup = match_name_element.get('href').split('/')[3:5]
            match_stage = from_livescore(match_stage)
            if match_stage not in scores: scores[match_stage] = {}
            home_team = from_livescore(matchup.split('-vs-')[0].strip())
            away_team = from_livescore(matchup.split('-vs-')[1].strip())
            teams = (home_team, away_team)
            score = element.find("div", "sco").get_text().strip()
            score = gen_score()

            # add our data to our dictionary
            scores[match_stage][mid] = Score(mid, score, teams)
        elif stage:
            if stage not in scores: scores[stage] = {}
            # we need to use a different method to get our data
            home_team = '-'.join(element.find("div", "tright").get_text().strip().split(" "))
            away_team = '-'.join(element.find(attrs={"class": "ply name"}).get_text().strip().split(" "))

            score = element.find("div", "sco").get_text().strip()
            score = gen_score()

            teams = (home_team, away_team)

            # add our data to our dictionary
            scores[stage][mid] = Score(mid, score, teams)

    return scores

def extract_competition_stages(markup, comp):
    stages = {}
    selected_cat = markup.find('aside', 'left-bar').find('ul','buttons btn-light').find('a',{'class':'selected cat'})
    stage_refs = selected_cat.parent.find_all('a', attrs={'href':re.compile(comp+'.*/')})
    for g in stage_refs:
        g_url = g.get('href')
        g_name = g.get('title')
        stages[g_name] = g_url
    
    return stages
    

def scrape_scores_from_livescore(url, stage) :
    
    parsed_markup = fetch_beautiful_markup(url)
    scores = extract_scores(parsed_markup, stage)
    return scores


def scrape_competition_from_livescore(comp_url):
    res = {}
    comp = parse.urlparse(comp_url).path
    comp_markup = fetch_beautiful_markup(comp_url)
    comp_scores = extract_scores(comp_markup)
    comp_stages = extract_competition_stages(comp_markup, comp)
    
    for g_name, g_url in comp_stages.items():
        g_path = parse.urljoin(comp_url, g_url)
        for what in ['results/all/', 'fixtures/all/']:
            what_path = parse.urljoin(g_path, what)
            g_what = scrape_scores_from_livescore(what_path, g_name)
            for stage, stage_scores in g_what.items():
                if stage not in comp_scores:
                    comp_scores[stage] = stage_scores
                else:
                    for mid, score in stage_scores.items():
                        if mid not in comp_scores:
                            comp_scores[stage][mid] = score
    
    for stage, stage_scores in comp_scores.items():
        res[stage] = Stage(stage, stage_scores)
        
    return res
    
    
    

In [339]:
test_url = 'https://www.livescores.com/soccer/euro-2020/'
#test_url = 'https://www.livescores.com/soccer/italy/serie-a/results/all/'
#test_url = 'https://www.livescores.com/soccer/italy/'
scoreboard = scrape_competition_from_livescore(test_url)

fetching markup from https://www.livescores.com/soccer/euro-2020/
fetching markup from https://www.livescores.com/soccer/euro-2020/group-a/results/all/
fetching markup from https://www.livescores.com/soccer/euro-2020/group-a/fixtures/all/
fetching markup from https://www.livescores.com/soccer/euro-2020/group-b/results/all/
fetching markup from https://www.livescores.com/soccer/euro-2020/group-b/fixtures/all/
fetching markup from https://www.livescores.com/soccer/euro-2020/group-c/results/all/
fetching markup from https://www.livescores.com/soccer/euro-2020/group-c/fixtures/all/
fetching markup from https://www.livescores.com/soccer/euro-2020/group-d/results/all/
fetching markup from https://www.livescores.com/soccer/euro-2020/group-d/fixtures/all/
fetching markup from https://www.livescores.com/soccer/euro-2020/group-e/results/all/
fetching markup from https://www.livescores.com/soccer/euro-2020/group-e/fixtures/all/
fetching markup from https://www.livescores.com/soccer/euro-2020/grou

In [340]:
scoreboard

{'Group Stage': <__main__.Stage at 0x12e3ad190>,
 'Round of 16': <__main__.Stage at 0x12d830280>,
 'Quarter-finals': <__main__.Stage at 0x12d8302b0>,
 'Semi-finals': <__main__.Stage at 0x12d830fa0>,
 'Final': <__main__.Stage at 0x12d830f70>}

In [344]:
scoreboard['Group Stage'].matches[1].__str__()

'Turkey 1 - 0 Italy'

In [351]:
import yaml