## Import data

In [59]:
import pandas as pd
import re
pd.set_option('display.max_columns', None)

In [60]:
# Load the processed soccer-match tables from CSV. The paths are relative, so
# the notebook must be run from the directory that contains `data/`.
# `matches_cleaned` is the frame passed to the lookup helpers defined below.
matches_cleaned = pd.read_csv('data/processed_data/soccer_match/soccer_match-matches_processed.csv')
# X_processed / y_processed are not referenced by the helpers in this section;
# presumably the model features and targets -- TODO confirm against the
# preprocessing notebook.
X_processed =  pd.read_csv('data/processed_data/soccer_match/soccer_match-X_processed.csv')
y_processed = pd.read_csv('data/processed_data/soccer_match/soccer_match-y_processed.csv')

In [166]:
def get_competitions(matches: pd.DataFrame) -> list:
    """List the competitions that appear in ``matches``.

    Args:
        matches: Frame with a ``competitionId`` column.

    Returns:
        One ``{'competition_id': ..., 'name': ...}`` dict per distinct
        ``competitionId``, in order of first appearance in ``matches``.

    Raises:
        KeyError: If ``matches`` contains a competition id that is missing
            from the hard-coded id -> name table below.
    """
    # Display names for the competition ids this helper knows about.
    names_by_id = {
        524: 'Serie A',
        364: 'Premier League',
        795: 'LaLiga',
        412: 'Ligue 1',
        426: 'Bundesliga',
        102: 'Euro Cup',
        28: 'World Cup',
    }
    return [
        {'competition_id': comp_id, 'name': names_by_id[comp_id]}
        for comp_id in matches.competitionId.unique()
    ]

In [167]:
get_competitions(matches_cleaned)

[{'competition_id': 28, 'name': 'World Cup'},
 {'competition_id': 795, 'name': 'LaLiga'},
 {'competition_id': 524, 'name': 'Serie A'},
 {'competition_id': 412, 'name': 'Ligue 1'},
 {'competition_id': 364, 'name': 'Premier League'},
 {'competition_id': 426, 'name': 'Bundesliga'},
 {'competition_id': 102, 'name': 'Euro Cup'}]

In [163]:
def get_seasons(competition_id: int, dataset: pd.DataFrame = None) -> list:
    """Describe every season of one competition.

    Args:
        competition_id: Value to match against the ``competitionId`` column.
        dataset: Frame with ``competitionId``, ``seasonId``, ``dateutc`` and
            ``gameweek`` columns. When omitted, the notebook-level
            ``matches_cleaned`` frame is used, so existing
            ``get_seasons(competition_id)`` calls keep working.

    Returns:
        One dict per distinct ``seasonId`` of the competition, in order of
        first appearance, with the keys:

        * ``season_id``  -- the season id (plain Python scalar),
        * ``name``       -- ``"<first year>/<last year>"``, or just
          ``"<year>"`` when all matches fall in the same calendar year,
        * ``matchweeks`` -- sorted list of distinct ``gameweek`` values,
        * ``dataset``    -- the constant string ``'soccermatch'``.

        An empty list is returned when no row matches ``competition_id``.
    """
    if dataset is None:
        # Backward-compatible default: fall back to the notebook global,
        # resolved at call time.
        dataset = matches_cleaned

    comp_matches = dataset[dataset['competitionId'] == competition_id]

    seasons = []
    # .tolist() converts NumPy scalars to plain Python values, so the returned
    # structure can be serialized with the standard json module.
    for season_id in comp_matches['seasonId'].unique().tolist():
        # Filter once per season (instead of once per derived field) and only
        # within this competition's rows.
        season_matches = comp_matches[comp_matches['seasonId'] == season_id]

        # The year is the first four characters of the date's text form
        # (assumes dateutc starts with a 4-digit year, e.g. "2018-07-15 ...").
        first_year = str(season_matches['dateutc'].min())[:4]
        last_year = str(season_matches['dateutc'].max())[:4]
        name = first_year if first_year == last_year else f"{first_year}/{last_year}"

        seasons.append({
            'season_id': season_id,
            'name': name,
            'matchweeks': sorted(season_matches['gameweek'].unique().tolist()),
            'dataset': 'soccermatch',
        })
    return seasons

In [169]:
get_seasons(364)

[{'season_id': 181150,
  'name': '2017/2018',
  'matchweeks': [3,
   4,
   5,
   6,
   7,
   8,
   9,
   10,
   11,
   12,
   13,
   14,
   15,
   16,
   17,
   18,
   19,
   20,
   21,
   22,
   23,
   24,
   25,
   26,
   27,
   28,
   29,
   30,
   31,
   32,
   33,
   34,
   35,
   36,
   37,
   38],
  'dataset': 'soccermatch'}]

In [149]:
def get_matches(competition_id: int,
                season_id: int,
                matchweek: int,
                dataset: pd.DataFrame) -> list:
    """List the matches of one matchweek of a competition season.

    Args:
        competition_id: Value to match against the ``competitionId`` column.
        season_id: Value to match against the ``seasonId`` column.
        matchweek: Value to match against the ``gameweek`` column.
        dataset: Frame with ``competitionId``, ``seasonId``, ``gameweek``,
            ``matchId``, ``homeTeam`` and ``awayTeam`` columns.

    Returns:
        One dict per matching row, in ``dataset`` row order, with the keys
        ``match_id``, ``name`` (``"<home> vs <away>"``), ``home_team`` and
        ``away_team``. An empty list when nothing matches.
    """
    mask = (
        (dataset['competitionId'] == competition_id)
        & (dataset['seasonId'] == season_id)
        & (dataset['gameweek'] == matchweek)
    )
    selected = dataset.loc[mask, ['matchId', 'homeTeam', 'awayTeam']]

    # Iterate over row values directly instead of re-looking each cell up with
    # .loc[index, col]: a label lookup returns a Series (not a scalar) when
    # the frame's index contains duplicate labels.
    return [
        {
            'match_id': match_id,
            'name': f"{home} vs {away}",
            'home_team': home,
            'away_team': away,
        }
        for match_id, home, away in selected.itertuples(index=False, name=None)
    ]

In [170]:
get_matches(28,
            10078,
            0,
            matches_cleaned)

[{'match_id': 2058017,
  'name': 'France vs Croatia',
  'home_team': 'France',
  'away_team': 'Croatia'},
 {'match_id': 2058016,
  'name': 'Belgium vs England',
  'home_team': 'Belgium',
  'away_team': 'England'},
 {'match_id': 2058015,
  'name': 'Croatia vs England',
  'home_team': 'Croatia',
  'away_team': 'England'},
 {'match_id': 2058014,
  'name': 'France vs Belgium',
  'home_team': 'France',
  'away_team': 'Belgium'},
 {'match_id': 2058012,
  'name': 'Russia vs Croatia',
  'home_team': 'Russia',
  'away_team': 'Croatia'},
 {'match_id': 2058013,
  'name': 'Sweden vs England',
  'home_team': 'Sweden',
  'away_team': 'England'},
 {'match_id': 2058011,
  'name': 'Brazil vs Belgium',
  'home_team': 'Brazil',
  'away_team': 'Belgium'},
 {'match_id': 2058010,
  'name': 'Uruguay vs France',
  'home_team': 'Uruguay',
  'away_team': 'France'},
 {'match_id': 2058009,
  'name': 'Colombia vs England',
  'home_team': 'Colombia',
  'away_team': 'England'},
 {'match_id': 2058008,
  'name': 'Swed

In [157]:
matches_cleaned[matches_cleaned['matchId'] == 2058017][['homeTeam', 'awayTeam', 'homeScore', 'awayScore']]

Unnamed: 0,homeTeam,awayTeam,homeScore,awayScore
0,France,Croatia,4,2


In [171]:
def get_results(match_id: int,
                dataset: pd.DataFrame) -> dict:
    """Format the final score of one match.

    Args:
        match_id: Value to match against the ``matchId`` column.
        dataset: Frame with ``matchId``, ``homeTeam``, ``awayTeam``,
            ``homeScore`` and ``awayScore`` columns.

    Returns:
        ``{'result': '<home team> <home score> - <away team> <away score>'}``
        built from the first row whose ``matchId`` equals ``match_id``.

    Raises:
        KeyError: If no row in ``dataset`` has the requested ``match_id``.
    """
    game = dataset[dataset['matchId'] == match_id]
    if game.empty:
        raise KeyError(f"No match with matchId {match_id!r} in dataset")

    # Use positional .iloc[0], not [0]: the filtered frame keeps the original
    # index labels, so a label lookup for 0 only succeeds when the match
    # happens to be the very first row of `dataset`.
    home_team = game['homeTeam'].iloc[0]
    away_team = game['awayTeam'].iloc[0]
    home_score = game['homeScore'].iloc[0]
    away_score = game['awayScore'].iloc[0]

    return {'result': f"{home_team} {home_score} - {away_team} {away_score}"}

In [172]:
get_results(2058017, matches_cleaned)

{'result': 'France 4 - Croatia 2'}