In [1]:
import pandas as pd
import requests
import threading
from datetime import datetime, timedelta
import numpy as np
import ast
from flask import config


class DiskConnection:
    """
    manages the disk read and writes and inital setup from disk
    """

    def __init__(self, lock, config):
        self.lock = lock
        self.config = config
        self.leagues_db = pd.DataFrame(columns=['country_id', 'country_name', 'league_id', 'league_name'])
        self.events_db = pd.DataFrame()
        self.league_db_standings = {}
        self.setup_all_league_standings_from_disk()

    def get_dbs(self):
        return(self.leagues_db, self.events_db, self.league_db_standings)

    def write_db_to_disk(self, path, db):
        """
        write pandas df to a csv
        """
        logging.info('writing db to fisk at {}'.format(path))
        try:
            with self.lock:
                db.to_csv(path)
            return True
        except Exception as e:
            logging.error('error in write_db_to_disk {}'.format(e))
        return False

    def read_db_from_disk(self, path, index=True):
        """
        return df from disk via path
        """
        logging.info('getting db at {} from disk'.format(path))
        try:
            with self.lock:
                df = pd.read_csv(path, index=index)
            return df
        except Exception as e:
            logging.error('error in getting db at {} error is {}'.format(path,e))
            return pd.DataFrame()

    def setup_all_league_standings_from_disk(self):
        if self.setup_leagues_db_from_disk() and self.setup_events_db_from_disk():
            for league_id in self.leagues_db.league_id:
                self.setup_league_standings_db_from_disk(league_id)
            return True
        return False

    def setup_leagues_db_from_disk(self):
        df = self.read_db_from_disk(self.config.path_leagues_db, False)
        if df.empty:
            return False
        self.leagues_db = df
        return True

    def setup_events_db_from_disk(self):
        df = self.read_db_from_disk(self.config.path_events_db)
        if df.empty:
            return False
        df = df.astype({'match_id': 'int32', 'country_id': 'int32', 'league_id':'int32'}).set_index('match_id', drop=False)
        df['match_id'] = df['match_id'].astype('int64')
        df['match_date'] = [datetime.strptime(d, '%Y-%m-%d') for d in df['match_date']]
        self.events_db = df
        return True

    def setup_league_standings_db_from_disk(self, league_id):
        path = self.config.path_league_standings_stem + str(league_id) + ".csv"
        df = self.read_db_from_disk(path)
        if df.empty:
            return False
        self.league_db_standings[league_id] = df
        return True




class HostConnection:
    """
    manages the fetches from host
    """

    def __init__(self, config, disk_connection):
        self.config = config
        self.last_refresh_times = {}
        self.disk_connection = disk_connection


    def refresh_all_leagues_from_fetch(self, leagues_db, league_db_standings):
        for league_id in leagues_db.league_id:
            league_db_standings[league_id] = self.update_league_db_standings_from_fetch(league_id, league_db_standings)
        return league_db_standings


    def update_last_refresh_times(self, id):
        self.last_refresh_times[id] = datetime.now()


    def is_data_fresh(self, id):
        if not id in self.last_refresh_times:
            return False
        time_since_update_mins = (datetime.now() - self.last_refresh_times[id]).seconds / 60
        if time_since_update_mins > self.config.refresh_time_mins:
            return False
        return True


    def update_leagues_db_from_fetch(self, leagues_db):
        # check if data is already fresh
        if self.is_data_fresh(0):
            logging.info('in update_leagues_db_from_fetch and data is fresh')
            return leagues_db
        logging.info('in update_leagues_db_from_fetch data is stale')

        # fetch data from webservice
        path = self.config.apifootball_host + '/?action=get_leagues&APIkey=' + self.config.apifootball_key
        try:
            response = requests.get(path)
            df = pd.DataFrame(response.json())
            leagues_db = df.combine_first(leagues_db)
            self.disk_connection.write_db_to_disk(self.config.path_leagues_db, leagues_db)
            self.update_last_refresh_times(0)
        except Exception as e:
            logging.error('error in update_leagues_db_from_fetch {}'.format(e))
        return leagues_db


    def update_events_from_fetch(self, date_from, date_to, events_db):
        # check if data is already fresh
        if self.is_data_fresh(1):
            logging.info('in update_events_from_fetch and data is fresh')
            return True
        logging.info('in update_events_from_fetch data is stale')

        # fetch data from webservice
        date_from = self._date_to_string(date_from)
        date_to = self._date_to_string(date_to)
        path = self.config.apifootball_host + '/?action=get_events&from=' + date_from + '&to=' + date_to + '&APIkey=' + self.config.apifootball_key
        try:
            logging.info('fetch for {}'.format(path))
            response = requests.get(path)
            df = pd.DataFrame(response.json()).astype({'match_id': 'int32', 'country_id': 'int32', 'league_id':'int32'}).set_index('match_id', drop=False)
            df['match_date'] = [datetime.strptime(d, '%Y-%m-%d') for d in df['match_date']]
            events_db = df.combine_first(events_db)

            self.disk_connection.write_db_to_disk(self.config.path_events_db, events_db)
            self.update_last_refresh_times(1)
        except Exception as e:
            logging.error('error in update_events_from_fetch {}'.format(e))
        return events_db


    def update_league_db_standings_from_fetch(self, league_id, league_db_standings):
        # check if data is already fresh
        if self.is_data_fresh(league_id):
            logging.info('in update_league_db_standings_from_fetch and data is fresh')
            return league_db_standings[league_id]
        logging.info('in update_league_db_standings_from_fetch data is stale')

        # fetch data from webservice
        path = self.config.apifootball_host + '/?action=get_standings&league_id=' + str(
            league_id) + '&APIkey=' + self.config.apifootball_key
        try:
            response = requests.get(path)
            df = pd.DataFrame(response.json())
            logging.info('changed league_db_standings from the fetch for {}'.format(league_id))
            self.disk_connection.write_db_to_disk(self.config.path_league_standings_stem + str(league_id), df)
            self.update_last_refresh_times(league_id)
        except Exception as e:
            logging.error('error in update_league_db_standings_from_fetch {}'.format(e))
        return df

    def _date_to_string(self, date):
        yyyy = str(date.year)
        mm = str(date.month)
        dd = str(date.day)
        if len(mm) == 1:
            mm = '0' + mm
        if len(dd) == 1:
            dd = '0' + dd
        return yyyy + '-' + mm + '-' + dd



class DatabaseConnection:
    """
    uses DiskConnection to setup dbs and write update
    uses HostConnection to refresh the dbs
    """

    def __init__(self, config):
        self.config = config
        self.lock = threading.Lock()
        self.disk_connection = DiskConnection(self.lock, self.config)
        self.leagues_db, self.events_db, self.league_db_standings = self.disk_connection.get_dbs()
        self.host_connection = HostConnection(self.config, self.disk_connection)

        # inital updates
        date_from = datetime.now() - timedelta(days=14)
        date_to = datetime.now() + timedelta(days=14)
        self.events_db = self.host_connection.update_events_from_fetch(date_from, date_to, self.events_db)
        self.leagues_db = self.host_connection.update_leagues_db_from_fetch(self.leagues_db)
        self.league_db_standings = self.host_connection.refresh_all_leagues_from_fetch(self.leagues_db, self.league_db_standings)


    def get_events(self, league_id=None, date_from=None, date_to=None, match_id=None):
        df = self.events_db.copy()
        trues = np.array([True]*len(df))

        # apply filters
        match_id_filter = df.index == match_id if match_id else trues
        league_id_filter = df['league_id'] == league_id if league_id else trues
        date_from_filter = df['match_date'] > date_from.replace(hour=0, minute=0, second=0) if date_from else trues
        date_to_filter = df['match_date'] < date_to.replace(hour=23, minute=59, second=0) if date_to else trues

        # format the dataframe correctly
        df = df.replace(np.nan, '', regex=True)
        df = df.loc[match_id_filter & league_id_filter & date_from_filter & date_to_filter]
        df['match_date'] = [d.timestamp() for d in df['match_date']]
        #df['lineup'] = [ast.literal_eval(d) for d in df['lineup']]
        #df['cards'] = [ast.literal_eval(d) for d in df['cards']]
        #df['goalscorer'] = [ast.literal_eval(d) for d in df['goalscorer']]
        #df['statistics'] = [ast.literal_eval(d) for d in df['statistics']]

        return df.to_dict('index')


    def df_to_strings(self, df):
        for col in df.columns:
            df[col] = df[col].astype(str)
        return df


    def get_league_db(self):
        self.leagues_db = self.host_connection.update_leagues_db_from_fetch(self.leagues_db)
        return self.leagues_db


    def get_league_db_standings(self, league_id):
        self.league_db_standings[league_id] = self.host_connection.update_league_db_standings_from_fetch(league_id, self.league_db_standings)
        return self.league_db_standings.get(league_id, pd.DataFrame())


    def get_leagues(self):
        """
        returns the league and country info in a dictionary form
        """
        logging.info('inside get_leagues')
        df = self.df_to_strings(self.get_league_db())
        leagues_dict = {}
        cids = list(set(df['country_id']))
        df = df.set_index('country_id')
        for cid in cids:
            if sum(df.index == cid) > 1:
                league_names = list(df.loc[cid]['league_name'])
                league_ids = list(df.loc[cid]['league_id'])
                country_name = list(df.loc[cid]['country_name'])[0]
            else:
                league_names = [df.loc[cid]['league_name']]
                league_ids = [df.loc[cid]['league_id']]
                country_name = df.loc[cid]['country_name']
            leagues_dict[cid] = {
                'country_name': country_name,
                'leagues': {
                    'league_ids': league_ids,
                    'country_name': country_name,
                    'league_names': league_names
                }
            }
        return leagues_dict


#@app.route('/')
def check_server():
    return "connected to middle layer server"


#@app.route('/get_leagues_db/', methods=['GET'])
def get_leagues_db():
    logging.info("get_leagues_db")
    df = db_con.get_league_db()
    return (df.to_dict('index'))


#@app.route('/get_sidebar_info/', methods=['GET'])
def get_sidebar_info():
    return (db_con.get_leagues())


#@app.route('/get_league_standings_db/<league_id>', methods=['GET'])
def get_league_standings_db(league_id):
    league_id = int(league_id)
    df = db_con.get_league_db_standings(league_id)
    if df.empty:
        return ('league_id not recognised')
    return (df.to_dict('index'))


#@app.route('/get_fixtures/<league_id>', methods=['GET'])
def get_fixtures(league_id):
    league_id = int(league_id)
    date_from = datetime.now()
    date_to = datetime.now() + timedelta(days=14)
    return (db_con.get_events(league_id=league_id, date_from=date_from, date_to=date_to))


#@app.route('/get_results/<league_id>', methods=['GET'])
def get_results(league_id):
    league_id = int(league_id)
    date_from = datetime.now() - timedelta(days=14)
    date_to = datetime.now()
    return (db_con.get_events(league_id=league_id, date_from=date_from, date_to=date_to))


#@app.route('/get_match/<match_id>', methods=['GET'])
def get_match(match_id):
    match_id = int(match_id)
    match_info = db_con.get_events(match_id=match_id)[match_id]
    return (match_info)

In [2]:
import logging
logging.basicConfig(level=logging.DEBUG)

global db_con
db_con = DatabaseConnection(config)  # or whatever you need to do

INFO:root:getting db at /Users/ollieloney/SoccerScores/databases/leagues_db.csv from disk
ERROR:root:error in getting db at /Users/ollieloney/SoccerScores/databases/leagues_db.csv error is parser_f() got an unexpected keyword argument 'index'
INFO:root:in update_events_from_fetch data is stale
INFO:root:fetch for https://apifootball.com/api/?action=get_events&from=2019-10-15&to=2019-11-12&APIkey=9fe5c8415f9eda3d0abd11d51578518d6a7c48d172f82abed46bf5bd9fe2479f
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): apifootball.com:443
DEBUG:urllib3.connectionpool:https://apifootball.com:443 "GET /api/?action=get_events&from=2019-10-15&to=2019-11-12&APIkey=9fe5c8415f9eda3d0abd11d51578518d6a7c48d172f82abed46bf5bd9fe2479f HTTP/1.1" 200 None
INFO:root:writing db to fisk at /Users/ollieloney/SoccerScores/databases/events_db.csv
INFO:root:in update_leagues_db_from_fetch data is stale
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): apifootball.com:443
DEBUG:urllib3.conn

In [3]:
get_sidebar_info()

INFO:root:inside get_leagues
INFO:root:in update_leagues_db_from_fetch and data is fresh


{'173': {'country_name': 'France',
  'leagues': {'league_ids': ['128'],
   'country_name': 'France',
   'league_names': ['Ligue 2']}},
 '169': {'country_name': 'England',
  'leagues': {'league_ids': ['63'],
   'country_name': 'England',
   'league_names': ['Championship']}}}

In [4]:
get_leagues_db()

INFO:root:get_leagues_db
INFO:root:in update_leagues_db_from_fetch and data is fresh


{0: {'country_id': '169',
  'country_name': 'England',
  'league_id': '63',
  'league_name': 'Championship'},
 1: {'country_id': '173',
  'country_name': 'France',
  'league_id': '128',
  'league_name': 'Ligue 2'}}

In [5]:
get_results('63')

{455326: {'cards': [{'time': "38'",
    'home_fault': '',
    'card': 'yellowcard',
    'away_fault': 'S.  Sam Hutchinson'},
   {'time': "64'",
    'home_fault': '',
    'card': 'yellowcard',
    'away_fault': 'L.  Liam Palmer'},
   {'time': "90'",
    'home_fault': '',
    'card': 'yellowcard',
    'away_fault': 'M.  Morgan Fox'}],
  'country_id': 169,
  'country_name': 'England',
  'goalscorer': [{'time': "19'",
    'home_scorer': '',
    'score': '0 - 1',
    'away_scorer': 'J.  Julian Boerner'},
   {'time': "87'",
    'home_scorer': 'L.  Lee Tomlin',
    'score': '1 - 1',
    'away_scorer': ''}],
  'league_id': 63,
  'league_name': 'Championship',
  'lineup': {'home': {'starting_lineups': [{'lineup_player': 'Neil Etheridge',
      'lineup_number': '1',
      'lineup_position': '1'},
     {'lineup_player': 'Lee Peltier',
      'lineup_number': '2',
      'lineup_position': '2'},
     {'lineup_player': 'Sean Morrison',
      'lineup_number': '4',
      'lineup_position': '3'},
     {

In [6]:
db_con.get_events(match_id='455326')

{}

In [7]:
get_match(455326)

{'cards': [{'time': "38'",
   'home_fault': '',
   'card': 'yellowcard',
   'away_fault': 'S.  Sam Hutchinson'},
  {'time': "64'",
   'home_fault': '',
   'card': 'yellowcard',
   'away_fault': 'L.  Liam Palmer'},
  {'time': "90'",
   'home_fault': '',
   'card': 'yellowcard',
   'away_fault': 'M.  Morgan Fox'}],
 'country_id': 169,
 'country_name': 'England',
 'goalscorer': [{'time': "19'",
   'home_scorer': '',
   'score': '0 - 1',
   'away_scorer': 'J.  Julian Boerner'},
  {'time': "87'",
   'home_scorer': 'L.  Lee Tomlin',
   'score': '1 - 1',
   'away_scorer': ''}],
 'league_id': 63,
 'league_name': 'Championship',
 'lineup': {'home': {'starting_lineups': [{'lineup_player': 'Neil Etheridge',
     'lineup_number': '1',
     'lineup_position': '1'},
    {'lineup_player': 'Lee Peltier',
     'lineup_number': '2',
     'lineup_position': '2'},
    {'lineup_player': 'Sean Morrison',
     'lineup_number': '4',
     'lineup_position': '3'},
    {'lineup_player': 'Aden Flint',
     'lineu

In [12]:
db_con.events_db

Unnamed: 0_level_0,cards,country_id,country_name,goalscorer,league_id,league_name,lineup,match_awayteam_extra_score,match_awayteam_halftime_score,match_awayteam_name,...,match_hometeam_halftime_score,match_hometeam_name,match_hometeam_penalty_score,match_hometeam_score,match_hometeam_system,match_id,match_live,match_status,match_time,statistics
match_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
455326,"[{'time': '38'', 'home_fault': '', 'card': 'ye...",169,England,"[{'time': '19'', 'home_scorer': '', 'score': '...",63,Championship,{'home': {'starting_lineups': [{'lineup_player...,,1,Sheffield Wednesday,...,0,Cardiff City,,1,4-3-3,455326,0,FT,18:45,"[{'type': 'shots on target', 'home': '4', 'awa..."
455342,"[{'time': '33'', 'home_fault': '', 'card': 'ye...",173,France,[],128,Ligue 2,{'home': {'starting_lineups': [{'lineup_player...,,0,Nancy,...,0,AC Ajaccio,,0,4-4-2,455342,0,FT,18:00,"[{'type': 'shots on target', 'home': '1', 'awa..."
455343,"[{'time': '12'', 'home_fault': '', 'card': 'ye...",173,France,[],128,Ligue 2,{'home': {'starting_lineups': [{'lineup_player...,,0,Guingamp,...,0,Niort,,0,5-4-1,455343,0,FT,18:00,"[{'type': 'shots on target', 'home': '1', 'awa..."
455344,"[{'time': '22'', 'home_fault': 'L. Lorenzo Ra...",173,France,"[{'time': '76'', 'home_scorer': '', 'score': '...",128,Ligue 2,{'home': {'starting_lineups': [{'lineup_player...,,0,Rodez,...,0,Clermont Foot,,0,4-2-3-1,455344,0,FT,18:00,"[{'type': 'shots on target', 'home': '7', 'awa..."
455345,"[{'time': '32'', 'home_fault': 'Kiki', 'card':...",173,France,"[{'time': '7'', 'home_scorer': 'I. Ihsan Sack...",128,Ligue 2,{'home': {'starting_lineups': [{'lineup_player...,,1,Le Mans,...,1,Troyes,,2,4-2-3-1,455345,0,FT,18:00,"[{'type': 'shots on target', 'home': '6', 'awa..."
455346,"[{'time': '56'', 'home_fault': 'B. Benjamin S...",173,France,[],128,Ligue 2,{'home': {'starting_lineups': [{'lineup_player...,,0,Sochaux,...,0,Chambly,,0,4-3-3,455346,0,FT,18:00,"[{'type': 'shots on target', 'home': '3', 'awa..."
455347,"[{'time': '64'', 'home_fault': 'P. Pierre Gib...",173,France,[],128,Ligue 2,{'home': {'starting_lineups': [{'lineup_player...,,0,Orleans,...,0,Grenoble,,0,4-2-3-1,455347,0,FT,18:00,"[{'type': 'shots on target', 'home': '2', 'awa..."
455348,"[{'time': '20'', 'home_fault': '', 'card': 'ye...",173,France,"[{'time': '13'', 'home_scorer': '', 'score': '...",128,Ligue 2,{'home': {'starting_lineups': [{'lineup_player...,,1,Paris FC,...,0,Chateauroux,,0,4-3-3,455348,0,FT,18:00,"[{'type': 'shots on target', 'home': '4', 'awa..."
455349,"[{'time': '35'', 'home_fault': '', 'card': 'ye...",173,France,[],128,Ligue 2,"{'home': {'starting_lineups': [], 'substitutes...",,0,Valenciennes,...,0,Caen,,0,4-2-3-1,455349,0,FT,18:00,"[{'type': 'shots on target', 'home': '1', 'awa..."
456661,"[{'time': '45'', 'home_fault': 'L. Lewis Trav...",169,England,"[{'time': '13'', 'home_scorer': '', 'score': '...",63,Championship,{'home': {'starting_lineups': [{'lineup_player...,,1,Huddersfield Town,...,2,Blackburn Rovers,,2,3-4-2-1,456661,0,FT,11:30,"[{'type': 'shots on target', 'home': '3', 'awa..."


In [5]:
db_con.events_db.index.astype('int64') 

Int64Index([455326, 455342, 455343, 455344, 455345, 455346, 455347, 455348,
            455349, 456661, 456662, 456663, 456664, 456665, 456666, 456667,
            456668, 456669, 456670, 457133, 456724, 456797, 457469, 457470,
            457471, 457472, 457473, 457474, 457475, 457708, 457709, 457710,
            457711, 457712, 458302, 458303, 458304, 458305, 458306, 458307,
            458308, 458309, 459243, 459244, 459245, 459246, 459247, 459248,
            459249, 459250, 459251, 460121, 459492, 459493, 459562, 459566,
            461997, 462014, 462015, 462016, 462017, 462018, 462228, 462229,
            462230, 462231, 462232, 462233, 462234, 462235, 462236, 462363,
            462364, 462365, 462366, 462559, 463030, 463037, 464563, 464564,
            464565, 464566, 464567, 464568, 464569, 464570, 464678, 464679,
            464680, 464681, 464682, 464683, 464684, 464685, 464686, 464687,
            464688, 465058, 465118, 465369],
           dtype='int64', name='match_id')

In [None]:
db_con.events_db['match_id'].astype('int64') == 455326

In [9]:
db_con.events_db['match_id'] = db_con.events_db['match_id'].astype('int64')

In [10]:
db_con.events_db['match_id'] == '455326'

  result = method(y)


match_id
455326    False
455342    False
455343    False
455344    False
455345    False
455346    False
455347    False
455348    False
455349    False
456661    False
456662    False
456663    False
456664    False
456665    False
456666    False
456667    False
456668    False
456669    False
456670    False
457133    False
456724    False
456797    False
457469    False
457470    False
457471    False
457472    False
457473    False
457474    False
457475    False
457708    False
          ...  
462236    False
462363    False
462364    False
462365    False
462366    False
462559    False
463030    False
463037    False
464563    False
464564    False
464565    False
464566    False
464567    False
464568    False
464569    False
464570    False
464678    False
464679    False
464680    False
464681    False
464682    False
464683    False
464684    False
464685    False
464686    False
464687    False
464688    False
465058    False
465118    False
465369    False
Name: match_id,