# Introduction

This notebook is used to test SoccerAPI features

## Import

In [1]:
import pandas as pd
import logging
from os import listdir
from os.path import isfile, join
import psycopg2
from sqlalchemy import create_engine

## Params

In [2]:
# Connection settings for the PostgreSQL database used by db_connect().
# NOTE(review): credentials are hard-coded in the notebook; consider reading
# them from the environment before sharing this file.
user = 'postgres'
password = 'postgres'
databasename = 'soccer-db'
host = 'localhost'
port = '5432'  # PostgreSQL server port, kept as a string

## Functions

### [db_connect] connect to postgres db and give back conn object

In [3]:
def db_connect():
    """Open a connection to the PostgreSQL database described by the params.

    Reads the module-level ``databasename``, ``user``, ``password``, ``host``
    and ``port`` settings.

    Returns:
        An open psycopg2 connection. The caller is responsible for closing it.

    Raises:
        psycopg2.Error: if the connection cannot be established. The message
            is printed first, then the original error is re-raised.
    """
    try:
        # Keyword arguments let psycopg2 build and quote the DSN itself.
        return psycopg2.connect(
            dbname=databasename,
            user=user,
            password=password,
            host=host,
            port=port,
        )
    except psycopg2.Error:
        print("I am unable to connect to the database")
        # Re-raise so the real cause is visible; previously execution fell
        # through to `return conn` with `conn` unbound (UnboundLocalError).
        raise

### [db_get_seasons] get the list of season names

In [4]:
def db_get_seasons(cur):
    """Return the names of all seasons stored in the ``seasons`` table.

    Args:
        cur: an open database cursor.

    Returns:
        A list holding the first column (``name``) of every fetched row.
    """
    cur.execute("select name from seasons")
    season_names = []
    for record in cur.fetchall():
        season_names.append(record[0])
    return season_names

### [db_confrontation_history_to_df] get confrontation history between 2 teams as dataframe

In [5]:
def db_confrontation_history_to_df(cur, hometeam, awayteam, nb_games = 100, season_min = '1970/1971'):
    """Return the confrontation history between two teams as a DataFrame.

    Calls the ``confrontation_history`` database function and loads its
    result set, using the cursor's column names as DataFrame columns.

    Args:
        cur: an open database cursor.
        hometeam: first argument passed to ``confrontation_history``.
        awayteam: second argument passed to ``confrontation_history``.
        nb_games: third argument passed to ``confrontation_history``.
        season_min: fourth argument passed to ``confrontation_history``.

    Returns:
        pandas.DataFrame with one row per fetched record.
    """
    # Bind the values as query parameters instead of formatting them into the
    # SQL text: a team name containing a quote would otherwise break the
    # statement (and could inject SQL).
    cur.execute(
        "select * from confrontation_history(%s, %s, %s, %s)",
        (hometeam, awayteam, nb_games, season_min),
    )
    colnames = [desc[0] for desc in cur.description]
    return pd.DataFrame(data=cur.fetchall(), columns=colnames)

### [db_week_dates_to_df] call the week_dates database function, which returns the dates associated with each stage of type "week"

In [6]:
def db_week_dates_to_df(cur, competition, season):
    """Return the result of the ``week_dates`` database function as a DataFrame.

    Args:
        cur: an open database cursor.
        competition: first argument passed to ``week_dates``.
        season: second argument passed to ``week_dates``.

    Returns:
        pandas.DataFrame whose columns are the cursor's column names.
    """
    # Bind the values as query parameters rather than formatting them into
    # the SQL text, so names containing quotes cannot break the statement.
    cur.execute("select * from week_dates(%s, %s)", (competition, season))
    colnames = [desc[0] for desc in cur.description]
    return pd.DataFrame(data=cur.fetchall(), columns=colnames)

### [check_week_dates_inconsistency_to_df] check for inconsistency in week dates

In [15]:
def check_week_dates_inconsistency_to_df(competitionname):
    """Find seasons whose first week_dates row is not 'Week 01'.

    This can happen because dates are sometimes messed up on the ligue1.com
    website. Opens its own database connection and always closes it.

    Args:
        competitionname: competition name passed to the week_dates function.

    Returns:
        pandas.DataFrame with the week_dates columns, holding the first row of
        every season that does not start at 'Week 01'. Empty when every season
        is consistent. Seasons with no rows at all are skipped.
    """
    conn = db_connect()
    try:
        cur = conn.cursor()
        seasons = db_get_seasons(cur)
        # Probe query used only to learn the column names, so the result has
        # the right columns even when no inconsistency is found.
        columns = db_week_dates_to_df(cur, competitionname, '2016/2017').columns
        inconsistencies = pd.DataFrame(columns=columns)
        for season in seasons:
            week_dates = db_week_dates_to_df(cur, competitionname, season)
            if week_dates.empty:
                # No data for this competition/season: nothing to check
                # (indexing row 0 here used to raise KeyError).
                continue
            first_row = week_dates.iloc[0]
            if first_row['stage'] != 'Week 01':
                inconsistencies.loc[len(inconsistencies)] = first_row
    finally:
        # Close the connection even when a query fails.
        conn.close()
    return inconsistencies

### [get_delayed_fixtures_to_df] get delayed fixtures for given competition/season, returns dataframe

In [35]:
def get_delayed_fixtures_to_df(cur, competition, season):
    """Return the week_dates rows whose stage number goes backwards.

    The stage number is the integer formed by the last two characters of the
    ``stage`` column. A row is reported when its stage number is lower than
    that of the row immediately before it.
    NOTE(review): presumably week_dates returns rows in date order, so such a
    row is a fixture played after a later week has started; confirm.

    Args:
        cur: an open database cursor.
        competition: competition name passed to week_dates.
        season: season name passed to week_dates.

    Returns:
        pandas.DataFrame with the week_dates columns and one row per reported
        entry, indexed from 0. Empty when week_dates returns no rows.
    """
    week_dates = db_week_dates_to_df(cur, competition, season)
    delayed = pd.DataFrame(columns=week_dates.columns)
    if week_dates.empty:
        # Nothing to compare; indexing the first stage used to raise here.
        return delayed

    stage_ids = [int(stage[-2:]) for stage in week_dates['stage']]
    # Compare each row with its predecessor; `position` is the row position
    # of the later element of each pair.
    for position, (previous_id, current_id) in enumerate(
            zip(stage_ids, stage_ids[1:]), start=1):
        if current_id < previous_id:
            delayed.loc[len(delayed)] = week_dates.iloc[position]
    return delayed

### [get_week_unique_end_dates_to_df] get week unique end dates for given competition/season, returns dataframe

In [55]:
def get_week_unique_end_dates_to_df(cur, competition, season):
    """Return the latest non-delayed date of each stage.

    Rows reported by get_delayed_fixtures_to_df are removed from the
    week_dates result with a left merge, then the maximum date is taken per
    stage, keeping stages in order of first appearance.

    Args:
        cur: an open database cursor.
        competition: competition name passed to week_dates.
        season: season name passed to week_dates.

    Returns:
        pandas.Series named ``date`` and indexed by ``stage`` (a Series, not
        a DataFrame, despite the function name).
    """
    delayed = get_delayed_fixtures_to_df(cur, competition, season)
    week_dates = db_week_dates_to_df(cur, competition, season)
    join_keys = ['season', 'competition', 'stage', 'date']
    merged = week_dates.merge(delayed, how='left', on=join_keys)
    # After the left merge, nb_games_y is null exactly for rows that have no
    # match among the delayed fixtures.
    not_delayed = merged.nb_games_y.isnull()
    regular = merged[not_delayed][['stage', 'date']]
    latest_date_per_stage = regular.groupby(['stage'], sort=False)['date'].max()
    return latest_date_per_stage

### [TODO] process all seasons

## Test API

In [32]:
## TEST CONFRONTATION HISTORY FUNCTION ##

# Connect to the database and open a cursor for the manual tests.
# The connection stays open because the close call below is commented out.
conn = db_connect()
cur = conn.cursor()
# Example call, currently disabled:
#df = db_confrontation_history_to_df(cur,'psg','om')
#print(df)
#conn.close()

In [33]:
## TEST WEEK_DATES FUNCTION ##
# Print a confirmation when the check returns no rows for 'Ligue 1'.
df = check_week_dates_inconsistency_to_df('Ligue 1')
if len(df.index) == 0:
    print('no inconsistency')

no inconsistency


In [48]:
## TEST DELAYED FIXTURES ##
# Competition and season used by the manual checks in the cells below.
competition = 'Ligue 1'
season = '2016/2017'
def something_to_df(cur, competition, season):
    """Scratch alias of get_week_unique_end_dates_to_df, kept for the test cells.

    The body used to be a line-for-line copy of that function; delegating
    keeps a single implementation.

    Args:
        cur: an open database cursor.
        competition: competition name passed to week_dates.
        season: season name passed to week_dates.

    Returns:
        pandas.Series of the latest non-delayed date per stage.
    """
    return get_week_unique_end_dates_to_df(cur, competition, season)
    

In [51]:
# `df4` only exists as a local inside something_to_df, so referencing it here
# raises NameError on a fresh run; call the function, which performs the same
# groupby and returns the per-stage maximum date.
df5 = something_to_df(cur, competition, season)

In [53]:
# Number of stages in the result.
len(df5)

38

In [54]:
# Display the latest non-delayed date for each stage.
df5

stage
Week 01    2016-08-14
Week 02    2016-08-21
Week 03    2016-08-28
Week 04    2016-09-11
Week 05    2016-09-18
Week 06    2016-09-21
Week 07    2016-09-25
Week 08    2016-10-02
Week 09    2016-10-16
Week 10    2016-10-23
Week 11    2016-10-30
Week 12    2016-11-06
Week 13    2016-11-20
Week 14    2016-11-27
Week 15    2016-11-30
Week 16    2016-12-04
Week 17    2016-12-11
Week 18    2016-12-18
Week 19    2016-12-21
Week 20    2017-01-15
Week 21    2017-01-22
Week 22    2017-01-29
Week 23    2017-02-05
Week 24    2017-02-08
Week 25    2017-02-12
Week 26    2017-02-19
Week 27    2017-02-26
Week 28    2017-03-05
Week 29    2017-03-12
Week 30    2017-03-19
Week 31    2017-04-02
Week 32    2017-04-09
Week 33    2017-04-16
Week 34    2017-04-23
Week 35    2017-04-30
Week 36    2017-05-07
Week 37    2017-05-14
Week 38    2017-05-20
Name: date, dtype: object