# Install and Import Libraries

In [1]:
# !pip install git+https://github.com/sashml/betting_analytics.git
 
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sqlite3

from betalytics.soccer.const import BOOKIE, MATCH_INFO 
from betalytics.soccer.loader.football_data_loader import load_and_normalize_data
from betalytics.soccer.strategies.all import apply_results
from betalytics.soccer.stats.team_ratings import get_standings_table

%matplotlib inline

# Load Data

In [2]:
# Optional, Google Colab only: uncomment to upload files from the local
# machine into the Colab session. Each uploaded file name and its size in
# bytes is printed. Left disabled here; the load cell further down reads the
# database from a relative path instead.
# from google.colab import files

# uploaded = files.upload()

# for fn in uploaded.keys():
#   print('User uploaded file "{name}" with length {length} bytes'.format(
#       name=fn, length=len(uploaded[fn])))

In [3]:
# Optional: uncomment to extract the zipped database archive into the current
# working directory. The archive name below is the one used when this
# notebook was written; adjust it to match the actual file.
# import zipfile
# with zipfile.ZipFile('database.sqlite (1).zip', 'r') as zip_ref:
#   zip_ref.extractall('.')

In [28]:
# Location of the SQLite database (relative to this notebook) and the
# bookmaker whose odds are requested from the loader.
db_path = '../../data/database.sqlite'
bookie_name = 'BET365'

# The loader prints the dataset shape before and after dropping NA rows.
data = load_and_normalize_data(db_file_name=db_path, bookie=bookie_name)

Dataset[Full] Shape = (139555, 13)
Dataset[DropNA] Shape = (124331, 13)


In [35]:
# Count the matches available for every league/season pair and pivot the
# seasons into columns; combinations with no matches are shown as 0.
group_keys = ['LEAGUE', 'SEASON']
matches_per_league_season = data.groupby(group_keys).size()
season_gr = matches_per_league_season.unstack(fill_value=0)
season_gr.head(5)

SEASON,2002/2003,2003/2004,2004/2005,2005/2006,2006/2007,2007/2008,2008/2009,2009/2010,2010/2011,2011/2012,2012/2013,2013/2014,2014/2015,2015/2016,2016/2017,2017/2018,2018/2019,2019/2020
LEAGUE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Belgium.Jupiler League,107,303,162,302,304,306,306,210,240,240,240,240,240,240,240,240,240,54
England.Championship,515,551,392,552,552,552,552,552,552,552,551,552,552,552,552,552,551,84
England.Conference,0,0,0,461,549,552,537,506,552,516,552,552,552,552,552,552,552,129
England.League 1,345,504,376,552,552,552,552,552,552,552,552,552,551,552,552,552,551,79
England.League 2,328,472,440,552,552,552,552,552,552,552,551,552,552,552,552,552,552,95


# Apply results

In [30]:
# Columns shown in the preview: match date, actual result, the per-strategy
# result columns, the odds taken by each strategy, and the raw 1X2 odds.
preview_columns = [
    'DATE', 'RESULT',
    'RESULT_ON_HOME', 'RESULT_ON_FAVORITE', 'RESULT_ON_DOG',
    'ODDS_ON_HOME', 'ODDS_ON_FAVORITE', 'ODDS_ON_DOG',
    'HOME_ODDS', 'DRAW_ODDS', 'AWAY_ODDS',
]

# Add the strategy result columns, then order the matches chronologically.
match_results = apply_results(data).sort_values(by='DATE')

# Peek at the five most recent matches.
match_results[preview_columns].tail(5)

Unnamed: 0,DATE,RESULT,RESULT_ON_HOME,RESULT_ON_FAVORITE,RESULT_ON_DOG,ODDS_ON_HOME,ODDS_ON_FAVORITE,ODDS_ON_DOG,HOME_ODDS,DRAW_ODDS,AWAY_ODDS
5,2019-09-16 00:00:00,A,OTHER,OTHER,A,1.5,1.5,6.5,1.5,4.33,6.5
4,2019-09-16 00:00:00,H,H,OTHER,OTHER,2.87,2.37,3.4,2.87,3.4,2.37
3,2019-09-16 00:00:00,D,OTHER,OTHER,D,2.62,2.6,3.5,2.62,3.5,2.6
2,2019-09-16 00:00:00,H,H,OTHER,OTHER,3.75,1.83,4.0,3.75,4.0,1.83
0,2019-09-16 00:00:00,A,OTHER,A,OTHER,2.8,2.7,3.0,2.8,3.0,2.7


In [31]:
# Build a nested lookup: rated_team[league][season] -> the table returned by
# get_standings_table for that league's matches in that season (n_teams=5).
rated_team = {}

# Split the frame once per league, then once per season inside that league,
# instead of building two full-length boolean masks over match_results for
# every (league, season) combination. sort=False keeps the leagues in order of
# first appearance; seasons are visited in ascending order.
for league, league_matches in match_results.groupby('LEAGUE', sort=False):
    for season, season_matches in league_matches.groupby('SEASON', sort=True):
        # Defensive: a categorical key can yield empty groups.
        if season_matches.empty:
            continue
        teams = get_standings_table(season_matches, n_teams=5)
        rated_team.setdefault(league, {})[season] = teams
print('CALCULATED THE MOST RATED TEAMS!')

CALCULATED THE MOST RATED TEAMS!


# Review results

In [32]:
# Alphabetical list of the leagues for which standings were calculated.
sorted(rated_team)

['Belgium.Jupiler League',
 'England.Championship',
 'England.Conference',
 'England.League 1',
 'England.League 2',
 'England.Premier League',
 'France.Division 2',
 'France.Le Championnat',
 'Germany.Bundesliga 1',
 'Germany.Bundesliga 2',
 'Greece.Ethniki Katigoria',
 'Italy.Serie A',
 'Italy.Serie B',
 'Netherlands.Eredivisie',
 'Portugal.Liga I',
 'Scotland.Division 1',
 'Scotland.Division 2',
 'Scotland.Division 3',
 'Scotland.Premier League',
 'Spain.La Liga Primera Division',
 'Spain.La Liga Segunda Division',
 'Turkey.Futbol Ligi 1']

In [34]:
# Example lookup: the standings table stored for one league and one season.
example_league = 'Italy.Serie A'
example_season = '2019/2020'
rated_team[example_league][example_season]

RESULT,A,D,H,HOME_SCORE,HOME_MAX_SCORE,A.1,D.1,H.1,AWAY_SCORE,AWAY_MAX_SCORE,SCORE,MAX_SCORE,WIN_RATE
I1.Inter,0,0,2,6,6,1,0,0,3,3,9,9,1.0
I1.Bologna,0,0,1,3,3,1,1,0,4,6,7,9,0.777778
I1.Juventus,0,0,1,3,3,1,1,0,4,6,7,9,0.777778
I1.Atalanta,1,0,0,0,3,2,0,0,6,6,6,9,0.666667
I1.Torino,1,0,1,3,6,1,0,0,3,3,6,9,0.666667



# Summary

* This was a quick introduction to the new `betting_analytics` library.
* It demonstrated a simple approach to finding the top-N rated teams in each league, season by season.



