### Economic experiment testing accuracy- vs. profitability-optimised models on real-world data

### Import

In [1]:
import os
import itertools
import time
import pandas as pd
import numpy as np
import random
import copy

from dfg_rating.model import factory
import dfg_rating.viz.jupyter_widgets as DFGViz

from dfg_rating.model.rating.elo_rating import ELORating
from dfg_rating.model.network.base_network import WhiteNetwork
from dfg_rating.model.forecast.true_forecast import LogFunctionForecast
from dfg_rating.model.rating.controlled_trend_rating import ControlledTrendRating, ControlledRandomFunction
from dfg_rating.model.betting.betting import FixedBetting, KellyBetting, ThresholdBetting
from dfg_rating.model.bookmaker.base_bookmaker import BaseBookmaker
from dfg_rating.model.evaluators.accuracy import RankProbabilityScore, Likelihood, ProbabilityDifference, ProbabilityPointer, FavouriteProbability
from dfg_rating.model.evaluators.profitability import BettingReturnsEvaluator
from dfg_rating.model.evaluators.base_evaluators import BettingActivity

from pathlib import Path

# Notebook display settings: lift the column/row limits so printed DataFrames
# are not truncated, unset the fixed display width, and print floats with
# five decimals.
for _display_option in ('display.max_columns', 'display.max_rows', 'display.width'):
    pd.set_option(_display_option, None)
pd.set_option('display.float_format', '{:.5f}'.format)



### Specification of the experimental run (betting strategy and odds type)

In [2]:
# Betting strategy for this run (Fixed, Kelly or Threshold). Keep exactly one
# (betting, betting_description) pair active; betting_description becomes part
# of the name of the result file written in the last cell.
betting = FixedBetting(100)
betting_description = 'fixed'
#betting = KellyBetting(100)
#betting_description = 'kelly'
#betting = ThresholdBetting(100, threshold = 0.3)
#betting_description = 'threshold_03'

# Odds type for this run (average or maximum odds). The value is passed to the
# network as the bookmaker name and is also part of the result file name.
# NOTE(review): presumably it has to match one of the keys of the "odds"
# mapping given to WhiteNetwork ('averageodds' / 'maximumodds') - confirm.
odds_type = 'averageodds'
#odds_type = 'maximumodds'

# Parameter grid tested in one run: for every k, the experiment loop evaluates
# the full Cartesian product of c0, c1 and beta.
# range(20,25,5) contains only the value 20, i.e. a single K-factor is tested.
k_options = range(20,25,5)
# NOTE(review): np.arange excludes the stop value, but the NumPy documentation
# warns that with a non-integer step the resulting length can be off by one
# because of floating-point rounding (np.linspace is the documented
# alternative) - confirm each grid below has the intended number of values.
c0_options = np.arange(-1.2, -0.7, 0.1)
c1_options = np.arange(0.0, 0.5, 0.1)
beta_options = np.arange(0.004, 0.014, 0.002)

### Loading of real data into the framework

In [3]:
# Read the anonymised real-world football data (semicolon-separated CSV) from
# the "data" folder next to the current working directory.
data_real_world = pd.read_csv(
    Path.cwd().parent.absolute() / 'data' / 'data_real_anonymised.csv',
    sep=";",
)


In [4]:
# CSV column names, grouped by the role they play in the network mapping.
away_team_columns = {"id": "AwayID", "name": "AwayTeam"}
home_team_columns = {"id": "HomeID", "name": "HomeTeam"}
result_columns = {
    "result": "ResultFT",
    "translation": {"H": "home", "D": "draw", "A": "away"},
}
odds_columns = {
    "maximumodds": {"home": "OddsHomeMax", "draw": "OddsDrawMax", "away": "OddsAwayMax"},
    "averageodds": {"home": "OddsHomeAvg", "draw": "OddsDrawAvg", "away": "OddsAwayAvg"},
}

# node1 is mapped to the away team and node2 to the home team.
network_mapping = {
    "node1": away_team_columns,
    "node2": home_team_columns,
    "day": "Date",
    "dayIsTimestamp": True,
    "ts_format": "%d.%m.%Y",
    "tournament": "Div",
    "season": "Season",
    "winner": result_columns,
    "round": "day",
    "odds": odds_columns,
    "bets": {},
}

# Build the network from the loaded real-world data.
football_network = WhiteNetwork(data=data_real_world, mapping=network_mapping)

Network loaded correctly


### Some helper functions required in the analysis

In [5]:
def add_elo_rating(k, n, name):
    """Add a trained Elo rating with K-factor *k* to network *n* under *name*.

    The remaining Elo parameters are fixed: param_c=10, param_d=400 and
    param_w=80. The rating is registered on the network via ``n.add_rating``;
    nothing is returned.
    """
    elo_rating = ELORating(
        trained=True,
        rating_name=name,
        param_c=10,
        param_d=400,
        param_k=k,
        param_w=80,
    )
    n.add_rating(rating=elo_rating, rating_name=name)

In [6]:
def aggregate_measures(list_of_matches, prefix=''):
    """Aggregate the relevant evaluation measures over a list of matches.

    Each match is a sequence whose element at index 3 is the attribute dict of
    the match; the measures are read from its ``'metrics'`` entry.

    Args:
        list_of_matches: matches to aggregate (may be empty).
        prefix: string prepended (followed by ``_``) to every key of the result.

    Returns:
        dict with the mean of ``rps``, ``prob_diff``, ``draw_prob`` and
        ``fav_prob``, the sum of ``likelihood``, the summed first and second
        entries of all ``betting_returns`` records (``betting_returns`` and
        ``betting_returns_expected``), the summed ``betting_activity`` quantity
        and the returns per bet. The returns per bet are NaN when no bet was
        placed, and the means are NaN when ``list_of_matches`` is empty.
    """
    metrics = [m[3]['metrics'] for m in list_of_matches]

    def _mean(name):
        # np.mean of an empty list only warns and yields NaN; return NaN directly.
        values = [entry[name] for entry in metrics]
        return np.mean(values) if values else float('nan')

    betting_returns = sum(sum(b[0] for b in entry['betting_returns']) for entry in metrics)
    betting_returns_expected = sum(sum(b[1] for b in entry['betting_returns']) for entry in metrics)
    betting_activity = sum(entry['betting_activity']['qty'] for entry in metrics)

    # Without any bet the ratio is undefined; avoid a ZeroDivisionError so that
    # one idle model specification does not abort the whole parameter search.
    if betting_activity:
        returns_per_bet = betting_returns / betting_activity
    else:
        returns_per_bet = float('nan')

    return {
        prefix+'_rps': _mean('rps'),
        prefix+'_likelihood': sum(entry['likelihood'] for entry in metrics),
        prefix+'_prob_diff': _mean('prob_diff'),
        prefix+'_draw_prob': _mean('draw_prob'),
        prefix+'_fav_prob': _mean('fav_prob'),
        prefix+'_betting_returns': betting_returns,
        prefix+'_betting_returns_expected': betting_returns_expected,
        prefix+'_betting_activity': betting_activity,
        prefix+'_betting_returns_per_bet': returns_per_bet,
    }

### Start of the experiment looping over all predefined model parameters

In [7]:
# Grid search over all predefined model parameters. For every combination the
# forecast is added to the network, bets are placed, the evaluators are run and
# the aggregated measures are appended to result_list.
experiment_start_time = time.time()
# one dict of aggregated measures per (k, c0, c1, beta) combination
result_list = []
# to store matches for the best model specifications (needed later for bootstrapping)
bestLikelihood = float('-inf')
matches_oos_Likelihood = []
bestBettingReturns = float('-inf')
matches_oos_BettingReturns = []

# loop over all k options if more than one k is specified
for k in k_options:
    rating_name = f"elo_rating_{k}"
    print(f"Rating <{rating_name}>")
    add_elo_rating(k, football_network, rating_name)

    # loop over all OLR model coefficients that were specified for the run
    for c0, c1, beta in itertools.product(c0_options, c1_options, beta_options):
        print(f'Variables: c0: {c0}, c1: {c1} and beta: {beta}')
        # unique forecast name per parameter combination
        forecast_pointer = f"player_{k}_forecast_{c0:.2f}_{c1:.2f}_{beta:.3f}"
        cell_start_time = time.time()
        football_network.add_forecast(
            forecast=LogFunctionForecast(
                outcomes=['home', 'draw', 'away'], 
                coefficients=[c0, c1], 
                beta_parameter=beta
            ),
            forecast_name=forecast_pointer,
            base_ranking=rating_name
        )
        # the bettor name 'b' is reused for every parameter combination
        # NOTE(review): presumably each call replaces the bets of the previous
        # combination - confirm against add_bets
        football_network.add_bets(
            bettor_name='b',
            bookmaker=odds_type,
            betting=betting,
            base_forecast=forecast_pointer
        )
        # evaluators for the current forecast and the bets of bettor 'b'
        rps = RankProbabilityScore(outcomes=['home', 'draw', 'away'], forecast_name=forecast_pointer)
        betting_returns = BettingReturnsEvaluator(
            outcomes=['home', 'draw', 'away'], 
            player_name='b', 
            true_model=forecast_pointer,
            bookmaker_name=odds_type
        )

        betting_activity = BettingActivity(outcomes=['home', 'draw', 'away'], player_name='b')
        likelihood = Likelihood(outcomes=['home', 'draw', 'away'], forecast_name=forecast_pointer)
        difference = ProbabilityDifference(outcomes=['home', 'draw', 'away'], forecast_name=forecast_pointer)
        draw_probability = ProbabilityPointer(outcomes=['home', 'draw', 'away'], forecast_name=forecast_pointer, probability_index=1)
        fav_probability = FavouriteProbability(outcomes=['home', 'draw', 'away'], forecast_name=forecast_pointer)

        # the names given here are the metric keys that aggregate_measures reads
        football_network.add_evaluation([
            (rps, 'rps'),
            (betting_returns, 'betting_returns'),
            (betting_activity, 'betting_activity'),
            (likelihood, 'likelihood'),
            (difference, 'prob_diff'),
            (draw_probability, 'draw_prob'),
            (fav_probability, 'fav_prob')
        ])
        # materialise all games as (a, h, match_id, match_attributes) tuples
        all_matches = [(a,h, match_id, match_attributes) for a,h, match_id, match_attributes in football_network.iterate_over_games()]

        # seasons 2 to 10 form the out-of-sample set, seasons above 10 the
        # in-sample set; season 1 is only part of all_matches
        matches_oos = [m for m in all_matches if (m[3]['season'] > 1 and m[3]['season'] < 11)]
        matches_is = [m for m in all_matches if (m[3]['season'] > 10)]
        result_all = aggregate_measures(all_matches, 'all')
        result_is = aggregate_measures(matches_is, 'is')
        result_oos = aggregate_measures(matches_oos, 'oos')
        result = {
            'k': k, 'c0' : c0, 'c1' : c1, 'beta' : beta, **result_is, **result_oos, **result_all
        }
        result_list.append(result)

        # update best results if necessary: models are selected on the
        # in-sample measures, and the out-of-sample matches of the selected
        # model are kept. The strict ">" keeps the first model on ties.
        # NOTE(review): deepcopy presumably protects the stored metrics from
        # being overwritten by the next parameter combination - confirm
        if(result_is['is_likelihood'] > bestLikelihood):
            bestLikelihood = result_is['is_likelihood']
            matches_oos_Likelihood = copy.deepcopy(matches_oos)
        if(result_is['is_betting_returns'] > bestBettingReturns):
            bestBettingReturns = result_is['is_betting_returns']
            matches_oos_BettingReturns = copy.deepcopy(matches_oos)
        print(f"Finished in {float(time.time() - cell_start_time)} seconds")

print(f"Experiment finished in {float(time.time() - experiment_start_time)} seconds with {len(result_list)} observations.")
# one row per tested parameter combination
df = pd.DataFrame(result_list)


Rating <elo_rating_20>
Variables: c0: -1.2, c1: 0.0 and beta: 0.004


KeyboardInterrupt: 

### Evaluation of the results of this run

In [8]:
# Identify the accuracy-optimised and the profitability-optimised model from
# the in-sample measures. idxmax returns the FIRST row attaining the maximum,
# which is the same model whose out-of-sample matches were stored by the strict
# ">" comparison in the experiment loop. sort_values(...).iloc[0] does not
# guarantee this when several models tie, because the default sort is not stable.
accOpt = df.loc[df['is_likelihood'].idxmax()]
profOpt = df.loc[df['is_betting_returns'].idxmax()]


def _describe_model(row):
    """Return the coefficient label shown in the 'Model Selection' row."""
    return "c0:" + str(round(row["c0"],2)) + " c1: " + str(round(row["c1"],2)) + " beta: " + str(round(row["beta"],3))


def _realised_returns(matches):
    """Flatten the first entry of every betting-returns record of *matches*."""
    return [bet[0] for match in matches for bet in match[3]['metrics']['betting_returns']]


# obtain data for the best performing models
modelAcc = _describe_model(accOpt)
modelProf = _describe_model(profOpt)
numberBetsAcc = round(accOpt["oos_betting_activity"],0)
numberBetsProf = round(profOpt["oos_betting_activity"],0)
observedReturnsAcc = round(accOpt["oos_betting_returns"],2)
observedReturnsProf = round(profOpt["oos_betting_returns"],2)
observedReturnsDiff = observedReturnsProf - observedReturnsAcc
observedReturnsPerBetAcc = round(observedReturnsAcc/numberBetsAcc,4)
observedReturnsPerBetProf = round(observedReturnsProf/numberBetsProf,4)
observedReturnsPerBetDiff = observedReturnsPerBetProf - observedReturnsPerBetAcc


# calculate confidence interval based on the matches stored in the previous step

# extract betting returns per bet as a basis for bootstrapping from the data
returns_Likelihood = _realised_returns(matches_oos_Likelihood)
returns_BettingReturns = _realised_returns(matches_oos_BettingReturns)

# set seed to allow perfect replicability of bootstrapping
random.seed(10)

# perform bootstrapping: each replication resamples the bets of both models
# with replacement and records the difference in total returns
# (profitability-optimised minus accuracy-optimised). The profitability sample
# is drawn first, so the sequence of random draws is fixed by the seed above.
n_bootstrap = 10000
distribution = [
    np.sum(random.choices(returns_BettingReturns, k=len(returns_BettingReturns)))
    - np.sum(random.choices(returns_Likelihood, k=len(returns_Likelihood)))
    for _ in range(n_bootstrap)
]

# 95% percentile interval and share (in %) of replications with a difference <= 0
CI_Lower = str(round(np.quantile(distribution, 0.025),2))
CI_Upper = str(round(np.quantile(distribution, 0.975),2))
pBootstrap = str(100*len([i for i in distribution if i <= 0])/len(distribution))

confidenceInterval = "(" + CI_Lower + "," + CI_Upper+ ")" + " p = "+pBootstrap+ "%"


# construct results table
resultTable = pd.DataFrame({'Method': ['Model Selection', 'Number of Bets', 'Observed Returns', 'Observed Returns per bet']})
resultTable["Accuracy"] = [modelAcc, numberBetsAcc, observedReturnsAcc, observedReturnsPerBetAcc]
resultTable["Profitability"] = [modelProf, numberBetsProf, observedReturnsProf, observedReturnsPerBetProf]
resultTable["Difference"] = ["", "", observedReturnsDiff, observedReturnsPerBetDiff]
resultTable["Confidence Interval"] = ["", "", confidenceInterval, ""]

### Storing full results of the run and information needed for Table 5 and Table 6

In [9]:
# Write the summary table to the "results" folder next to the current working
# directory; the file name encodes betting strategy and odds type. The export
# of the full per-model results stays disabled.
results_dir = Path.cwd().parent.absolute() / 'results'
#df.to_excel(results_dir / f"real_raw_{betting_description}_{odds_type}.xlsx")
resultTable.to_excel(results_dir / f"real_table_{betting_description}_{odds_type}.xlsx")
