# Economic Experiment

Add description of the experiment here

## Import Statements

In [1]:
from dfg_rating.model import factory
from dfg_rating.model.betting.betting import FixedBetting
from dfg_rating.model.betting.betting import KellyBetting
from dfg_rating.model.bookmaker.base_bookmaker import BaseBookmaker
from dfg_rating.model.evaluators.accuracy import RankProbabilityScore, Likelihood, ProbabilityDifference, ProbabilityPointer, FavouriteProbability
from dfg_rating.model.evaluators.profitability import BettingReturnsEvaluator
from dfg_rating.model.evaluators.base_evaluators import BettingActivity
from dfg_rating.model.forecast.true_forecast import LogFunctionForecast
from dfg_rating.model.network.base_network import BaseNetwork
from dfg_rating.model.rating.controlled_trend_rating import ControlledTrendRating, ControlledRandomFunction

#specify rating error
from dfg_rating.model.rating.base_rating import RatingFunctionError

from dfg_rating.logic.controller import Controller
import pandas as pd
import numpy as np
import time
import math
from tqdm import tqdm
import itertools as it

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.options.display.width = None
pd.set_option('display.float_format', lambda x: '%.5f' % x)

## Experiment configuration

Two main things: The controller is the helper that includes the functions to load and save networks. A configuration object with the parameters of the experiment, more cells can be created with different configurations.

In [2]:
main_controller = Controller()

In [3]:
experimentA = dict(
    test_name="ExperimentA", create_data=True,
    bookmaker_error1 = 0.0, bookmaker_margin1 = 0.00,
    bookmaker_error2 = 0.0, bookmaker_margin2 = 0.10,
    bookmaker_error3 = 0.0, bookmaker_margin3 = 0.20,
    betting_bankrole=100,
    rating_error_loc=0, rating_error_scale=50,
    bookmaker_rating_error_loc=0, bookmaker_rating_error_scale=25,
    number_of_leagues=40,
    #number_of_leagues=1,
    number_of_teams=4,
    #number_of_teams=4, 
    number_of_seasons=10,
    #number_of_seasons=1,
    true_forecast=dict(
        coefficients=[-0.9,0.3],
        beta=0.006
    ),
    bookmaker_forecast=dict(
        coefficients=[-1.2, 0.0],
        beta=0.006
    ),
    in_sample_maximum=5,
    number_of_splits=20
)

In [4]:
experimentB = dict(
    test_name="ExperimentB", create_data=True,
    bookmaker_error1 = 0.0, bookmaker_margin1 = 0.00,
    bookmaker_error2 = 0.0, bookmaker_margin2 = 0.10,
    bookmaker_error3 = 0.0, bookmaker_margin3 = 0.20,
    betting_bankrole=100,
    rating_error_loc=0, rating_error_scale=50,
    bookmaker_rating_error_loc=0, bookmaker_rating_error_scale=25,
    number_of_leagues=40,
    number_of_teams=20, number_of_seasons=10,
    true_forecast=dict(
        coefficients=[-0.9,0.3],
        beta=0.006
    ),
    bookmaker_forecast=dict(
        coefficients=[-0.8, 0.2],
        beta=0.006
    ),
    in_sample_maximum=5,
    number_of_splits=20
)

In [5]:
experimentC = dict(
    test_name="ExperimentC", create_data=True,
    bookmaker_error1 = 0.0, bookmaker_margin1 = 0.00,
    bookmaker_error2 = 0.0, bookmaker_margin2 = 0.10,
    bookmaker_error3 = 0.0, bookmaker_margin3 = 0.20,
    betting_bankrole=100,
    rating_error_loc=0, rating_error_scale=50,
    bookmaker_rating_error_loc=0, bookmaker_rating_error_scale=25,
    number_of_leagues=40,
    number_of_teams=20, number_of_seasons=10,
    true_forecast=dict(
        coefficients=[-0.9,0.3],
        beta=0.006
    ),
    bookmaker_forecast=dict(
        coefficients=[-0.8, 0.4],
        beta=0.004
    ),
    in_sample_maximum=5,
    number_of_splits=20
)

### !!Select the right config

In [6]:
# The desired config is stored at config
config = experimentA

## External actors

We create several bookmakers and a betting strategy that are going to interact with the generated networks.

In [7]:
bookmaker1: BaseBookmaker = factory.new_bookmaker(
    'simple',
    error=factory.new_forecast_error(error_type='factor', error=config["bookmaker_error1"], scope='positive'),
    margin=factory.new_bookmaker_margin('simple', margin=config["bookmaker_margin1"])
)
bookmaker2: BaseBookmaker = factory.new_bookmaker(
    'simple',
    error=factory.new_forecast_error(error_type='factor', error=config["bookmaker_error2"], scope='positive'),
    margin=factory.new_bookmaker_margin('simple', margin=config["bookmaker_margin2"])
)
bookmaker3: BaseBookmaker = factory.new_bookmaker(
    'simple',
    error=factory.new_forecast_error(error_type='factor', error=config["bookmaker_error3"], scope='positive'),
    margin=factory.new_bookmaker_margin('simple', margin=config["bookmaker_margin3"])
)

In [8]:
bettingFixed = FixedBetting(config["betting_bankrole"])
bettingKelly = KellyBetting(config["betting_bankrole"])

We also create an error rating that we are going to use for the calculated forecasts

In [9]:
rating_error = RatingFunctionError(error='normal', loc=config["rating_error_loc"], scale=config["rating_error_scale"])
rating_error_bookmaker = RatingFunctionError(error='normal', loc=config["bookmaker_rating_error_loc"], scale=config["bookmaker_rating_error_scale"])

## Generation of networks

Configure the number of leagues for the experiment:

In [10]:
numberLeagues = config["number_of_leagues"]

For every league, we generate the schedule of matches and true dimensions and we add odds for each game.

In [11]:
networks = []

In [12]:
gen_start_time = time.time()
for league in range(numberLeagues):
    if config["create_data"]:
        network: BaseNetwork = factory.new_network(
            'multiple-round-robin',
            teams=config["number_of_teams"],
            days_between_rounds=7,
            seasons=config["number_of_seasons"],
            league_teams=config["number_of_teams"],
            league_promotion=0,
            create=True,
            true_forecast=LogFunctionForecast(
                outcomes=['home', 'draw', 'away'], 
                coefficients = config["true_forecast"]["coefficients"], 
                beta_parameter=config["true_forecast"]["beta"]
            ),
            true_rating=ControlledTrendRating(
                starting_point=ControlledRandomFunction(distribution='normal', loc=1000, scale=100),
                delta=ControlledRandomFunction(distribution='normal', loc=0, scale=3),
                trend=ControlledRandomFunction(distribution='normal', loc=0, scale=20/365),
                season_delta=ControlledRandomFunction(distribution='normal', loc=0, scale=10)
            )
        )
        #adding biased bookmaker forecast to the network
        network.add_forecast(
            forecast=LogFunctionForecast(
                outcomes=['home', 'draw', 'away'], 
                coefficients = config["bookmaker_forecast"]["coefficients"], 
                beta_parameter=config["bookmaker_forecast"]["beta"], 
                home_team_error=rating_error_bookmaker, 
                away_team_error=rating_error_bookmaker
            ),
            forecast_name='bookmaker_forecast',
            base_ranking='true_rating'
        )
        #adding odds    
        network.add_odds(
            bookmaker_name="bm1",
            bookmaker=bookmaker1,
            base_forecast='bookmaker_forecast'
        )
        network.add_odds(
            bookmaker_name="bm2",
            bookmaker=bookmaker2,
            base_forecast='bookmaker_forecast'
        )
        network.add_odds(
            bookmaker_name="bm3",
            bookmaker=bookmaker3,
            base_forecast='bookmaker_forecast'
        )
    else:
        print("Loading network data")
        main_controller.load_network_from_sql(
            network_name=f"{config['test_name']}_network_{league}",
            new_network_name=f"{config['test_name']}_network_{league}"
        )
        network: BaseNetwork = main_controller.networks[f"{config['test_name']}_network_{league}"]
    networks.append(network)
print(f"{numberLeagues} leagues added in {float(time.time() - gen_start_time)} seconds.")

Season 0
0
<dfg_rating.model.network.multiple_network.LeagueNetwork object at 0x7fa5bf014c18>
0 current_season
first season
first season
first season
first season
0
<dfg_rating.model.network.multiple_network.LeagueNetwork object at 0x7fa5bf014c18>
Season 1
1
<dfg_rating.model.network.multiple_network.LeagueNetwork object at 0x7fa5bf014c18>
1 current_season
1
<dfg_rating.model.network.multiple_network.LeagueNetwork object at 0x7fa5bf014c18>
Season 2
2
<dfg_rating.model.network.multiple_network.LeagueNetwork object at 0x7fa5bf014c18>
2 current_season
2
<dfg_rating.model.network.multiple_network.LeagueNetwork object at 0x7fa5bf014c18>
Season 3
3
<dfg_rating.model.network.multiple_network.LeagueNetwork object at 0x7fa5bf014c18>
3 current_season
3
<dfg_rating.model.network.multiple_network.LeagueNetwork object at 0x7fa5bf014c18>
Season 4
4
<dfg_rating.model.network.multiple_network.LeagueNetwork object at 0x7fa5bf014c18>
4 current_season
4
<dfg_rating.model.network.multiple_network.LeagueNe

# Run

We iterate over the grid of possible parameters for the experiment and aggregate the final results.

### Functions

In [13]:
def aggregate_measures(list_of_matches, prefix='', addBettingMetrics=True):
    measures = {
        prefix+'_rps': np.mean([m[3]['metrics']['rps'] for m in list_of_matches]),
        prefix+'_likelihood': sum([m[3]['metrics']['likelihood'] for m in list_of_matches]),
        prefix+'_prob_diff': np.mean([m[3]['metrics']['prob_diff'] for m in list_of_matches]),
        prefix+'_draw_prob': np.mean([m[3]['metrics']['draw_prob'] for m in list_of_matches]),
        prefix+'_fav_prob': np.mean([m[3]['metrics']['fav_prob'] for m in list_of_matches])
    }
    if addBettingMetrics:
        for m in list_of_matches:
            betting_returns_keys = [k for k in m[3]['metrics'].keys() if k.startswith('betting_returns')]
            betting_activity_keys = [k for k in m[3]['metrics'].keys() if k.startswith('betting_activity')]
            for br in betting_returns_keys:
                measures[f"{prefix}_{br}"] = sum([b[0] for b in m[3]['metrics'][br]]) + measures.get(f"{prefix}_{br}", 0.0)
                #measures[f"{prefix}_{br}_multiplied"] = sum([math.log(b[1]) for b in m[3]['metrics'][br]]) + measures.get(f"{prefix}_{br}_multiplied", 0)
                measures[f"{prefix}_{br}_expected"] = sum([b[2] for b in m[3]['metrics'][br]]) + measures.get(f"{prefix}_{br}_expected", 0.0)
                #measures[f"{prefix}_{br}_expected_multiplied"] = sum([math.log(b[3]) for b in m[3]['metrics'][br]]) + measures.get(f"{prefix}_{br}_expected_multiplied", 0)
            for ba in betting_activity_keys:
                measures[f"{prefix}_{ba}_activity"] = m[3]['metrics'][ba]['qty'] + measures.get(f"{prefix}_{ba}_activity", 0.0)
    return measures

Getting first the values for the true model and the bookmaker model. Remove some of the if they are not needed and if the IS/OOS split is not required.

In [14]:
entire_model_values = {}
for forecast_pointer in ['true_forecast', 'bookmaker_forecast']:
    all_matches =  []
    is_matches =  []
    oos_matches =  []
    for network in tqdm(networks):
        rps = RankProbabilityScore(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer)
        likelihood = Likelihood(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer)
        difference = ProbabilityDifference(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer)
        draw_probability = ProbabilityPointer(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer, probability_index=1)
        fav_probability = FavouriteProbability(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer)
        network.add_evaluation([
            (rps, 'rps'),
            (likelihood, 'likelihood'),
            (difference, 'prob_diff'),
            (draw_probability, 'draw_prob'),
            (fav_probability, 'fav_prob')
        ])
        all_matches += [(a,h, match_id, match_attributes) for a,h, match_id, match_attributes in network.iterate_over_games()]
        is_matches += [(a,h, match_id, match_attributes) for a,h, match_id, match_attributes in network.iterate_over_games() if match_attributes['season'] < 6]
        oos_matches += [(a,h, match_id, match_attributes) for a,h, match_id, match_attributes in network.iterate_over_games() if match_attributes['season'] > 5]

    result_all = aggregate_measures(all_matches, f'all_{forecast_pointer}', False)
    result_is = aggregate_measures(is_matches, f'is_{forecast_pointer}', False)
    result_oos = aggregate_measures(oos_matches, f'oos_{forecast_pointer}', False)
    entire_model_values = {
        **entire_model_values,
        **result_is, **result_oos, **result_all
    }

100%|██████████| 40/40 [00:00<00:00, 198.79it/s]
100%|██████████| 40/40 [00:00<00:00, 238.07it/s]


Cross validation spits and results of each permutation

In [15]:
def cross_validate_betting_returns(matches_array, n_splits, model_hint):
    kf = np.array_split(matches_array, n_splits)
    combinations = [(in_s, out_s) for in_s, out_s in it.permutations(range(len(kf)), r=2)]
    result = []
    aggregated_splits = [aggregate_measures(kf[sample_index]) for sample_index in range(n_splits)]
        
    for in_sample_index, out_sample_index in tqdm(combinations):
        result_is = {f"is{k}": v for k, v in aggregated_splits[in_sample_index].items()}
        result_oos = {f"oos{k}": v for k, v in aggregated_splits[out_sample_index].items()}
        result.append({
            "permutation": f"{in_sample_index} x {out_sample_index}",
            "model_hint": model_hint,
            **result_is,
            **result_oos
        })
    return result

### Initialization of results and cross validation output

In [16]:
result_list = []
cv_result = []

### Main loop

In [None]:
experiment_start_time = time.time()
for c0 in np.arange(-1.20, -0.60, 0.1):
    for c1 in np.arange(0.0, 0.60, 0.1):
        for beta in np.arange(0.002, 0.010, 0.002):
            print(f'Variables: c0: {c0}, c1: {c1} and beta: {beta}')
            forecast_pointer = f"player_forecast_{c0:.2f}_{c1:.2f}_{beta:.3f}"
            cell_start_time = time.time()
            all_matches =  []
            is_matches =  []
            oos_matches =  []
            
            bm_start_time = time.time()
            for network_number, network in enumerate(networks):
                if config["create_data"]:
                    network.add_forecast(
                        forecast=LogFunctionForecast(outcomes=['home', 'draw', 'away'], coefficients=[c0, c1], beta_parameter=beta, home_team_error=rating_error, away_team_error=rating_error),
                        forecast_name=forecast_pointer,
                        base_ranking='true_rating'
                    )
                betting_evaluations = []
                for betting, bettingName in [(bettingFixed, "Fixed"), (bettingKelly, "Kelly")]:
                #for betting, bettingName in [(bettingKelly, "Kelly")]:
                    for i in [1, 2, 3]:
                    #for i in [1]:
                        #Pointer of each bookmaker
                        bookmaker = 'bm' + str(i)
                        bettor = 'b' + str(i)

                        network.add_bets(
                            bettor_name=bettor,
                            bookmaker=bookmaker,
                            betting=betting,
                            base_forecast=forecast_pointer
                        )
                        # Bettors can be added and then we can extract all the results at once
                        betting_activity = BettingActivity(outcomes=['home', 'draw', 'away'], player_name = bettor)
                        betting_returns = BettingReturnsEvaluator(outcomes=['home', 'draw', 'away'], player_name = bettor, true_model = 'true_forecast', bookmaker_name = bookmaker)
                        betting_evaluations += [
                            (betting_returns, f'betting_returns_{bettingName}_{bookmaker}'),
                            (betting_activity, f'betting_activity_{bettingName}_{bookmaker}')
                        ]

                rps = RankProbabilityScore(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer)
                likelihood = Likelihood(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer)
                difference = ProbabilityDifference(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer)
                draw_probability = ProbabilityPointer(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer, probability_index=1)
                fav_probability = FavouriteProbability(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer)
                        
                network.add_evaluation(
                    [
                        (rps, 'rps'),
                        (likelihood, 'likelihood'),
                        (difference, 'prob_diff'),
                        (draw_probability, 'draw_prob'),
                        (fav_probability, 'fav_prob')
                    ] + betting_evaluations
                )

                all_matches += [(a,h, match_id, match_attributes) for a,h, match_id, match_attributes in network.iterate_over_games()]
                is_matches += [(a,h, match_id, match_attributes) for a,h, match_id, match_attributes in network.iterate_over_games() if match_attributes['season'] <= config["in_sample_maximum"]]
                oos_matches += [(a,h, match_id, match_attributes) for a,h, match_id, match_attributes in network.iterate_over_games() if match_attributes['season'] > config["in_sample_maximum"]]

            print(f"BMs finished in {float(time.time() - bm_start_time)} seconds")
             
            cv_start_time = time.time()
            cv_result += cross_validate_betting_returns(np.array(all_matches), config["number_of_splits"], forecast_pointer)
            print(f"CV finished in {float(time.time() - cv_start_time)} seconds")
            
            am_start_time = time.time()
            result_all = aggregate_measures(all_matches, 'all')
            result_is = aggregate_measures(is_matches, 'is')
            result_oos = aggregate_measures(oos_matches, 'oos')
            result = {
                'bookmaker': bookmaker, 'c0' : c0, 'c1' : c1, 'beta' : beta, **result_is, **result_oos, **result_all, **entire_model_values
            }
                    
            result_list.append(result)
            print(f"Aggregation finished in {float(time.time() - am_start_time)} seconds")    
                               
            print(f"Finished in {float(time.time() - cell_start_time)} seconds")

print(f"Experiment finished in {float(time.time() - experiment_start_time)} seconds with {len(result_list)} observations.")

df = pd.DataFrame(result_list)
df.to_excel(f"Results_{config['test_name']}.xlsx")

if config["create_data"]:
    print("Saving data with the all the forecasts added")
    for network_number, network in tqdm(enumerate(networks)):
        main_controller.networks[f"{config['test_name']}_network_{network_number}"] = network
        main_controller.save_network(f"{config['test_name']}_network_{network_number}")
            

### Process and store cross-validation results

In [25]:
cv_df = pd.DataFrame(cv_result)
betting_returns_columns = []
for strategy, bookmaker_name in it.product(["Fixed", "Kelly"], ["bm1", "bm2", "bm3"]):
    betting_returns_columns += [
        f'oos_betting_returns_{strategy}_{bookmaker_name}', f'oos_betting_returns_{strategy}_{bookmaker_name}_expected'
    ]
subset_of_columns = ['permutation', 'model_hint'] + betting_returns_columns
# Optimizing for RPS
by_rps_df = cv_df.loc[cv_df.groupby('permutation').is_rps.idxmin(), subset_of_columns].set_index('permutation')
# Optimizing for likelihood
by_likelihood_df = cv_df.loc[cv_df.groupby('permutation').is_likelihood.idxmax(), subset_of_columns].set_index('permutation')
# Optimizing for betting
by_betting_df = None
for strategy, bookmaker_name in it.product(["Fixed", "Kelly"], ["bm1", "bm2", "bm3"]):
    betting_combination_name = f"{strategy}_{bookmaker_name}"
    betting_combination_df = cv_df.loc[
        cv_df.groupby('permutation')[f'is_betting_returns_{betting_combination_name}'].idxmax(), 
        ['permutation', 'model_hint', f'oos_betting_returns_{betting_combination_name}', f'oos_betting_returns_{betting_combination_name}_expected']
    ].set_index('permutation')
    if by_betting_df is not None:
        by_betting_df = by_betting_df.join(
            betting_combination_df,
            lsuffix='',
            rsuffix=f'_{betting_combination_name}'
        )
    else:
        by_betting_df = betting_combination_df

cv_study_df = by_rps_df.join(
    by_likelihood_df.join(
        by_betting_df,
        lsuffix='',
        rsuffix='_Fixed_bm1',
    ),
    lsuffix='_rps',
    rsuffix='_likelihood',
)

cv_study_df.to_excel(f"Permutations_{config['test_name']}.xlsx")

In [21]:
cv_df.columns

Index(['permutation', 'model_hint', 'is_rps', 'is_likelihood', 'is_prob_diff',
       'is_draw_prob', 'is_fav_prob', 'is_betting_returns_Fixed_bm1',
       'is_betting_returns_Fixed_bm1_expected', 'is_betting_returns_Fixed_bm2',
       'is_betting_returns_Fixed_bm2_expected', 'is_betting_returns_Fixed_bm3',
       'is_betting_returns_Fixed_bm3_expected', 'is_betting_returns_Kelly_bm1',
       'is_betting_returns_Kelly_bm1_expected', 'is_betting_returns_Kelly_bm2',
       'is_betting_returns_Kelly_bm2_expected', 'is_betting_returns_Kelly_bm3',
       'is_betting_returns_Kelly_bm3_expected',
       'is_betting_activity_Fixed_bm1_activity',
       'is_betting_activity_Fixed_bm2_activity',
       'is_betting_activity_Fixed_bm3_activity',
       'is_betting_activity_Kelly_bm1_activity',
       'is_betting_activity_Kelly_bm2_activity',
       'is_betting_activity_Kelly_bm3_activity', 'oos_rps', 'oos_likelihood',
       'oos_prob_diff', 'oos_draw_prob', 'oos_fav_prob',
       'oos_betting_ret

In [20]:
cv_result[0]

{'permutation': '0 x 1',
 'model_hint': 'player_forecast_-1.20_0.00_0.002',
 'is_rps': 0.21714514165284643,
 'is_likelihood': -254.0063962162792,
 'is_prob_diff': 0.27030193263819924,
 'is_draw_prob': 0.26406842944262754,
 'is_fav_prob': 0.5031167515977858,
 'is_betting_returns_Fixed_bm1': -5.624555995338596,
 'is_betting_returns_Fixed_bm1_expected': 2.67714277949499,
 'is_betting_returns_Fixed_bm2': -6.787033083534034,
 'is_betting_returns_Fixed_bm2_expected': -0.8089342707665406,
 'is_betting_returns_Fixed_bm3': -7.907218797812383,
 'is_betting_returns_Fixed_bm3_expected': -1.7098033206762995,
 'is_betting_returns_Kelly_bm1': -5.624555995338596,
 'is_betting_returns_Kelly_bm1_expected': 2.67714277949499,
 'is_betting_returns_Kelly_bm2': -6.787033083534034,
 'is_betting_returns_Kelly_bm2_expected': -0.8089342707665406,
 'is_betting_returns_Kelly_bm3': -7.907218797812383,
 'is_betting_returns_Kelly_bm3_expected': -1.7098033206762995,
 'is_betting_activity_Fixed_bm1_activity': 403.0,
 '