# Economic Experiment

Add description of the experiment here

## Import Statements

In [14]:
from dfg_rating.model import factory
from dfg_rating.model.betting.betting import FixedBetting, BaseBetting
from dfg_rating.model.bookmaker.base_bookmaker import BaseBookmaker
from dfg_rating.model.evaluators.accuracy import RankProbabilityScore, Likelihood, ProbabilityDifference, ProbabilityPointer, FavouriteProbability
from dfg_rating.model.evaluators.profitability import BettingReturnsEvaluator
from dfg_rating.model.evaluators.base_evaluators import BettingActivity
from dfg_rating.model.forecast.base_forecast import SimpleForecast, BaseForecast
from dfg_rating.model.forecast.true_forecast import LogFunctionForecast
from dfg_rating.model.network.base_network import BaseNetwork
from dfg_rating.model.rating.controlled_trend_rating import ControlledTrendRating, ControlledRandomFunction

#specify rating error
from dfg_rating.model.rating.base_rating import RatingFunctionError

import networkx as nx
import pandas as pd
import numpy as np
import time
from tqdm import tqdm

from dfg_rating.model.network.simple_network import RoundRobinNetwork

pd.set_option('display.max_columns', None)
pd.options.display.width = None
pd.set_option('display.float_format', lambda x: '%.5f' % x)

## External actors

We create a bookmaker and a betting strategy that are going to interact with the generated networks.

In [2]:
bookmaker: BaseBookmaker = factory.new_bookmaker(
    'simple',
    error=factory.new_forecast_error(error_type='factor', error=0.0, scope='positive'),
    margin=factory.new_bookmaker_margin('simple', margin=0.10)
)

In [3]:
betting = FixedBetting(100)

We also create an error rating that we are going to use for the calculated forecasts

In [4]:
rating_error = RatingFunctionError(error='normal', loc=0, scale=50)
rating_error_bookmaker = RatingFunctionError(error='normal', loc=0, scale=25)

## Generation of networks

Configure the number of leagues for the experiment:

In [5]:
numberLeagues = 5

For every league, we generate the schedule of matches and true dimensions and we add odds for each game.

In [6]:
networks = []
gen_start_time = time.time()
for league in range(numberLeagues):
    print(f"League : {league}")
    network: BaseNetwork = factory.new_network(
        'multiple-round-robin',
        teams=20,
        days_between_rounds=7,
        seasons=10,
        league_teams=20,
        league_promotion=0,
        create=True,
        true_forecast=LogFunctionForecast(
            outcomes=['home', 'draw', 'away'], 
            coefficients = [-0.9,0.3], 
            beta_parameter=0.006
        ),
        true_rating=ControlledTrendRating(
            starting_point=ControlledRandomFunction(distribution='normal', loc=1000, scale=100),
            delta=ControlledRandomFunction(distribution='normal', loc=0, scale=3),
            trend=ControlledRandomFunction(distribution='normal', loc=0, scale=20/365),
            season_delta=ControlledRandomFunction(distribution='normal', loc=0, scale=10)
        )
    )
    #adding biased bookmaker forecast to the network
    network.add_forecast(
        forecast=LogFunctionForecast(
            outcomes=['home', 'draw', 'away'], 
            coefficients=[-0.75, 0.45], 
            beta_parameter=0.003, 
            home_team_error=rating_error_bookmaker, 
            away_team_error=rating_error_bookmaker
        ),
        forecast_name='bookmaker_forecast',
        base_ranking='true_rating'
    )
    #adding odds    
    network.add_odds(
        bookmaker_name="bm",
        bookmaker=bookmaker,
        base_forecast='bookmaker_forecast'
    )

    networks.append(network)
print(f"{numberLeagues} leagues added in {float(time.time() - gen_start_time)} seconds.")

League : 0
League : 1
League : 2
League : 3
League : 4
5 leagues added in 5.904418468475342 seconds.


## Main loop

We iterate over the grid of possible parameters for the experiment and aggregate the final results.

In [9]:
def aggregate_measures(list_of_matches, prefix=''):
    return {
        prefix+'_rps': np.mean([m[3]['metrics']['rps'] for m in list_of_matches]),
        prefix+'_likelihood': sum([m[3]['metrics']['likelihood'] for m in list_of_matches]),
        prefix+'_prob_diff': np.mean([m[3]['metrics']['prob_diff'] for m in list_of_matches]),
        prefix+'_draw_prob': np.mean([m[3]['metrics']['draw_prob'] for m in list_of_matches]),
        prefix+'_fav_prob': np.mean([m[3]['metrics']['fav_prob'] for m in list_of_matches]),
        prefix+'_betting_returns': sum([sum([b[0] for b in m[3]['metrics']['betting_returns']]) for m in list_of_matches]),
        prefix+'_betting_returns_expected': sum([sum(b[1] for b in m[3]['metrics']['betting_returns']) for m in list_of_matches]),
        prefix+'_betting_activity': sum([m[3]['metrics']['betting_activity']['qty'] for m in list_of_matches])
    }

Getting first the values for the true model and the bookmaker model. Remove some of the if they are not needed and if the IS/OOS split is not required.

In [21]:
entire_model_values = []
for forecast_pointer in ['true_forecast', 'bookmaker_forecast']:
    for network in tqdm(networks):
        rps = RankProbabilityScore(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer)
        likelihood = Likelihood(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer)
        difference = ProbabilityDifference(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer)
        draw_probability = ProbabilityPointer(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer, probability_pointer=1)
        fav_probability = FavouriteProbability(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer)
        network.add_evaluation([
            (rps, 'rps'),
            (likelihood, 'likelihood'),
            (difference, 'prob_diff'),
            (draw_probability, 'draw_prob'),
            (fav_probability, 'fav_prob')
        ])
        all_matches += [(a,h, match_id, match_attributes) for a,h, match_id, match_attributes in network.iterate_over_games()]
        is_matches += [(a,h, match_id, match_attributes) for a,h, match_id, match_attributes in network.iterate_over_games() if match_attributes['season'] < 6]
        oos_matches += [(a,h, match_id, match_attributes) for a,h, match_id, match_attributes in network.iterate_over_games() if match_attributes['season'] > 5]

        result_all = aggregate_measures(all_matches, f'all_{forecast_pointer}')
        result_is = aggregate_measures(is_matches, f'is_{forecast_pointer}')
        result_oos = aggregate_measures(oos_matches, f'oos_{forecast_pointer}')
        entire_model_values.append({
            **result_is, **result_oos, **result_all
        })
print(entire_model_values[0])
    

100%|██████████| 5/5 [00:05<00:00,  1.05s/it]
100%|██████████| 5/5 [00:05<00:00,  1.19s/it]

{'is_true_forecast_rps': 0.19597771288570784, 'is_true_forecast_likelihood': -52431.33811616916, 'is_true_forecast_prob_diff': 4183.828049063095, 'is_true_forecast_draw_prob': 22318.175844393932, 'is_true_forecast_fav_prob': 27096.44618003128, 'is_true_forecast_betting_returns': -4383.371759462539, 'is_true_forecast_betting_returns_expected': -5169.469741101881, 'is_true_forecast_betting_activity': 48583, 'oos_true_forecast_rps': 0.1682502038827507, 'oos_true_forecast_likelihood': -30755.517557299638, 'oos_true_forecast_prob_diff': 2528.1145815004443, 'oos_true_forecast_draw_prob': 15188.925124594822, 'oos_true_forecast_fav_prob': 20269.616930202348, 'oos_true_forecast_betting_returns': -6308.188336856992, 'oos_true_forecast_betting_returns_expected': -6729.601019783642, 'oos_true_forecast_betting_activity': 32737, 'all_true_forecast_rps': 0.18488670928452494, 'all_true_forecast_likelihood': -83186.85567346963, 'all_true_forecast_prob_diff': 6711.942630563642, 'all_true_forecast_draw_p




In [23]:
result_list = []
experiment_start_time = time.time()
for c0 in np.arange(-1.20, -0.45, 0.15):
    for c1 in np.arange(0.0, 0.75, 0.15):
        for beta in np.arange(0.0015, 0.009, 0.0015):
            
            print(f'Variables: c0: {c0}, c1: {c1} and beta: {beta}')
            forecast_pointer = 'player_forecast'+'_'+str(c0)+'_'+str(c1)+'_'+str(beta)
            cell_start_time = time.time()
            all_matches =  []
            is_matches =  []
            oos_matches =  []
            
            for network_index, network in enumerate(networks):
                rps = RankProbabilityScore(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer)
                betting_returns = BettingReturnsEvaluator(outcomes=['home', 'draw', 'away'], player_name = 'b', true_model = 'true_forecast', bookmaker_name = 'bm')
                betting_activity = BettingActivity(outcomes=['home', 'draw', 'away'], player_name = 'b')
                likelihood = Likelihood(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer)
                difference = ProbabilityDifference(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer)
                draw_probability = ProbabilityPointer(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer, probability_pointer=1)
                fav_probability = FavouriteProbability(outcomes=['home', 'draw', 'away'], forecast_name = forecast_pointer)
            
                network.add_forecast(
                forecast=LogFunctionForecast(outcomes=['home', 'draw', 'away'], coefficients=[c0, c1], beta_parameter=beta, home_team_error=rating_error, away_team_error=rating_error),
                forecast_name=forecast_pointer,
                base_ranking='true_rating'
                )
                
                network.add_bets(
                bettor_name='b',
                bookmaker='bm',
                betting=betting,
                base_forecast=forecast_pointer
                )
            
                network.add_evaluation([
                    (rps, 'rps'),
                    (betting_returns, 'betting_returns'),
                    (betting_activity, 'betting_activity'),
                    (likelihood, 'likelihood'),
                    (difference, 'prob_diff'),
                    (draw_probability, 'draw_prob'),
                    (fav_probability, 'fav_prob')
                ])
                #network.serialize_network('economic_network')
                
                all_matches += [(a,h, match_id, match_attributes) for a,h, match_id, match_attributes in network.iterate_over_games()]
                is_matches += [(a,h, match_id, match_attributes) for a,h, match_id, match_attributes in network.iterate_over_games() if match_attributes['season'] < 6]
                oos_matches += [(a,h, match_id, match_attributes) for a,h, match_id, match_attributes in network.iterate_over_games() if match_attributes['season'] > 5]
                
            result_all = aggregate_measures(all_matches, 'all')
            result_is = aggregate_measures(is_matches, 'is')
            result_oos = aggregate_measures(oos_matches, 'oos')
            network_values = entire_model_values[network_index]
            result = {
                'c0' : c0, 'c1' : c1, 'beta' : beta, **result_is, **result_oos, **result_all, **network_values
            }
            #print(len(all_matches))
            result_list.append(result)
            print(f"Finished in {float(time.time() - cell_start_time)} seconds")
            
                 
            
#network.export(forecasts = ['bookmaker_forecast', 'true_forecast', 'player_forecast'+'_'+str(c0)+'_'+str(c1)+'_'+str(beta)],
#                      metrics = ['rps', 'betting_returns', 'loglikelihood'],
#                      odds=['bm'])
            
            
print(f"Experiment finished in {float(time.time() - experiment_start_time)} seconds with {len(result_list)} observations.")

df = pd.DataFrame(result_list)
df.to_excel("Results_Economics.xlsx")
            

Variables: c0: -1.2, c1: 0.0 and beta: 0.0015
Finished in 2.5389621257781982 seconds
Variables: c0: -1.2, c1: 0.0 and beta: 0.003
Finished in 2.4380722045898438 seconds
Variables: c0: -1.2, c1: 0.0 and beta: 0.0045000000000000005
Finished in 2.648677110671997 seconds
Variables: c0: -1.2, c1: 0.0 and beta: 0.006
Finished in 2.8308680057525635 seconds
Variables: c0: -1.2, c1: 0.0 and beta: 0.0075
Finished in 2.6460928916931152 seconds
Variables: c0: -1.2, c1: 0.15 and beta: 0.0015
Finished in 2.8348803520202637 seconds
Variables: c0: -1.2, c1: 0.15 and beta: 0.003
Finished in 2.801708221435547 seconds
Variables: c0: -1.2, c1: 0.15 and beta: 0.0045000000000000005
Finished in 4.615953207015991 seconds
Variables: c0: -1.2, c1: 0.15 and beta: 0.006
Finished in 2.5255086421966553 seconds
Variables: c0: -1.2, c1: 0.15 and beta: 0.0075
Finished in 2.558708906173706 seconds
Variables: c0: -1.2, c1: 0.3 and beta: 0.0015
Finished in 2.6914284229278564 seconds
Variables: c0: -1.2, c1: 0.3 and beta:

In [10]:
for a,h,m_id,attributes in network.iterate_over_games():
    print([v for v in attributes['metrics']['betting_returns']])

[(0.0, -0.0), (-0.0, -0.0), (-1.0, -0.08991289965839833)]
[(-1.0, -0.008650390683704434), (-0.0, -0.0), (0.0, -0.0)]
[(0.0, -0.0), (-0.0, -0.0), (-0.0, -0.0)]
[(-1.0, 0.30191476664492334), (0.0, -0.0), (-0.0, -0.0)]
[(0.0, -0.0), (-0.0, -0.0), (-1.0, -0.17947428695327305)]
[(-1.0, 0.12218141113664616), (-0.0, -0.0), (0.0, -0.0)]
[(0.0, 0.0), (-0.0, -0.0), (-1.0, -0.2510100628655365)]
[(0.0, -0.0), (-1.0, 0.20937234053855036), (-1.0, 0.300273318083752)]
[(0.0, -0.0), (-1.0, -0.206468030654528), (-1.0, -0.23783434090807143)]
[(0.0, -0.0), (-0.0, -0.0), (-1.0, -0.21263761078116195)]
[(-0.0, -0.0), (0.0, 0.0), (-1.0, 0.5587158286876608)]
[(0.0, -0.0), (-0.0, -0.0), (-1.0, 0.007410739353999496)]
[(-0.0, -0.0), (0.0, -0.0), (-1.0, -0.08991427813929709)]
[(-0.0, -0.0), (0.0, -0.0), (-1.0, 0.09342043041484205)]
[(-0.0, 0.0), (0.0, -0.0), (-1.0, -0.29024383089297734)]
[(1.6461424721553763, 0.36133382487996957), (-0.0, -0.0), (-1.0, -0.4598924421528252)]
[(-1.0, 0.425912416952529), (0.0, -0.0), 