# Running MCMC models

In [1]:
%load_ext watermark

In [2]:
import pymc as pm
import pandas as pd
import itertools as itt
import arviz as az
import graphviz
import numpy as np
import importlib as imp
from collections import defaultdict
import data_utils
import pickle
from pathlib import Path

import sys
sys.path.append("../")

from models import OneAdvantage
from models import TeamSpecificAdvantage


# SQLite database file handed to data_utils.get_data() to load the games.
DB_PATH = '../data/data.sqlite.db'
# Root folder under which one sub-folder per data configuration is created
# for the pickled dataset, the saved traces and the rendered model graphs.
STORAGE_PATH = '../storage/'

%watermark --iversions

sys     : 3.9.12 | packaged by conda-forge | (main, Mar 24 2022, 23:25:59) 
[GCC 10.3.0]
numpy   : 1.22.3
graphviz: 0.19.1
pandas  : 1.4.2
pymc    : 4.0.0b6
arviz   : 0.12.0



## Data preparation

First, I retrieve all games on record, generate home-away pairs, assign identifiers to those pairs, and calculate the correct score differences.

In [3]:
# Re-import data_utils before using it — presumably so that edits made to the
# module during the session are picked up without restarting the kernel.
imp.reload(data_utils)
# Load the games from the SQLite database at DB_PATH. Per the notebook text,
# this is also where home-away pairs, their identifiers and the score
# differences are prepared (implementation lives in data_utils — not shown here).
all_games = data_utils.get_data(DB_PATH)

## Data configurations

Now, I outline possible configurations to cut the data for modeling purposes. I will build models using the following data:
 - A dataset spanning 5 seasons starting from 2017, where a team pair is treated as unique every season
 - A dataset that uses 1 season (2020) with non-informative team strength priors (each team-pair strength prior is zero)
 - A dataset that uses 1 season (2020) with informative team strength priors (each team-pair strength prior is equal to the average score difference between the same teams in the 2 preceding seasons)

In [4]:
# Named data cuts used for modelling. Each key becomes the name of a storage
# sub-folder, and each value is unpacked as keyword arguments into
# data_utils.compute_data(all_games, **config) by the run loop.
#   start_year         - first season included in the cut
#   seasons            - number of seasons included
#   informative_priors - None for non-informative priors; presumably the number
#                        of preceding seasons used to build the team-pair priors
#                        otherwise (the text mentions 2 seasons) — TODO confirm
#   by_season          - presumably whether a team pair is treated as distinct
#                        in every season — TODO confirm against data_utils
# NOTE(review): the key and the accompanying text describe the first cut as
# 5 seasons starting from 2017, but the values below are start_year=2012 and
# seasons=10 — confirm which one is intended.
data_configs = {
    '5-seasons-non-informative' : {'start_year': 2012, "seasons": 10, "informative_priors" : None, "by_season" : True},
    '2020-non-informative' : {'start_year': 2020, "seasons": 1, "informative_priors" : None, "by_season" : False},
    '2020-informative' : {'start_year': 2020, "seasons": 1, "informative_priors" : 2, "by_season" : False}
}

## Strength advantage priors

For the home-court advantage itself, I use two priors for the mean: 0 (uninformative) and 4 (as estimated by Sokol et al.). For computational reasons, I also experiment with two types of variance priors: a version where the variance is fixed (I choose the value of 10) and a version where it is modelled with a half-Cauchy hyper-prior.

In [5]:
# Home-court advantage priors, keyed by the label used in output file names.
# Each value is passed unchanged to the model as `advantage_prior`.
# Per the notebook text, the tuple is (prior mean, prior variance setting):
# the mean is 0 ("flat") or 4 ("sokol"); the second element is the fixed value
# 10 ("simple") or None ("hyper"), where None presumably tells the model to use
# the half-Cauchy hyper-prior instead — TODO confirm against the model classes.
advantage_priors = {
    'flat-hyper' : (0, None),
    'flat-simple' : (0, 10),
    'sokol-hyper' : (4, None),
    'sokol-simple' : (4, 10),    
}

## Model definitions

Two model specifications are used:
 - A model that treats home-court advantage as a uniform phenomenon across all teams
 - A model that allows for team-specific home-court advantages (that are linked via a hyperparameter drawn from a "global" home-court advantage)

In [6]:
# Model builders, keyed by the label used in output file names.
# The run loop calls each value as model(data=..., advantage_prior=...) inside
# an active `pm.Model()` context.
#   "single" - one home-court advantage shared by all teams (per the text above)
#   "team"   - team-specific home-court advantages (per the text above)
models = {
    "single": OneAdvantage.Mod,
    "team": TeamSpecificAdvantage.Mod
}

## Run MCMC and save results

In [None]:
# Fit every (data configuration, advantage prior, model) combination and
# persist the results. For each data configuration a folder is created under
# STORAGE_PATH that receives the pickled dataset plus, per prior/model
# combination, one NetCDF trace file and one rendered model graph.
# A combination whose trace file already exists is skipped, so the cell can be
# re-run after an interruption without repeating finished work.
for dname, dconfig in data_configs.items():

    # get the dataset
    dt = data_utils.compute_data(all_games, **dconfig)

    # create the folder; joining with pathlib (rather than string
    # concatenation) keeps this correct even if STORAGE_PATH has no
    # trailing slash
    path = Path(STORAGE_PATH) / dname
    path.mkdir(parents=True, exist_ok=True)

    # save the data
    with (path / 'data.pickle').open(mode='wb') as f:
        pickle.dump(dt, f)

    for pname, pconfig in advantage_priors.items():
        for mname, model in models.items():
            print("\n \n")
            print(pname, mname)
            print("------------------- \n \n")

            TRACE_NAME = path / f"{pname}-{mname}.netcdf"
            GRAPH_NAME = path / f"{pname}-{mname}-graph"

            if TRACE_NAME.exists():
                print("Already exists - skipping \n")
                continue

            print("Tracing... \n")
            with pm.Model() as m:
                # populate the model context with the chosen specification
                model(data=dt, advantage_prior=pconfig)

                # Render the graph BEFORE sampling: it only needs the model
                # structure, and a rendering failure (e.g. the Graphviz
                # executable is missing) then surfaces immediately instead of
                # discarding a finished sampling run that was not yet saved.
                pm.model_graph.model_to_graphviz(m).render(GRAPH_NAME, format='png')

                # run the trace
                trace = pm.sample(draws=2_000, tune=500, cores=6, chains=2, random_seed=42)

            # persist the trace; its presence is what marks this combination
            # as done on subsequent runs
            az.to_netcdf(trace, TRACE_NAME)
            
            
                
            