# Model evaluation
In some cases, we may not know the number of segments or we may wish to compare a power law against other types of rating models.
For these cases, we can use information criteria to select the best model.

In [1]:
# load tutorial data
from ratingcurve import data
%load_ext autoreload
%autoreload 2

import pymc as pm
import arviz as az
from ratingcurve.ratingmodel import PowerLawRating

from ratingcurve import data

df = data.load('green channel')

Fit the data to ratings with 1 to 4 segments and determine which is best.

In [2]:
%%capture
# OUtput supressed, this will print "Finished" after running each of the four models

segments = [1, 2, 3, 4]
traces = []
for segment in segments:
    print(segment)
    powerrating = PowerLawRating(q=df['q'],
                             h=df['stage'], 
                             q_sigma=df['q_sigma'],
                             segments=segment,
                             prior={'distribution':'uniform'})
    with powerrating:
            mean_field = pm.fit(method='advi', n=150_000)
            trace = mean_field.sample(5000)
            traces.append(pm.compute_log_likelihood(trace)) # Add arg to compute log likelihood

Finished [100%]: Average Loss = -36.637
Finished [100%]: Average Loss = -45.044
Finished [100%]: Average Loss = -39.748
Finished [100%]: Average Loss = -30.945


Now use `arviz.compare` to rank the fitted models by WAIC and present the results as a table.

In [3]:
import warnings

# Pair each trace with the segment count it was fitted with. Using zip avoids
# assuming that `segments` is exactly 1..N in order, as index arithmetic would.
compare_dict = {f'{n_segments} segment': fitted_trace
                for n_segments, fitted_trace in zip(segments, traces)}

# Silence warnings only while computing the comparison, instead of muting
# every warning for the remainder of the notebook session.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    comparison = az.compare(compare_dict, ic='waic')

# Last expression of the cell: rendered as the comparison table.
comparison

Unnamed: 0,rank,elpd_waic,p_waic,elpd_diff,weight,se,dse,warning,scale
2 segment,0,71.676817,6.716892,0.0,1.0,3.774308,0.0,True,log
3 segment,1,68.701477,9.912723,2.97534,0.0,4.51635,3.546537,True,log
4 segment,2,57.744562,15.794742,13.932255,0.0,8.108971,7.458791,True,log
1 segment,3,53.445265,4.588826,18.231552,3.463896e-13,3.313198,5.063585,True,log


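As we expected, the 2-segment model was ranked highest. Note that the `warning` column is `True` for every model, which means the WAIC estimates may be unreliable, so treat small differences between models with caution.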