In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy
import pymc as pm
import arviz as az
import dill
import os



In [3]:
from thesis_tools.utils.data import *
from thesis_tools.models.frequentist import *
from thesis_tools.models.bayesian_univariate_time_series import *

In [4]:
# Load the panel data used throughout the notebook (later cells rely on its
# 'group' column). read_panel_data comes from one of the star imports above —
# presumably thesis_tools.utils.data; TODO confirm.
# NOTE(review): observations_threshold=0 and exclude_2023=False look like
# "apply no filtering" settings — verify against the helper's definition.
df = read_panel_data(observations_threshold=0, exclude_2023=False)

In [5]:
# Go through every stored model in
# ../../Stored_Models/bayesian_univariate_time_series_no_covariates/
# and count the divergent transitions recorded in its trace.
# The result is a table indexed by file name, worst offenders first, so that
# problematic fits can be spotted (and retrained) easily.
divergence_dict = {}
path = "../../Stored_Models/bayesian_univariate_time_series_no_covariates/"
# sorted() makes the scan order (and therefore the order of ties in the final
# table) reproducible; os.listdir returns entries in arbitrary order.
for file in sorted(os.listdir(path)):
    # Models are saved as "<group>_<model_type>.pkl"; skip anything else
    # (sub-directories, editor/OS artefacts) instead of crashing on it.
    if not file.endswith(".pkl"):
        continue
    # NOTE: dill, like pickle, can execute arbitrary code while loading —
    # only load model files that were produced by this project.
    with open(os.path.join(path, file), "rb") as f:
        temp_model = dill.load(f)
    temp_trace = temp_model.get_trace()
    # 'diverging' holds one flag per draw; summing the flags counts them.
    n_divergences = int(temp_trace['sample_stats']['diverging'].values.sum())
    divergence_dict[file] = n_divergences
divergence_df = (
    pd.DataFrame(list(divergence_dict.items()), columns=['model', 'n_divergences'])
    # A stable sort keeps models with equal counts in alphabetical order.
    .sort_values(by='n_divergences', ascending=False, kind='stable')
    .set_index('model')
)

In [6]:
# Show the number of divergences per stored model file, highest count first.
divergence_df

Unnamed: 0_level_0,n_divergences
model,Unnamed: 1_level_1
Brazil_Weibull.pkl,949
Brazil_GeneralisedPareto.pkl,80
India_GeneralisedPareto.pkl,0
Canada_GeneralisedPareto.pkl,0
Italy_GeneralisedPareto.pkl,0
Germany_GeneralisedPareto.pkl,0
Japan_Pareto.pkl,0
Southeast Asia_Weibull.pkl,0
Asian Islands_Pareto.pkl,0
Scandinavia_Pareto.pkl,0


In [7]:
def train_or_retrieve_model(
    panel_df: pd.DataFrame,
    group: str,
    model_type: str,
    retrain_if_saved: bool = False,
    model_dir: str = "../../Stored_Models/bayesian_univariate_time_series_no_covariates/",
):
    """Return the fitted no-covariates model for one group, using a disk cache.

    The model is stored as ``<model_dir>/<group>_<model_type>.pkl``. If that
    file exists and ``retrain_if_saved`` is False it is loaded with dill and
    returned as-is; otherwise a new model is built from the rows of
    ``panel_df`` whose ``'group'`` column equals ``group``, fitted, written to
    that path (overwriting any previous file) and returned.

    Parameters
    ----------
    panel_df : pd.DataFrame
        Panel data containing at least a ``'group'`` column.
    group : str
        Value of the ``'group'`` column selecting the rows to model.
    model_type : str
        One of ``'Pareto'``, ``'Weibull'`` or ``'GeneralisedPareto'``.
    retrain_if_saved : bool, default False
        If True, ignore an existing saved model and fit a fresh one.
    model_dir : str, optional
        Directory holding the pickled models; created on demand when a newly
        fitted model is saved.

    Returns
    -------
    The loaded or newly fitted model object.

    Raises
    ------
    ValueError
        If a model has to be trained and ``model_type`` is not supported.
    """
    model_name = f"{group}_{model_type}"
    model_path = os.path.join(model_dir, f"{model_name}.pkl")

    if os.path.exists(model_path) and not retrain_if_saved:
        # NOTE: dill, like pickle, can execute arbitrary code while loading —
        # only point model_dir at files produced by this project.
        with open(model_path, "rb") as f:
            return dill.load(f)

    # Resolve the model class first so that an unsupported model_type fails
    # with a clear message before any data is copied or sampled.
    if model_type == 'Pareto':
        model_class = Univariate_Pareto_TimeSeries_NoCovariates
    elif model_type == 'Weibull':
        model_class = Univariate_Weibull_TimeSeries_NoCovariates
    elif model_type == 'GeneralisedPareto':
        model_class = Univariate_GeneralisedPareto_TimeSeries_NoCovariates
    else:
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected one of "
            "'Pareto', 'Weibull', 'GeneralisedPareto'."
        )

    # Deep copy so the model never holds a view onto the caller's frame.
    data = copy.deepcopy(panel_df[panel_df['group'] == group])
    model = model_class(panel_df=data)
    model.fit(nuts_sampler='nutpie', target_accept=0.99)

    # Make sure the storage directory exists before writing the cache file.
    parent_dir = os.path.dirname(model_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(model_path, "wb") as f:
        dill.dump(model, f)
    return model

In [8]:
# Visit every (group, model type) combination once, groups in the order they
# appear in df and the three model types within each group. Each call either
# loads the saved model or trains and saves a new one.
model_types = ['Pareto', 'Weibull', 'GeneralisedPareto']
group_model_pairs = [
    (group_name, type_name)
    for group_name in df['group'].unique()
    for type_name in model_types
]
for group, model_type in group_model_pairs:
    print(f"Group: {group}, Model: {model_type}")
    model = train_or_retrieve_model(
        panel_df=df,
        group=group,
        model_type=model_type,
        retrain_if_saved=False,
    )

Group: Alps, Model: Pareto
Group: Alps, Model: Weibull
Group: Alps, Model: GeneralisedPareto
Group: Asian Islands, Model: Pareto
Group: Asian Islands, Model: Weibull
Group: Asian Islands, Model: GeneralisedPareto
Group: Australia, Model: Pareto
Group: Australia, Model: Weibull
Group: Australia, Model: GeneralisedPareto
Group: Brazil, Model: Pareto
Group: Brazil, Model: Weibull
Group: Brazil, Model: GeneralisedPareto
Group: British Islands, Model: Pareto
Group: British Islands, Model: Weibull
Group: British Islands, Model: GeneralisedPareto
Group: Canada, Model: Pareto
Group: Canada, Model: Weibull
Group: Canada, Model: GeneralisedPareto
Group: China, Model: Pareto
Group: China, Model: Weibull
Group: China, Model: GeneralisedPareto
Group: France, Model: Pareto
Group: France, Model: Weibull
Group: France, Model: GeneralisedPareto
Group: Germany, Model: Pareto
Group: Germany, Model: Weibull
Group: Germany, Model: GeneralisedPareto
Group: India, Model: Pareto
Group: India, Model: Weibull
G