In [21]:
import pandas as pd
import matplotlib.pyplot as plt
import tsaugmentation as tsag
import numpy as np

In [None]:
from sklearn.metrics import mean_squared_error

def mase(n,seas,h,y,f):
    return np.mean(((n-seas)/h
            * (np.sum(np.abs(y[n:n+h,:] - f), axis=0)
               / np.sum(np.abs(y[seas:n, :] - y[:n-seas, :]), axis=0))))

def calculate_metrics(pred_samples,
                      groups):

    pred_s0 = pred_samples.shape[0]
    pred_s1 = pred_samples.shape[1]
    pred_s2 = pred_samples.shape[2]

    pred_samples = pred_samples.reshape(pred_s0, pred_s1*pred_s2, order='F')

    seasonality = groups['seasonality']
    h = groups['h']
    
    n = groups['predict']['n']
    s = groups['predict']['s']
    y_f = groups['predict']['data'].reshape(s, n).T
    y_all_g = {}
    f_all_g = {}
    
    mase_ = {}
    rmse_ = {}
    
    # Bottom
    y_all_g['bottom'] = y_f
    f_all_g['bottom'] = np.mean(pred_samples, axis=0).reshape(s, n).T[n-h:n,:]
    
    mase_['bottom'] = np.round(mase(n=n-h, 
                                     seas=seasonality, 
                                     h=h, 
                                     y=y_f, 
                                     f=np.mean(pred_samples, axis=0).reshape(s, n).T[n-h:n,:]),3)
    rmse_['bottom'] = np.round(mean_squared_error(y_f[n-h:n,:], np.mean(pred_samples, axis=0).reshape(s, n).T[n-h:n,:], squared=False), 3)

    # Total
    y_all_g['total'] = np.sum(y_f, axis=1).reshape(-1,1)
    f_all_g['total'] = np.sum(np.mean(pred_samples, axis=0).reshape(s, n).T[n-h:n,:], axis=1).reshape(-1,1)
    
    mase_['total'] = np.round(mase(n=n-h, 
                                     seas=seasonality, 
                                     h=h, 
                                     y=np.sum(y_f, axis=1).reshape(-1,1), 
                                     f=np.sum(np.mean(pred_samples, axis=0).reshape(s, n).T[n-h:n,:], axis=1).reshape(-1,1))
                            ,3)
    rmse_['total'] = np.round(mean_squared_error(np.sum(y_f, axis=1).reshape(-1,1)[n-h:n,:], 
                                             np.sum(np.mean(pred_samples, axis=0).reshape(s, n).T[n-h:n,:],axis=1).reshape(-1,1), 
                                             squared=False), 3)

    # Groups
    idx_dict_new = {}
    for group in list(groups['predict']['groups_names'].keys()):
        y_g = np.zeros((groups['predict']['n'], groups['predict']['groups_names'][group].shape[0]))
        f_g = np.zeros((h, groups['predict']['groups_names'][group].shape[0]))

        for idx, name in enumerate(groups['predict']['groups_names'][group]):               

            g_n = groups['predict']['groups_n'][group]

            idx_dict_new[name] = np.where(groups['predict']['groups_idx'][group]==idx,1,0)

            y_g[:,idx] = np.sum(idx_dict_new[name]*y_f, axis=1)
            f_g[:,idx] = np.sum(idx_dict_new[name]*np.mean(pred_samples, axis=0).reshape(s, n).T, axis=1)[n-h:n]

        y_all_g[group] = np.sum(y_g, axis=1).reshape(-1,1)
        f_all_g[group] = np.sum(f_g, axis=1).reshape(-1,1)

        mase_[group] = np.round(mase(n=n-h, 
                                     seas=seasonality, 
                                     h=h, 
                                     y=y_g, 
                                     f=f_g)
                                ,3)

        rmse_[group] = np.round(mean_squared_error(y_g[n-h:n,:], f_g, squared=False), 3)

    # All
    y_all = np.concatenate([y_all_g[x] for x in y_all_g], 1)
    f_all = np.concatenate([f_all_g[x] for x in f_all_g], 1)

    mase_['all'] = np.round(mase(n=n-h, 
                         seas=seasonality, 
                         h=h, 
                         y=y_all, 
                         f=f_all),3)
    rmse_['all'] = np.round(mean_squared_error(y_all[n-h:n,:], f_all, squared=False), 3)
    
    results = {}
    results['mase'] = mase_
    results['rmse'] = rmse_
    return results

In [6]:
dataset = tsag.preprocessing.PreprocessDatasets('prison')
groups = dataset.apply_preprocess()
pred_mint = pd.read_csv('results_prison_hts.csv', delimiter=';')

In [13]:
def calculate_metrics_mint(pred_mint, groups):
    sort_groups=[]
    groups_names = pred_mint.columns[:-3]
    for group in groups_names:
        pred_mint = pred_mint.loc[(pred_mint[group]!='<aggregated>')]
        sort_group = np.unique(groups['train']['groups_names'][group.lower()][groups['train']['groups_idx'][group.lower()]])
        pred_mint[group] = pred_mint[group].astype("category")
        pred_mint[group].cat.set_categories(sort_group, inplace=True)

    pred_mint = pred_mint.sort_values([k.title() for k in groups['train']['groups_names']])

    pred_mint = pred_mint.reset_index().drop('index', axis=1)

    for group in groups_names:
        # Assert order is correct between original dataset and predictions
        np.testing.assert_array_equal(np.unique(pred_mint[group]), np.unique(groups['train']['groups_names'][group.lower()][groups['train']['groups_idx'][group.lower()]]))                           

    h = groups['h']
    s = groups['train']['s']
    n = groups['train']['n']

    pred = pred_mint['.mean'].to_numpy().reshape(s, h).T
    pred_complete = np.concatenate((np.zeros((n, s)), pred), axis=0)[np.newaxis,:,:]
    return pred_complete

In [14]:
pred_complete = calculate_metrics_mint(pred_mint, groups)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pred_mint[group] = pred_mint[group].astype("category")


In [20]:
calculate_metrics(pred_complete, groups)

{'mase': {'bottom': 2.058,
  'total': 0.895,
  'state': 1.784,
  'gender': 0.914,
  'legal': 2.317,
  'all': 1.928},
 'rmse': {'bottom': 116.29,
  'total': 1145.002,
  'state': 344.181,
  'gender': 574.731,
  'legal': 1130.692,
  'all': 230.592}}