# Importing the packages and data

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import scipy
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from scipy.special import gamma, digamma, polygamma
import math

from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import random

import scipy.optimize
from scipy.optimize import Bounds

from scipy.stats import ttest_ind

In [3]:
from prettytable import PrettyTable

In [4]:
import dirichlet_regression

In [5]:
from scipy.optimize import fmin, newton, minimize

In [6]:
def create_features_matrices(n_samples,n_features,choice_W='X_dependent',threshold_neighbors=0.3,nneighbors=5,cov_mat=None):
    """Simulate covariates and a row-normalised spatial weight matrix.

    Parameters
    ----------
    n_samples : int
        Number of observations to simulate.
    n_features : int
        Number of covariates (an intercept column is added on top of these).
    choice_W : str, default 'X_dependent'
        How the neighbourhood matrix is built:

        * ``'X_dependent'``: two observations are neighbours when the Euclidean
          distance between their rows of X is below ``threshold_neighbors``;
        * ``'random_distance'``: random locations are drawn, a k-nearest-neighbour
          graph is built on them and each link is weighted by the inverse distance;
        * any other value: same random locations, with the unweighted graph
          returned by ``kneighbors_graph``.
    threshold_neighbors : float, default 0.3
        Distance cut-off, only used when ``choice_W == 'X_dependent'``.
    nneighbors : int, default 5
        ``n_neighbors`` given to ``NearestNeighbors`` (random-location modes only).
    cov_mat : array-like of shape (n_features, n_features), optional
        Covariance matrix of the Gaussian covariates. When omitted, unit variances
        and a covariance of 0.2 between every pair of covariates are used, i.e.
        ``[[1, 0.2], [0.2, 1]]`` for two features.

    Returns
    -------
    X : ndarray of shape (n_samples, n_features + 1)
        Column of ones followed by the Gaussian covariates.
    Z : ndarray of shape (n_samples, n_features)
        Uniform draws on [0, 1).
    W : ndarray of shape (n_samples, n_samples)
        Weight matrix with a zero diagonal whose rows sum to 1. Rows of
        observations without any neighbour are left at zero instead of NaN.
    """
    if cov_mat is None:
        # generalises the historical 2x2 default to any number of features
        cov_mat = np.full((n_features,n_features), 0.2)
        np.fill_diagonal(cov_mat, 1.)

    # NOTE: the order of the random draws (X, then Z, then the locations) is kept
    # unchanged so that seeded runs give the same data as before.
    X = np.random.multivariate_normal([0.]*n_features,cov_mat,size=n_samples)
    # prepend the intercept column
    X = np.hstack([np.ones((n_samples,1)), X])
    Z = np.random.uniform(size=(n_samples,n_features))

    if choice_W == 'X_dependent':
        distance_matrix = scipy.spatial.distance_matrix(X,X)
        W = np.zeros(np.shape(distance_matrix))
        W[distance_matrix < threshold_neighbors] = 1
    elif choice_W == 'random_distance':
        random_spatial_distance = np.random.rand(n_samples,n_features)
        neighbors = NearestNeighbors(n_neighbors=nneighbors).fit(random_spatial_distance)
        W = neighbors.kneighbors_graph(random_spatial_distance, mode='distance').toarray()
        # turn distances into weights: the closer, the heavier
        W[W>0] = 1/W[W>0]
    else:
        random_spatial_distance = np.random.rand(n_samples,n_features)
        neighbors = NearestNeighbors(n_neighbors=nneighbors).fit(random_spatial_distance)
        W = neighbors.kneighbors_graph(random_spatial_distance).toarray()

    # an observation is not its own neighbour
    np.fill_diagonal(W,0)
    # scale every row to sum to 1; rows without neighbours have a zero sum and are
    # kept at zero (a plain division would fill them with NaN)
    row_sums = W.sum(axis=1)[:,None]
    W = np.divide(W, row_sums, out=np.zeros_like(W), where=row_sums>0)
    return X, Z, W

In [7]:
def cos_similarity(x1,x2):
    """Average row-wise cosine similarity between two matrices.

    Row i of ``x1`` is compared with row i of ``x2``; the function returns the
    mean of these per-row cosine similarities.
    """
    row_similarities = []
    for row_index in range(len(x1)):
        u, v = x1[row_index], x2[row_index]
        row_similarities.append(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)))
    return np.mean(row_similarities)

# With two features

In [8]:
# number of covariates drawn by create_features_matrices (the intercept is added on top)
n_features = 2
# number of components of the response Y (beta below has one column per component)
n_classes = 3

In [10]:
# seed NumPy's global random generator for the cells below
np.random.seed(21)

# True regression coefficients used to simulate the data (passed to compute_mu_spatial with X).
# Presumably rows follow the columns of X (intercept, x1, x2) and columns the classes — TODO confirm.
# The first column is all zeros; only beta[:,1:] is compared with the estimates further down.
beta = np.array([[0.  , 0. , .1],
                 [0.  , 1., -2.],
                 [0.  , -1., -2. ]])

# true coefficients of the precision model: phi = exp(Z @ gamma_var) in estimating_parameters
gamma_var = np.array([2.,3.])

In [11]:
# Monte-Carlo settings: number of simulated data sets per sample size, and the sample sizes.
# NOTE(review): the calls to estimating_parameters below pass these values literally
# rather than reading these variables.
n_repeat = 100
list_n_samples = [50,200,1000]

In [12]:
# covariance of the two Gaussian covariates (unit variances, covariance 0.2), passed as cov_mat to create_features_matrices
cov_matrix = np.array([[1., 0.2], [0.2, 1.]])

# Estimation of the parameters

In [80]:
# sample size and spatial parameter used for this single simulated data set
n_samples=200
rho=0.9

# covariates X (with intercept column), precision covariates Z, row-normalised neighbourhood matrix W
X,Z,W = create_features_matrices(n_samples,n_features,choice_W='random_distance',nneighbors=10,cov_mat=cov_matrix)
# overwrite the first column of Z with ones so that it acts as an intercept
Z[:,0] = 1
# matrix I - rho*W handed to compute_mu_spatial
M = np.identity(n_samples) - rho*W

mu = dirichlet_regression.compute_mu_spatial(X, beta, M)
# the precision is held constant at 15 here; the Z-driven alternative is kept commented out
#phi = np.exp(np.matmul(Z,gamma_var))
phi = 15*np.ones(n_samples)
# Dirichlet parameters: each row of mu scaled by the precision of that observation
alpha = mu*phi[:,None]

# one Dirichlet draw per observation
Y = np.array([np.random.dirichlet(alpha_i) for alpha_i in alpha])
# shrink Y slightly towards 1/n_classes so that no component is exactly 0 or 1
Y = (Y*(n_samples-1)+1/n_classes)/n_samples

In [77]:
# column totals of Y (one value per class)
np.sum(Y,axis=0)

array([38.38067552, 76.35362904, 85.26569544])

In [78]:
%%time
# Spatial model; no `loss` argument is given, so the regressor's default loss is used.
# Z is supplied for the 'alternative' parametrization and maxfun=5000 is forwarded to the regressor.
reg_spatial = dirichlet_regression.dirichletRegressor(spatial=True, maxfun=5000)
reg_spatial.fit(X, Y, parametrization='alternative', Z=Z, W=W, fit_intercept=False, verbose=1)


b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
Wall time: 7.22 s


In [79]:
%%time
# same spatial model, fitted with the cross-entropy loss instead of the default one
reg_spatial_ce = dirichlet_regression.dirichletRegressor(spatial=True)
reg_spatial_ce.fit(X, Y, parametrization='alternative', Z=Z, W=W, fit_intercept=False, verbose=1, loss='crossentropy')


Optimization terminated successfully.
Wall time: 606 ms


Test on another set of data: re-run the data-generation cell above (the one that creates X, Z, W and Y) before running the two evaluation cells below.

In [81]:
# predictions of the spatial model fitted with the default loss
mu_pred = reg_spatial.pred(X,W)
print('R2:',r2_score(Y,mu_pred))
print('Cos similarity:',cos_similarity(Y, mu_pred))
# RMSE = mean over the classes of the per-class root mean squared error.
# `squared=False` is deprecated in scikit-learn 1.4 and removed in 1.6; this form gives the
# same number as `squared=False` did (scikit-learn >= 0.24) and works on every version.
print('RMSE:',np.mean(np.sqrt(mean_squared_error(Y,mu_pred,multioutput='raw_values'))))

R2: 0.7406959828864667
Cos similarity: 0.9779101601507182
RMSE: 0.1225223647569414


In [82]:
# predictions of the spatial model fitted with the cross-entropy loss
mu_pred = reg_spatial_ce.pred(X,W)
print('R2:',r2_score(Y,mu_pred))
print('Cos similarity:',cos_similarity(Y, mu_pred))
# RMSE = mean over the classes of the per-class root mean squared error.
# `squared=False` is deprecated in scikit-learn 1.4 and removed in 1.6; this form gives the
# same number as `squared=False` did (scikit-learn >= 0.24) and works on every version.
print('RMSE:',np.mean(np.sqrt(mean_squared_error(Y,mu_pred,multioutput='raw_values'))))

R2: 0.9594509374281396
Cos similarity: 0.9968907893785736
RMSE: 0.04279544323423783


---

In [232]:
def estimating_parameters(rho, n_repeat=100, list_n_samples=[50,200,1000], cov_matrix=None):
    """Monte-Carlo study: simulate spatial Dirichlet data and fit four models on each data set.

    For every sample size, ``n_repeat`` data sets are simulated (each with its own
    seed: 0, 1, 2, ... across all sample sizes) and four regressors are fitted in
    this order: spatial, non-spatial, spatial with cross-entropy loss, non-spatial
    with cross-entropy loss.

    The function reads the notebook-level variables ``beta``, ``gamma_var``,
    ``n_features`` and ``n_classes``.

    Parameters
    ----------
    rho : float
        Spatial parameter used to simulate the data (M = I - rho*W).
    n_repeat : int, default 100
        Number of simulated data sets per sample size.
    list_n_samples : iterable of int, default [50, 200, 1000]
        Sample sizes to run. The default list is never modified.
    cov_matrix : array-like, optional
        Covariance of the covariates, forwarded to ``create_features_matrices``.

    Returns
    -------
    tuple of four lists
        ``(spatial, no_spatial, ce_spatial, ce_no_spatial)``. Each list has one entry
        per sample size, itself a list with one parameter vector per fitted data set:
        flattened ``beta`` followed, where the model has them, by ``gamma`` and ``rho``.

    Notes
    -----
    ``RuntimeError`` and ``numpy.linalg.LinAlgError`` raised while simulating or
    fitting are reported with a message and the repetition is abandoned. Estimates
    appended before the failure are kept, so the four lists may end up with
    different lengths.
    """
    list_solutions_spatial, list_solutions_no_spatial = [], []
    list_solutions_ce_spatial, list_solutions_ce_no_spatial = [], []

    # one distinct seed per simulated data set, increasing over sample sizes and repetitions
    seed = 0

    for n_samples in list_n_samples:
        solutions_spatial_temp, solutions_no_spatial_temp = [], []
        solutions_ce_spatial_temp, solutions_ce_no_spatial_temp = [], []

        for _ in range(n_repeat):
            np.random.seed(seed)
            seed += 1

            X,Z,W = create_features_matrices(n_samples,n_features,choice_W='random_distance',nneighbors=10,cov_mat=cov_matrix)
            # first column of Z acts as the intercept of the precision model
            Z[:,0] = 1
            M = np.identity(n_samples) - rho*W

            try:
                mu = dirichlet_regression.compute_mu_spatial(X, beta, M)
                phi = np.exp(np.matmul(Z,gamma_var))
                alpha = mu*phi[:,None]

                Y = np.array([np.random.dirichlet(alpha_i) for alpha_i in alpha])
                # shrink Y slightly towards 1/n_classes so that no component is exactly 0 or 1
                Y = (Y*(n_samples-1)+1/n_classes)/n_samples

                reg_spatial = dirichlet_regression.dirichletRegressor(spatial=True, maxfun=5000)
                reg_spatial.fit(X, Y, parametrization='alternative', Z=Z, W=W, fit_intercept=False, verbose=0)
                solutions_spatial_temp.append(np.concatenate([reg_spatial.beta.flatten(),reg_spatial.gamma,[reg_spatial.rho]]))

                reg_no_spatial = dirichlet_regression.dirichletRegressor(spatial=False, maxfun=5000)
                reg_no_spatial.fit(X, Y, parametrization='alternative', Z=Z, fit_intercept=False, verbose=0)
                solutions_no_spatial_temp.append(np.concatenate([reg_no_spatial.beta.flatten(),reg_no_spatial.gamma]))

                reg_spatial_ce = dirichlet_regression.dirichletRegressor(spatial=True, maxfun=5000)
                reg_spatial_ce.fit(X, Y, loss='crossentropy', W=W, fit_intercept=False, verbose=0)
                solutions_ce_spatial_temp.append(np.concatenate([reg_spatial_ce.beta.flatten(),[reg_spatial_ce.rho]]))

                reg_no_spatial_ce = dirichlet_regression.dirichletRegressor(spatial=False, maxfun=5000)
                reg_no_spatial_ce.fit(X, Y, loss='crossentropy', fit_intercept=False, verbose=0)
                solutions_ce_no_spatial_temp.append(reg_no_spatial_ce.beta.flatten())

            except RuntimeError:
                print("Factor is exactly singular")
            except np.linalg.LinAlgError:
                print("Singular matrix")

        list_solutions_spatial.append(solutions_spatial_temp)
        list_solutions_no_spatial.append(solutions_no_spatial_temp)
        list_solutions_ce_spatial.append(solutions_ce_spatial_temp)
        list_solutions_ce_no_spatial.append(solutions_ce_no_spatial_temp)

    return list_solutions_spatial, list_solutions_no_spatial, list_solutions_ce_spatial, list_solutions_ce_no_spatial

## rho=0.1

In [233]:
%%time
# Monte-Carlo study with rho=0.1: 100 data sets for each of the three sample sizes (long-running cell)
list_solutions_spatial, list_solutions_no_spatial, list_solutions_ce_spatial, list_solutions_ce_no_spatial = estimating_parameters(0.1, n_repeat=100, list_n_samples=[50,200,1000], cov_matrix=cov_matrix)

Wall time: 17min 22s


In [234]:
# Reference values for the rho=0.1 run above: beta without its all-zero first column, gamma, then rho.
# rho is written out explicitly: the notebook-level variable `rho` was set to 0.9 by the
# single-data-set cell earlier and does not describe this run.
true_params_effective = np.concatenate([beta[:,1:].flatten(),gamma_var, [0.1]])
true_params_effective_no_spatial = np.concatenate([beta[:,1:].flatten(),gamma_var])

In [235]:
# Store the rho=0.1 estimates. The nested lists are converted by np.save into regular arrays,
# which only works when every sample size has the same number of fitted data sets.
np.save('Data Dirichlet/dirichlet_solutions_spatial_rho01.npy',list_solutions_spatial)
np.save('Data Dirichlet/dirichlet_solutions_no_spatial_rho01.npy',list_solutions_no_spatial)
np.save('Data Dirichlet/dirichlet_solutions_ce_spatial_rho01.npy',list_solutions_ce_spatial)
np.save('Data Dirichlet/dirichlet_solutions_ce_no_spatial_rho01.npy',list_solutions_ce_no_spatial)

## rho=0.5

In [242]:
%%time
# Monte-Carlo study with rho=0.5: 100 data sets for each of the three sample sizes (long-running cell)
list_solutions_spatial, list_solutions_no_spatial, list_solutions_ce_spatial, list_solutions_ce_no_spatial = estimating_parameters(0.5, n_repeat=100, list_n_samples=[50,200,1000], cov_matrix=cov_matrix)

Singular matrix
Singular matrix
Singular matrix
Wall time: 29min 54s


In [243]:
# Some repetitions were skipped ("Singular matrix"), so the per-sample-size lists can have
# different lengths. np.save cannot turn such a ragged nested list into a regular array
# (deprecation warning on older NumPy, ValueError from NumPy 1.24 on). Each result is therefore
# stored explicitly as a 1-D object array holding one 2-D float array per sample size.
# These files must be read back with np.load(..., allow_pickle=True).
for file_name, solutions in [
        ('Data Dirichlet/dirichlet_solutions_spatial_rho05.npy', list_solutions_spatial),
        ('Data Dirichlet/dirichlet_solutions_no_spatial_rho05.npy', list_solutions_no_spatial),
        ('Data Dirichlet/dirichlet_solutions_ce_spatial_rho05.npy', list_solutions_ce_spatial),
        ('Data Dirichlet/dirichlet_solutions_ce_no_spatial_rho05.npy', list_solutions_ce_no_spatial)]:
    solutions_by_sample_size = np.empty(len(solutions), dtype=object)
    # element-wise assignment keeps each block intact even when all blocks share a shape
    for k, runs in enumerate(solutions):
        solutions_by_sample_size[k] = np.asarray(runs)
    np.save(file_name, solutions_by_sample_size)

  return array(a, dtype, copy=False, order=order, subok=True)


## rho = 0.9

In [251]:
%%time
# Monte-Carlo study with rho=0.9: 100 data sets for each of the three sample sizes (long-running cell)
list_solutions_spatial, list_solutions_no_spatial, list_solutions_ce_spatial, list_solutions_ce_no_spatial = estimating_parameters(0.9, n_repeat=100, list_n_samples=[50,200,1000], cov_matrix=cov_matrix)

Singular matrix
Singular matrix
Singular matrix


  df = (f(*((xk + d,) + args)) - f0) / d[k]
  df = (f(*((xk + d,) + args)) - f0) / d[k]


Wall time: 1h 13min 6s


In [252]:
# Some repetitions were skipped ("Singular matrix"), so the per-sample-size lists can have
# different lengths. np.save cannot turn such a ragged nested list into a regular array
# (deprecation warning on older NumPy, ValueError from NumPy 1.24 on). Each result is therefore
# stored explicitly as a 1-D object array holding one 2-D float array per sample size.
# These files must be read back with np.load(..., allow_pickle=True).
for file_name, solutions in [
        ('Data Dirichlet/dirichlet_solutions_spatial_rho09.npy', list_solutions_spatial),
        ('Data Dirichlet/dirichlet_solutions_no_spatial_rho09.npy', list_solutions_no_spatial),
        ('Data Dirichlet/dirichlet_solutions_ce_spatial_rho09.npy', list_solutions_ce_spatial),
        ('Data Dirichlet/dirichlet_solutions_ce_no_spatial_rho09.npy', list_solutions_ce_no_spatial)]:
    solutions_by_sample_size = np.empty(len(solutions), dtype=object)
    # element-wise assignment keeps each block intact even when all blocks share a shape
    for k, runs in enumerate(solutions):
        solutions_by_sample_size[k] = np.asarray(runs)
    np.save(file_name, solutions_by_sample_size)

  return array(a, dtype, copy=False, order=order, subok=True)


## rho=0

In [276]:
%%time
# Monte-Carlo study with rho=0 (no spatial dependence in the simulated data): 100 data sets per sample size
list_solutions_spatial, list_solutions_no_spatial, list_solutions_ce_spatial, list_solutions_ce_no_spatial = estimating_parameters(0., n_repeat=100, list_n_samples=[50,200,1000], cov_matrix=cov_matrix)

Wall time: 22min 27s


In [277]:
# Store the rho=0 estimates. The nested lists are converted by np.save into regular arrays,
# which only works when every sample size has the same number of fitted data sets.
np.save('Data Dirichlet/dirichlet_solutions_spatial_rho00.npy',list_solutions_spatial)
np.save('Data Dirichlet/dirichlet_solutions_no_spatial_rho00.npy',list_solutions_no_spatial)
np.save('Data Dirichlet/dirichlet_solutions_ce_spatial_rho00.npy',list_solutions_ce_spatial)
np.save('Data Dirichlet/dirichlet_solutions_ce_no_spatial_rho00.npy',list_solutions_ce_no_spatial)

# Analysis of the results

In [13]:
from texttable import Texttable
import latextable

In [14]:
# Reload the rho=0.1 estimates. No allow_pickle here, so the files must contain regular
# numeric arrays (presumably shape (sample size, repetition, parameter) — TODO confirm).
list_solutions_spatial_01 = np.load('Data Dirichlet/dirichlet_solutions_spatial_rho01.npy')
list_solutions_no_spatial_01 = np.load('Data Dirichlet/dirichlet_solutions_no_spatial_rho01.npy')
list_solutions_ce_spatial_01 = np.load('Data Dirichlet/dirichlet_solutions_ce_spatial_rho01.npy')
list_solutions_ce_no_spatial_01 = np.load('Data Dirichlet/dirichlet_solutions_ce_no_spatial_rho01.npy')

In [15]:
# Reload the rho=0.5 estimates. allow_pickle=True is needed because runs were skipped for this
# rho, so the results are presumably stored as object arrays rather than regular numeric ones.
# NOTE(review): unpickling can execute arbitrary code — only load .npy files produced by this notebook.
list_solutions_spatial_05 = np.load('Data Dirichlet/dirichlet_solutions_spatial_rho05.npy', allow_pickle=True)
list_solutions_no_spatial_05 = np.load('Data Dirichlet/dirichlet_solutions_no_spatial_rho05.npy', allow_pickle=True)
list_solutions_ce_spatial_05 = np.load('Data Dirichlet/dirichlet_solutions_ce_spatial_rho05.npy', allow_pickle=True)
list_solutions_ce_no_spatial_05 = np.load('Data Dirichlet/dirichlet_solutions_ce_no_spatial_rho05.npy', allow_pickle=True)

In [16]:
# Reload the rho=0.9 estimates. allow_pickle=True is needed because runs were skipped for this
# rho, so the results are presumably stored as object arrays rather than regular numeric ones.
# NOTE(review): unpickling can execute arbitrary code — only load .npy files produced by this notebook.
list_solutions_spatial_09 = np.load('Data Dirichlet/dirichlet_solutions_spatial_rho09.npy', allow_pickle=True)
list_solutions_no_spatial_09 = np.load('Data Dirichlet/dirichlet_solutions_no_spatial_rho09.npy', allow_pickle=True)
list_solutions_ce_spatial_09 = np.load('Data Dirichlet/dirichlet_solutions_ce_spatial_rho09.npy', allow_pickle=True)
list_solutions_ce_no_spatial_09 = np.load('Data Dirichlet/dirichlet_solutions_ce_no_spatial_rho09.npy', allow_pickle=True)

In [17]:
# Reference parameter vectors the estimates are compared with: beta without its all-zero
# first column (flattened row by row), then gamma, then the rho used for the simulation.
true_params_effective_01 = np.concatenate([beta[:,1:].flatten(),gamma_var, [0.1]])
true_params_effective_05 = np.concatenate([beta[:,1:].flatten(),gamma_var, [0.5]])
true_params_effective_09 = np.concatenate([beta[:,1:].flatten(),gamma_var, [0.9]])
# the non-spatial model has no rho
true_params_effective_no_spatial = np.concatenate([beta[:,1:].flatten(),gamma_var])

In [18]:
# Reload the rho=0 estimates and build the matching reference vector.
# NOTE(review): only the cross-entropy files are read with allow_pickle=True; unpickling can
# execute arbitrary code, so only load .npy files produced by this notebook.
list_solutions_spatial_00 = np.load('Data Dirichlet/dirichlet_solutions_spatial_rho00.npy')
list_solutions_no_spatial_00 = np.load('Data Dirichlet/dirichlet_solutions_no_spatial_rho00.npy')
list_solutions_ce_spatial_00 = np.load('Data Dirichlet/dirichlet_solutions_ce_spatial_rho00.npy', allow_pickle=True)
list_solutions_ce_no_spatial_00 = np.load('Data Dirichlet/dirichlet_solutions_ce_no_spatial_rho00.npy', allow_pickle=True)
true_params_effective_00 = np.concatenate([beta[:,1:].flatten(),gamma_var, [0.]])

### Results of maximum likelihood

In [281]:
# Summary table for rho=0.1: estimation error of the spatial (S) and non-spatial (NS) models.
# Every parameter takes two table rows: "mean (std)" of the error, then "[MSE]".
table_params = Texttable()
table_params.add_row(["Parameter", "S n=50", "S n=200", "S n=1000",
                      "NS n=50", "NS n=200", "NS n=1000"])

# LaTeX labels spanning the two rows of each parameter
param_names = ["\\multirow{2}{*}{$\\beta_{01}$}", "\\multirow{2}{*}{$\\beta_{02}$}", "\\multirow{2}{*}{$\\beta_{11}$}",
               "\\multirow{2}{*}{$\\beta_{12}$}", "\\multirow{2}{*}{$\\beta_{21}$}", "\\multirow{2}{*}{$\\beta_{22}$}",
               "\\multirow{2}{*}{$\\gamma_0$}", "\\multirow{2}{*}{$\\gamma_1$}", "\\multirow{2}{*}{$\\rho$}" ]
param_names_2 = [""]*len(param_names)

columns_1 = []   # "mean (std)" cells, one column per model and sample size
columns_2 = []   # "[MSE]" cells, same layout

# (estimates, true values, opening of the MSE cell, padding for the parameters the model lacks)
for solutions, truth, mse_open, padding in [
        (list_solutions_spatial_01, true_params_effective_01, '[', []),
        (list_solutions_no_spatial_01, true_params_effective_no_spatial, ' [', ["/"])]:
    for ns in range(3):
        estimates = np.asarray(solutions[ns])
        # actual number of fitted data sets: a hard-coded 100 breaks as soon as one run is skipped
        n_runs = len(estimates)
        columns_1.append([str(np.round(np.mean(estimates[:,i] - truth[i]),3))
                          +' ('+str(np.round(np.std(estimates[:,i] - truth[i]),3))+')' for i in range(len(truth))] + padding)
        columns_2.append([mse_open+  str(np.round(mean_squared_error(estimates[:,i], [truth[i]]*n_runs),3)) +']' for i in range(len(truth))] + padding)

columns_1.insert(0,param_names)
columns_2.insert(0,param_names_2)

# columns -> rows, then interleave the two rows of every parameter
tr_columns_1 = np.transpose(columns_1)
tr_columns_2 = np.transpose(columns_2)
for i in range(len(tr_columns_1)):
    table_params.add_row(tr_columns_1[i])
    table_params.add_row(tr_columns_2[i])

print(table_params.draw())

+-----------+-----------+----------+----------+----------+----------+----------+
| Parameter | S n=50    | S n=200  | S n=1000 | NS n=50  | NS n=200 | NS       |
|           |           |          |          |          |          | n=1000   |
+-----------+-----------+----------+----------+----------+----------+----------+
| \multirow | 0.013     | 0.012    | 0.01     | 0.014    | 0.012    | 0.011    |
| {2}{*}{$\ | (0.059)   | (0.032)  | (0.016)  | (0.067)  | (0.036)  | (0.018)  |
| beta_{01} |           |          |          |          |          |          |
| $}        |           |          |          |          |          |          |
+-----------+-----------+----------+----------+----------+----------+----------+
|           | [0.004]   | [0.001]  | [0.0]    |  [0.005] |  [0.001] |  [0.0]   |
+-----------+-----------+----------+----------+----------+----------+----------+
| \multirow | 0.038     | 0.035    | 0.031    | 0.051    | 0.055    | 0.047    |
| {2}{*}{$\ | (0.087)   | (0

In [262]:
# Caption fixes: `\rho` needs its backslash to render as the Greek letter, and the bracketed
# figures of the table come from mean_squared_error, not from a standard deviation.
print(latextable.draw_latex(table_params, caption="Difference between the estimated parameters and the true parameters ($\\rho=0.1$). The results are presented as the mean of the differences on the 100 iterations, the standard deviation within parenthesis, and the mean squared error within square brackets."))

\begin{table}
	\begin{center}
		\begin{tabular}{|l|l|l|l|l|l|l|}
			\hline
			 \\
			\hline
			Parameter & S n=50 & S n=200 & S n=1000 & NS n=50 & NS n=200 & NS n=1000 \\
			\hline
			\multirow{2}{*}{$\beta_{01}$} & 0.013 (0.059) & 0.012 (0.032) & 0.01 (0.016) & 0.014 (0.067) & 0.012 (0.036) & 0.011 (0.018) \\
			\hline
			 & [0.004] & [0.001] & [0.0] &  [0.005] &  [0.001] &  [0.0] \\
			\hline
			\multirow{2}{*}{$\beta_{02}$} & 0.038 (0.087) & 0.035 (0.034) & 0.031 (0.016) & 0.051 (0.106) & 0.055 (0.046) & 0.047 (0.021) \\
			\hline
			 & [0.009] & [0.002] & [0.001] &  [0.014] &  [0.005] &  [0.003] \\
			\hline
			\multirow{2}{*}{$\beta_{11}$} & -0.062 (0.079) & -0.037 (0.038) & -0.019 (0.017) & -0.067 (0.081) & -0.04 (0.038) & -0.021 (0.017) \\
			\hline
			 & [0.01] & [0.003] & [0.001] &  [0.011] &  [0.003] &  [0.001] \\
			\hline
			\multirow{2}{*}{$\beta_{12}$} & 0.277 (0.12) & 0.185 (0.051) & 0.141 (0.023) & 0.288 (0.127) & 0.198 (0.055) & 0.158 (0.024) \\
			\hline
			 & [0.091]

In [272]:
# Summary table for rho=0.5: estimation error of the spatial (S) and non-spatial (NS) models.
# Every parameter takes two table rows: "mean (std)" of the error, then "[MSE]".
table_params = Texttable()
table_params.add_row(["Parameter", "S n=50", "S n=200", "S n=1000",
                      "NS n=50", "NS n=200", "NS n=1000"])

# LaTeX labels spanning the two rows of each parameter
param_names = ["\\multirow{2}{*}{$\\beta_{01}$}", "\\multirow{2}{*}{$\\beta_{02}$}", "\\multirow{2}{*}{$\\beta_{11}$}",
               "\\multirow{2}{*}{$\\beta_{12}$}", "\\multirow{2}{*}{$\\beta_{21}$}", "\\multirow{2}{*}{$\\beta_{22}$}",
               "\\multirow{2}{*}{$\\gamma_0$}", "\\multirow{2}{*}{$\\gamma_1$}", "\\multirow{2}{*}{$\\rho$}" ]
param_names_2 = [""]*len(param_names)

columns_1 = []   # "mean (std)" cells, one column per model and sample size
columns_2 = []   # "[MSE]" cells, same layout

# (estimates, true values, opening of the MSE cell, padding for the parameters the model lacks)
for solutions, truth, mse_open, padding in [
        (list_solutions_spatial_05, true_params_effective_05, '[', []),
        (list_solutions_no_spatial_05, true_params_effective_no_spatial, ' [', ["/"])]:
    for ns in range(3):
        # the number of fitted data sets can differ between sample sizes (skipped runs)
        estimates = np.array(solutions[ns])
        n_runs = len(solutions[ns])
        columns_1.append([str(np.round(np.mean(estimates[:,i] - truth[i]),3))
                          +' ('+str(np.round(np.std(estimates[:,i] - truth[i]),3))+')' for i in range(len(truth))] + padding)
        columns_2.append([mse_open+  str(np.round(mean_squared_error(estimates[:,i], [truth[i]]*n_runs),3)) +']' for i in range(len(truth))] + padding)

columns_1.insert(0,param_names)
columns_2.insert(0,param_names_2)

# columns -> rows, then interleave the two rows of every parameter
tr_columns_1 = np.transpose(columns_1)
tr_columns_2 = np.transpose(columns_2)
for i in range(len(tr_columns_1)):
    table_params.add_row(tr_columns_1[i])
    table_params.add_row(tr_columns_2[i])

print(table_params.draw())

+-----------+-----------+----------+----------+----------+----------+----------+
| Parameter | S n=50    | S n=200  | S n=1000 | NS n=50  | NS n=200 | NS       |
|           |           |          |          |          |          | n=1000   |
+-----------+-----------+----------+----------+----------+----------+----------+
| \multirow | 0.009     | 0.011    | 0.007    | 0.029    | 0.03     | 0.028    |
| {2}{*}{$\ | (0.042)   | (0.02)   | (0.009)  | (0.157)  | (0.079)  | (0.037)  |
| beta_{01} |           |          |          |          |          |          |
| $}        |           |          |          |          |          |          |
+-----------+-----------+----------+----------+----------+----------+----------+
|           | [0.002]   | [0.001]  | [0.0]    |  [0.026] |  [0.007] |  [0.002] |
+-----------+-----------+----------+----------+----------+----------+----------+
| \multirow | 0.012     | 0.023    | 0.017    | 0.116    | 0.184    | 0.144    |
| {2}{*}{$\ | (0.047)   | (0

In [273]:
# Caption fixes: `\rho` needs its backslash to render as the Greek letter, and the bracketed
# figures of the table come from mean_squared_error, not from a standard deviation.
print(latextable.draw_latex(table_params, caption="Difference between the estimated parameters and the true parameters ($\\rho=0.5$). The results are presented as the mean of the differences on the 100 iterations, the standard deviation within parenthesis, and the mean squared error within square brackets."))

\begin{table}
	\begin{center}
		\begin{tabular}{|l|l|l|l|l|l|l|}
			\hline
			 \\
			\hline
			Parameter & S n=50 & S n=200 & S n=1000 & NS n=50 & NS n=200 & NS n=1000 \\
			\hline
			\multirow{2}{*}{$\beta_{01}$} & 0.009 (0.042) & 0.011 (0.02) & 0.007 (0.009) & 0.029 (0.157) & 0.03 (0.079) & 0.028 (0.037) \\
			\hline
			 & [0.002] & [0.001] & [0.0] &  [0.026] &  [0.007] &  [0.002] \\
			\hline
			\multirow{2}{*}{$\beta_{02}$} & 0.012 (0.047) & 0.023 (0.017) & 0.017 (0.009) & 0.116 (0.312) & 0.184 (0.147) & 0.144 (0.066) \\
			\hline
			 & [0.002] & [0.001] & [0.0] &  [0.111] &  [0.055] &  [0.025] \\
			\hline
			\multirow{2}{*}{$\beta_{11}$} & -0.06 (0.083) & -0.037 (0.037) & -0.024 (0.016) & -0.163 (0.122) & -0.17 (0.057) & -0.155 (0.027) \\
			\hline
			 & [0.011] & [0.003] & [0.001] &  [0.042] &  [0.032] &  [0.025] \\
			\hline
			\multirow{2}{*}{$\beta_{12}$} & 0.356 (0.123) & 0.221 (0.061) & 0.176 (0.029) & 0.682 (0.205) & 0.622 (0.099) & 0.598 (0.049) \\
			\hline
			 & [0.142]

In [274]:
# Summary table for rho=0.9: estimation error of the spatial (S) and non-spatial (NS) models.
# Every parameter takes two table rows: "mean (std)" of the error, then "[MSE]".
table_params = Texttable()
table_params.add_row(["Parameter", "S n=50", "S n=200", "S n=1000",
                      "NS n=50", "NS n=200", "NS n=1000"])

# LaTeX labels spanning the two rows of each parameter
param_names = ["\\multirow{2}{*}{$\\beta_{01}$}", "\\multirow{2}{*}{$\\beta_{02}$}", "\\multirow{2}{*}{$\\beta_{11}$}",
               "\\multirow{2}{*}{$\\beta_{12}$}", "\\multirow{2}{*}{$\\beta_{21}$}", "\\multirow{2}{*}{$\\beta_{22}$}",
               "\\multirow{2}{*}{$\\gamma_0$}", "\\multirow{2}{*}{$\\gamma_1$}", "\\multirow{2}{*}{$\\rho$}" ]
param_names_2 = [""]*len(param_names)

columns_1 = []   # "mean (std)" cells, one column per model and sample size
columns_2 = []   # "[MSE]" cells, same layout

# (estimates, true values, opening of the MSE cell, padding for the parameters the model lacks)
for solutions, truth, mse_open, padding in [
        (list_solutions_spatial_09, true_params_effective_09, '[', []),
        (list_solutions_no_spatial_09, true_params_effective_no_spatial, ' [', ["/"])]:
    for ns in range(3):
        # the number of fitted data sets can differ between sample sizes (skipped runs)
        estimates = np.array(solutions[ns])
        n_runs = len(solutions[ns])
        columns_1.append([str(np.round(np.mean(estimates[:,i] - truth[i]),3))
                          +' ('+str(np.round(np.std(estimates[:,i] - truth[i]),3))+')' for i in range(len(truth))] + padding)
        columns_2.append([mse_open+  str(np.round(mean_squared_error(estimates[:,i], [truth[i]]*n_runs),3)) +']' for i in range(len(truth))] + padding)

columns_1.insert(0,param_names)
columns_2.insert(0,param_names_2)

# columns -> rows, then interleave the two rows of every parameter
tr_columns_1 = np.transpose(columns_1)
tr_columns_2 = np.transpose(columns_2)
for i in range(len(tr_columns_1)):
    table_params.add_row(tr_columns_1[i])
    table_params.add_row(tr_columns_2[i])

print(table_params.draw())

+-----------+-----------+----------+----------+----------+----------+----------+
| Parameter | S n=50    | S n=200  | S n=1000 | NS n=50  | NS n=200 | NS       |
|           |           |          |          |          |          | n=1000   |
+-----------+-----------+----------+----------+----------+----------+----------+
| \multirow | 0.007     | 0.011    | 0.01     | 0.096    | 0.019    | 0.044    |
| {2}{*}{$\ | (0.076)   | (0.021)  | (0.011)  | (0.596)  | (0.24)   | (0.097)  |
| beta_{01} |           |          |          |          |          |          |
| $}        |           |          |          |          |          |          |
+-----------+-----------+----------+----------+----------+----------+----------+
|           | [0.006]   | [0.001]  | [0.0]    |  [0.364] |  [0.058] |  [0.011] |
+-----------+-----------+----------+----------+----------+----------+----------+
| \multirow | -0.022    | -0.019   | -0.024   | 0.303    | 0.298    | 0.206    |
| {2}{*}{$\ | (0.126)   | (0

In [275]:
# Caption fixes: `\rho` needs its backslash to render as the Greek letter, and the bracketed
# figures of the table come from mean_squared_error, not from a standard deviation.
print(latextable.draw_latex(table_params, caption="Difference between the estimated parameters and the true parameters ($\\rho=0.9$). The results are presented as the mean of the differences on the 100 iterations, the standard deviation within parenthesis, and the mean squared error within square brackets."))

\begin{table}
	\begin{center}
		\begin{tabular}{|l|l|l|l|l|l|l|}
			\hline
			 \\
			\hline
			Parameter & S n=50 & S n=200 & S n=1000 & NS n=50 & NS n=200 & NS n=1000 \\
			\hline
			\multirow{2}{*}{$\beta_{01}$} & 0.007 (0.076) & 0.011 (0.021) & 0.01 (0.011) & 0.096 (0.596) & 0.019 (0.24) & 0.044 (0.097) \\
			\hline
			 & [0.006] & [0.001] & [0.0] &  [0.364] &  [0.058] &  [0.011] \\
			\hline
			\multirow{2}{*}{$\beta_{02}$} & -0.022 (0.126) & -0.019 (0.03) & -0.024 (0.017) & 0.303 (1.096) & 0.298 (0.381) & 0.206 (0.165) \\
			\hline
			 & [0.016] & [0.001] & [0.001] &  [1.292] &  [0.234] &  [0.069] \\
			\hline
			\multirow{2}{*}{$\beta_{11}$} & -0.418 (0.335) & -0.423 (0.194) & -0.39 (0.112) & -0.653 (0.254) & -0.696 (0.113) & -0.673 (0.05) \\
			\hline
			 & [0.287] & [0.217] & [0.165] &  [0.491] &  [0.496] &  [0.455] \\
			\hline
			\multirow{2}{*}{$\beta_{12}$} & 1.193 (0.481) & 1.185 (0.345) & 1.148 (0.218) & 1.588 (0.27) & 1.559 (0.088) & 1.531 (0.049) \\
			\hline
			 & [1.6

In [22]:
# Summary table for rho=0: estimation error of the spatial (S) and non-spatial (NS) models.
# Every parameter takes two table rows: "mean (std)" of the error, then "[MSE]".
table_params = Texttable()
table_params.add_row(["Parameter", "S n=50", "S n=200", "S n=1000",
                      "NS n=50", "NS n=200", "NS n=1000"])

# LaTeX labels spanning the two rows of each parameter
param_names = ["\\multirow{2}{*}{$\\beta_{01}$}", "\\multirow{2}{*}{$\\beta_{02}$}", "\\multirow{2}{*}{$\\beta_{11}$}",
               "\\multirow{2}{*}{$\\beta_{12}$}", "\\multirow{2}{*}{$\\beta_{21}$}", "\\multirow{2}{*}{$\\beta_{22}$}",
               "\\multirow{2}{*}{$\\gamma_0$}", "\\multirow{2}{*}{$\\gamma_1$}", "\\multirow{2}{*}{$\\rho$}" ]
param_names_2 = [""]*len(param_names)

columns_1 = []   # "mean (std)" cells, one column per model and sample size
columns_2 = []   # "[MSE]" cells, same layout

# (estimates, true values, opening of the MSE cell, padding for the parameters the model lacks)
for solutions, truth, mse_open, padding in [
        (list_solutions_spatial_00, true_params_effective_00, '[', []),
        (list_solutions_no_spatial_00, true_params_effective_no_spatial, ' [', ["/"])]:
    for ns in range(3):
        estimates = np.asarray(solutions[ns])
        # actual number of fitted data sets: a hard-coded 100 breaks as soon as one run is skipped
        n_runs = len(estimates)
        columns_1.append([str(np.round(np.mean(estimates[:,i] - truth[i]),3))
                          +' ('+str(np.round(np.std(estimates[:,i] - truth[i]),3))+')' for i in range(len(truth))] + padding)
        columns_2.append([mse_open+  str(np.round(mean_squared_error(estimates[:,i], [truth[i]]*n_runs),3)) +']' for i in range(len(truth))] + padding)

columns_1.insert(0,param_names)
columns_2.insert(0,param_names_2)

# columns -> rows, then interleave the two rows of every parameter
tr_columns_1 = np.transpose(columns_1)
tr_columns_2 = np.transpose(columns_2)
for i in range(len(tr_columns_1)):
    table_params.add_row(tr_columns_1[i])
    table_params.add_row(tr_columns_2[i])

print(table_params.draw())

+-----------+-----------+----------+----------+----------+----------+----------+
| Parameter | S n=50    | S n=200  | S n=1000 | NS n=50  | NS n=200 | NS       |
|           |           |          |          |          |          | n=1000   |
+-----------+-----------+----------+----------+----------+----------+----------+
| \multirow | 0.649     | 0.553    | 0.461    | 0.617    | 0.543    | 0.46     |
| {2}{*}{$\ | (0.418)   | (0.203)  | (0.087)  | (0.393)  | (0.201)  | (0.085)  |
| beta_{01} |           |          |          |          |          |          |
| $}        |           |          |          |          |          |          |
+-----------+-----------+----------+----------+----------+----------+----------+
|           | [0.596]   | [0.347]  | [0.221]  |  [0.535] |  [0.335] |  [0.219] |
+-----------+-----------+----------+----------+----------+----------+----------+
| \multirow | -0.384    | -0.359   | -0.313   | -0.342   | -0.349   | -0.311   |
| {2}{*}{$\ | (0.433)   | (0

### Results on cross-entropy

In [19]:
# Reference vectors for the cross-entropy fits: same as for the other tables but without gamma,
# since the cross-entropy solutions stored by estimating_parameters contain only beta (and rho).
true_params_effective_01_ce = np.concatenate([beta[:,1:].flatten(), [0.1]])
true_params_effective_05_ce = np.concatenate([beta[:,1:].flatten(), [0.5]])
true_params_effective_09_ce = np.concatenate([beta[:,1:].flatten(), [0.9]])
# non-spatial cross-entropy model: beta only
true_params_effective_no_spatial_ce = np.concatenate([beta[:,1:].flatten()])

In [20]:
# Error table for the cross-entropy estimator when the true rho is 0.1.
# Every parameter occupies two consecutive rows: "mean (std)" of the
# estimation error, then "[MSE]" with respect to the true value.
table_params = Texttable()
table_params.add_row(["Parameter", "S n=50", "S n=200", "S n=1000",
                      "NS n=50", "NS n=200", "NS n=1000"])

param_names = ["\\multirow{2}{*}{$\\beta_{01}$}", "\\multirow{2}{*}{$\\beta_{02}$}", "\\multirow{2}{*}{$\\beta_{11}$}",
               "\\multirow{2}{*}{$\\beta_{12}$}", "\\multirow{2}{*}{$\\beta_{21}$}", "\\multirow{2}{*}{$\\beta_{22}$}",
               "\\multirow{2}{*}{$\\rho$}" ]
param_names_2 = [""]*len(param_names)

columns_1 = []
columns_2 = []

# Spatial solutions: one column per sample size.
for ns in range(3):
    # np.array(...) accepts a list of solution vectors as well as a 2-D array,
    # and the number of runs is taken from the data instead of assuming 100,
    # matching the rho=0.5 and rho=0.9 cells.
    solutions = np.array(list_solutions_ce_spatial_01[ns])
    n_runs = len(list_solutions_ce_spatial_01[ns])
    col_to_add = [str(np.round(np.mean(solutions[:,i] - true_params_effective_01_ce[i]),3))
                  +' ('+str(np.round(np.std(solutions[:,i] - true_params_effective_01_ce[i]),3))+')'
                  for i in range(len(true_params_effective_01_ce))]
    columns_1.append(col_to_add)
    col_to_add = ['['+  str(np.round(mean_squared_error(solutions[:,i], [true_params_effective_01_ce[i]]*n_runs),3)) +']'
                  for i in range(len(true_params_effective_01_ce))]
    columns_2.append(col_to_add)

# Non-spatial solutions: the reference vector has no rho entry, so "/" fills
# the rho row of these columns.
for ns in range(3):
    solutions = np.array(list_solutions_ce_no_spatial_01[ns])
    n_runs = len(list_solutions_ce_no_spatial_01[ns])
    col_to_add = [str(np.round(np.mean(solutions[:,i] - true_params_effective_no_spatial_ce[i]),3))
                  +' ('+str(np.round(np.std(solutions[:,i] - true_params_effective_no_spatial_ce[i]),3))+')'
                  for i in range(len(true_params_effective_no_spatial_ce))] + ["/"]
    columns_1.append(col_to_add)
    col_to_add = [' ['+  str(np.round(mean_squared_error(solutions[:,i], [true_params_effective_no_spatial_ce[i]]*n_runs),3)) +']'
                  for i in range(len(true_params_effective_no_spatial_ce))] + ["/"]
    columns_2.append(col_to_add)

# The first column holds the parameter labels (blank on the MSE rows).
columns_1.insert(0,param_names)
columns_2.insert(0,param_names_2)

# Columns were built per sample size; transpose to get one row per parameter.
tr_columns_1 = np.transpose(columns_1)
tr_columns_2 = np.transpose(columns_2)
for i in range(len(tr_columns_1)):
    table_params.add_row(tr_columns_1[i])
    table_params.add_row(tr_columns_2[i])

print(table_params.draw())

+-----------+-----------+----------+----------+----------+----------+----------+
| Parameter | S n=50    | S n=200  | S n=1000 | NS n=50  | NS n=200 | NS       |
|           |           |          |          |          |          | n=1000   |
+-----------+-----------+----------+----------+----------+----------+----------+
| \multirow | 0.019     | 0.002    | 0.002    | 0.02     | 0.0      | 0.002    |
| {2}{*}{$\ | (0.088)   | (0.051)  | (0.022)  | (0.102)  | (0.058)  | (0.025)  |
| beta_{01} |           |          |          |          |          |          |
| $}        |           |          |          |          |          |          |
+-----------+-----------+----------+----------+----------+----------+----------+
|           | [0.008]   | [0.003]  | [0.0]    |  [0.011] |  [0.003] |  [0.001] |
+-----------+-----------+----------+----------+----------+----------+----------+
| \multirow | 0.021     | 0.004    | 0.003    | 0.037    | 0.022    | 0.016    |
| {2}{*}{$\ | (0.108)   | (0

In [21]:
# Raw string so that "\rho" reaches LaTeX unchanged; the bracketed values in
# the table are mean squared errors, which the caption now states.
print(latextable.draw_latex(table_params, caption=r"Difference between the estimated parameters with the cross-entropy minimization and the true parameters ($\rho=0.1$). The results are presented as the mean of the differences on the 100 iterations, the standard deviation within parentheses, and the mean squared error within square brackets."))

\begin{table}
	\begin{center}
		\begin{tabular}{|l|l|l|l|l|l|l|}
			\hline
			 \\
			\hline
			Parameter & S n=50 & S n=200 & S n=1000 & NS n=50 & NS n=200 & NS n=1000 \\
			\hline
			\multirow{2}{*}{$\beta_{01}$} & 0.019 (0.088) & 0.002 (0.051) & 0.002 (0.022) & 0.02 (0.102) & 0.0 (0.058) & 0.002 (0.025) \\
			\hline
			 & [0.008] & [0.003] & [0.0] &  [0.011] &  [0.003] &  [0.001] \\
			\hline
			\multirow{2}{*}{$\beta_{02}$} & 0.021 (0.108) & 0.004 (0.048) & 0.003 (0.022) & 0.037 (0.13) & 0.022 (0.056) & 0.016 (0.027) \\
			\hline
			 & [0.012] & [0.002] & [0.0] &  [0.018] &  [0.004] &  [0.001] \\
			\hline
			\multirow{2}{*}{$\beta_{11}$} & -0.052 (0.117) & -0.02 (0.051) & -0.002 (0.023) & -0.052 (0.117) & -0.018 (0.051) & -0.001 (0.024) \\
			\hline
			 & [0.016] & [0.003] & [0.001] &  [0.016] &  [0.003] &  [0.001] \\
			\hline
			\multirow{2}{*}{$\beta_{12}$} & 0.141 (0.127) & 0.043 (0.076) & 0.008 (0.035) & 0.145 (0.129) & 0.046 (0.077) & 0.013 (0.036) \\
			\hline
			 & [0.036] 

In [22]:
# Error table for the cross-entropy estimator when the true rho is 0.5.
# Every parameter occupies two consecutive rows: "mean (std)" of the
# estimation error, then "[MSE]" with respect to the true value.
table_params = Texttable()
table_params.add_row(["Parameter"]
                     + ["S n=50", "S n=200", "S n=1000"]
                     + ["NS n=50", "NS n=200", "NS n=1000"])

param_names = ["\\multirow{2}{*}{$" + symbol + "$}"
               for symbol in ("\\beta_{01}", "\\beta_{02}", "\\beta_{11}",
                              "\\beta_{12}", "\\beta_{21}", "\\beta_{22}", "\\rho")]
param_names_2 = ["" for _ in param_names]

# The label column comes first; one data column per sample size follows.
columns_1 = [param_names]
columns_2 = [param_names_2]

# Spatial solutions.
for size_idx in range(3):
    estimates = np.array(list_solutions_ce_spatial_05[size_idx])
    n_runs = len(list_solutions_ce_spatial_05[size_idx])
    mean_std_cells = []
    mse_cells = []
    for k in range(len(true_params_effective_05_ce)):
        target = true_params_effective_05_ce[k]
        mean_std_cells.append(str(np.round(np.mean(estimates[:,k] - target),3))
                              + ' (' + str(np.round(np.std(estimates[:,k] - target),3)) + ')')
        mse_cells.append('[' + str(np.round(mean_squared_error(estimates[:,k], [target]*n_runs),3)) + ']')
    columns_1.append(mean_std_cells)
    columns_2.append(mse_cells)

# Non-spatial solutions: the reference vector has no rho entry, so "/" fills
# the rho row of these columns.
for size_idx in range(3):
    estimates = np.array(list_solutions_ce_no_spatial_05[size_idx])
    n_runs = len(list_solutions_ce_no_spatial_05[size_idx])
    mean_std_cells = []
    mse_cells = []
    for k in range(len(true_params_effective_no_spatial_ce)):
        target = true_params_effective_no_spatial_ce[k]
        mean_std_cells.append(str(np.round(np.mean(estimates[:,k] - target),3))
                              + ' (' + str(np.round(np.std(estimates[:,k] - target),3)) + ')')
        mse_cells.append(' [' + str(np.round(mean_squared_error(estimates[:,k], [target]*n_runs),3)) + ']')
    columns_1.append(mean_std_cells + ["/"])
    columns_2.append(mse_cells + ["/"])

# Transpose so that each parameter yields its two rows, added in turn.
for mean_std_row, mse_row in zip(np.transpose(columns_1), np.transpose(columns_2)):
    table_params.add_row(mean_std_row)
    table_params.add_row(mse_row)

print(table_params.draw())

+-----------+-----------+----------+----------+----------+----------+----------+
| Parameter | S n=50    | S n=200  | S n=1000 | NS n=50  | NS n=200 | NS       |
|           |           |          |          |          |          | n=1000   |
+-----------+-----------+----------+----------+----------+----------+----------+
| \multirow | -0.001    | 0.006    | 0.001    | 0.013    | 0.015    | 0.022    |
| {2}{*}{$\ | (0.058)   | (0.031)  | (0.013)  | (0.216)  | (0.115)  | (0.051)  |
| beta_{01} |           |          |          |          |          |          |
| $}        |           |          |          |          |          |          |
+-----------+-----------+----------+----------+----------+----------+----------+
|           | [0.003]   | [0.001]  | [0.0]    |  [0.047] |  [0.013] |  [0.003] |
+-----------+-----------+----------+----------+----------+----------+----------+
| \multirow | -0.005    | 0.007    | 0.001    | 0.123    | 0.206    | 0.169    |
| {2}{*}{$\ | (0.062)   | (0

In [23]:
# Raw string so that "\rho" reaches LaTeX unchanged; the bracketed values in
# the table are mean squared errors, which the caption now states.
print(latextable.draw_latex(table_params, caption=r"Difference between the estimated parameters with the cross-entropy minimization and the true parameters ($\rho=0.5$). The results are presented as the mean of the differences on the 100 iterations, the standard deviation within parentheses, and the mean squared error within square brackets."))

\begin{table}
	\begin{center}
		\begin{tabular}{|l|l|l|l|l|l|l|}
			\hline
			 \\
			\hline
			Parameter & S n=50 & S n=200 & S n=1000 & NS n=50 & NS n=200 & NS n=1000 \\
			\hline
			\multirow{2}{*}{$\beta_{01}$} & -0.001 (0.058) & 0.006 (0.031) & 0.001 (0.013) & 0.013 (0.216) & 0.015 (0.115) & 0.022 (0.051) \\
			\hline
			 & [0.003] & [0.001] & [0.0] &  [0.047] &  [0.013] &  [0.003] \\
			\hline
			\multirow{2}{*}{$\beta_{02}$} & -0.005 (0.062) & 0.007 (0.027) & 0.001 (0.013) & 0.123 (0.377) & 0.206 (0.195) & 0.169 (0.086) \\
			\hline
			 & [0.004] & [0.001] & [0.0] &  [0.157] &  [0.08] &  [0.036] \\
			\hline
			\multirow{2}{*}{$\beta_{11}$} & -0.036 (0.104) & -0.011 (0.052) & -0.002 (0.026) & -0.026 (0.139) & 0.001 (0.069) & 0.007 (0.033) \\
			\hline
			 & [0.012] & [0.003] & [0.001] &  [0.02] &  [0.005] &  [0.001] \\
			\hline
			\multirow{2}{*}{$\beta_{12}$} & 0.194 (0.121) & 0.055 (0.082) & 0.015 (0.036) & 0.314 (0.211) & 0.211 (0.112) & 0.204 (0.054) \\
			\hline
			 & [0.05

In [24]:
# Error table for the cross-entropy estimator when the true rho is 0.9.
# Every parameter occupies two consecutive rows: "mean (std)" of the
# estimation error, then "[MSE]" with respect to the true value.
table_params = Texttable()
table_params.add_row(["Parameter"]
                     + ["S n=50", "S n=200", "S n=1000"]
                     + ["NS n=50", "NS n=200", "NS n=1000"])

param_names = ["\\multirow{2}{*}{$" + symbol + "$}"
               for symbol in ("\\beta_{01}", "\\beta_{02}", "\\beta_{11}",
                              "\\beta_{12}", "\\beta_{21}", "\\beta_{22}", "\\rho")]
param_names_2 = ["" for _ in param_names]

# The label column comes first; one data column per sample size follows.
columns_1 = [param_names]
columns_2 = [param_names_2]

# Spatial solutions.
for size_idx in range(3):
    estimates = np.array(list_solutions_ce_spatial_09[size_idx])
    n_runs = len(list_solutions_ce_spatial_09[size_idx])
    mean_std_cells = []
    mse_cells = []
    for k in range(len(true_params_effective_09_ce)):
        target = true_params_effective_09_ce[k]
        mean_std_cells.append(str(np.round(np.mean(estimates[:,k] - target),3))
                              + ' (' + str(np.round(np.std(estimates[:,k] - target),3)) + ')')
        mse_cells.append('[' + str(np.round(mean_squared_error(estimates[:,k], [target]*n_runs),3)) + ']')
    columns_1.append(mean_std_cells)
    columns_2.append(mse_cells)

# Non-spatial solutions: the reference vector has no rho entry, so "/" fills
# the rho row of these columns.
for size_idx in range(3):
    estimates = np.array(list_solutions_ce_no_spatial_09[size_idx])
    n_runs = len(list_solutions_ce_no_spatial_09[size_idx])
    mean_std_cells = []
    mse_cells = []
    for k in range(len(true_params_effective_no_spatial_ce)):
        target = true_params_effective_no_spatial_ce[k]
        mean_std_cells.append(str(np.round(np.mean(estimates[:,k] - target),3))
                              + ' (' + str(np.round(np.std(estimates[:,k] - target),3)) + ')')
        mse_cells.append(' [' + str(np.round(mean_squared_error(estimates[:,k], [target]*n_runs),3)) + ']')
    columns_1.append(mean_std_cells + ["/"])
    columns_2.append(mse_cells + ["/"])

# Transpose so that each parameter yields its two rows, added in turn.
for mean_std_row, mse_row in zip(np.transpose(columns_1), np.transpose(columns_2)):
    table_params.add_row(mean_std_row)
    table_params.add_row(mse_row)

print(table_params.draw())

+-----------+-----------+----------+----------+----------+----------+----------+
| Parameter | S n=50    | S n=200  | S n=1000 | NS n=50  | NS n=200 | NS       |
|           |           |          |          |          |          | n=1000   |
+-----------+-----------+----------+----------+----------+----------+----------+
| \multirow | 0.002     | 0.0      | 0.0      | 0.143    | 0.062    | 0.179    |
| {2}{*}{$\ | (0.063)   | (0.009)  | (0.004)  | (1.442)  | (0.73)   | (0.292)  |
| beta_{01} |           |          |          |          |          |          |
| $}        |           |          |          |          |          |          |
+-----------+-----------+----------+----------+----------+----------+----------+
|           | [0.004]   | [0.0]    | [0.0]    |  [2.099] |  [0.537] |  [0.117] |
+-----------+-----------+----------+----------+----------+----------+----------+
| \multirow | -0.004    | -0.003   | -0.0     | 0.696    | 0.845    | 0.726    |
| {2}{*}{$\ | (0.088)   | (0

In [25]:
# Raw string so that "\rho" reaches LaTeX unchanged; the bracketed values in
# the table are mean squared errors, which the caption now states.
print(latextable.draw_latex(table_params, caption=r"Difference between the estimated parameters with the cross-entropy minimization and the true parameters ($\rho=0.9$). The results are presented as the mean of the differences on the 100 iterations, the standard deviation within parentheses, and the mean squared error within square brackets."))

\begin{table}
	\begin{center}
		\begin{tabular}{|l|l|l|l|l|l|l|}
			\hline
			 \\
			\hline
			Parameter & S n=50 & S n=200 & S n=1000 & NS n=50 & NS n=200 & NS n=1000 \\
			\hline
			\multirow{2}{*}{$\beta_{01}$} & 0.002 (0.063) & 0.0 (0.009) & 0.0 (0.004) & 0.143 (1.442) & 0.062 (0.73) & 0.179 (0.292) \\
			\hline
			 & [0.004] & [0.0] & [0.0] &  [2.099] &  [0.537] &  [0.117] \\
			\hline
			\multirow{2}{*}{$\beta_{02}$} & -0.004 (0.088) & -0.003 (0.01) & -0.0 (0.004) & 0.696 (1.836) & 0.845 (0.743) & 0.726 (0.34) \\
			\hline
			 & [0.008] & [0.0] & [0.0] &  [3.853] &  [1.267] &  [0.643] \\
			\hline
			\multirow{2}{*}{$\beta_{11}$} & -0.188 (0.316) & -0.029 (0.054) & -0.003 (0.023) & -0.22 (0.37) & -0.179 (0.155) & -0.218 (0.067) \\
			\hline
			 & [0.135] & [0.004] & [0.001] &  [0.185] &  [0.056] &  [0.052] \\
			\hline
			\multirow{2}{*}{$\beta_{12}$} & 0.643 (0.355) & 0.164 (0.112) & 0.033 (0.038) & 1.109 (0.426) & 1.291 (0.199) & 1.32 (0.099) \\
			\hline
			 & [0.54] & [0.039]

# Analysis of the results on a test set

In [237]:
def _prediction_metrics(Y, mu_pred, size_test):
    """Score predicted compositions `mu_pred` against observed compositions `Y`.

    Returns the tuple (r2, rmse, crossentropy, cos_similarity). The
    cross-entropy is the sum of -Y*log(mu_pred) divided by `size_test`.
    """
    # NOTE(review): `squared=False` is deprecated in recent scikit-learn
    # releases -- confirm against the installed version before upgrading.
    return (r2_score(Y, mu_pred),
            mean_squared_error(Y, mu_pred, squared=False),
            -(1/size_test)*np.sum(Y*np.log(mu_pred)),
            cos_similarity(Y, mu_pred))


def analysis_results(list_solutions_spatial, list_solutions_no_spatial, rho,
                     n_repeat=100, size_test=1000, list_n_samples=(50,200,1000)):
    """Evaluate fitted spatial and non-spatial solutions on freshly simulated data.

    For every sample size in `list_n_samples` and every stored solution of that
    group, a new data set (X, Z, W, Y) is simulated with the true parameters and
    the given `rho`, and both the spatial and the non-spatial solution are
    scored against it with R2, RMSE, cross-entropy and cosine similarity.

    Reads the notebook-level names `n_features`, `n_classes`, `beta`,
    `gamma_var`, `cos_similarity` and `create_features_matrices`.

    Parameters
    ----------
    list_solutions_spatial : sequence
        One entry per sample size; each entry is a sequence of solution vectors
        whose leading (n_features+1)*(n_classes-1) values are the coefficients
        and whose last value is the estimated rho.
    list_solutions_no_spatial : sequence
        Same layout, indexed in parallel; only the leading coefficient values
        are read.
    rho : float
        True spatial parameter used to simulate the data.
    n_repeat : int
        Unused; the number of repetitions is the number of stored spatial
        solutions. Kept so that existing calls remain valid.
    size_test : int
        Normalising constant of the Y rescaling and of the cross-entropy.
    list_n_samples : sequence of int
        Number of simulated rows for each group of solutions.

    Returns
    -------
    tuple of 8 lists
        (r2_s, rmse_s, similarity_s, crossentropy_s,
         r2_ns, rmse_ns, similarity_ns, crossentropy_ns); each is a list with
        one sub-list of per-repetition scores per sample size.

    NOTE(review): the simulated data has `n_samples` rows, yet the Y rescaling
    and the cross-entropy average use `size_test`. The two agree only when
    n_samples == size_test (the n=1000 group with the defaults) -- confirm
    which of the two is intended for the smaller groups.
    """
    list_r2_s, list_rmse_s, list_similarity_s, list_crossentropy_s = [], [], [], []
    list_r2_ns, list_rmse_ns, list_similarity_ns, list_crossentropy_ns = [], [], [], []

    n_coefs = (n_features+1)*(n_classes-1)
    seed=0

    # Follow list_n_samples instead of assuming exactly three groups.
    for n_samples_index, n_samples in enumerate(list_n_samples):
        list_r2_s_temp, list_rmse_s_temp, list_similarity_s_temp, list_crossentropy_s_temp = [], [], [], []
        list_r2_ns_temp, list_rmse_ns_temp, list_similarity_ns_temp, list_crossentropy_ns_temp = [], [], [], []

        for i in range(len(list_solutions_spatial[n_samples_index])):
            # A distinct, reproducible seed per repetition (running counter + 1000).
            np.random.seed(seed+1000)

            X,Z,W = create_features_matrices(n_samples,n_features,choice_W='random_distance',nneighbors=10)
            Z[:,0] = 1
            M = np.identity(n_samples) - rho*W
            try:
                # Simulate the responses from the true parameters.
                mu = dirichlet_regression.compute_mu_spatial(X, beta, M)
                phi = np.exp(np.matmul(Z,gamma_var))
                alpha = mu*phi[:,None]

                Y = np.array([np.random.dirichlet(alpha_i) for alpha_i in alpha])
                Y = (Y*(size_test-1)+1/n_classes)/size_test

                # Spatial solution: coefficients fill every column but the
                # first (left at zero); the last entry is the estimated rho.
                solution_spatial = list_solutions_spatial[n_samples_index][i]
                beta_sol_s = np.zeros((n_features+1,n_classes))
                beta_sol_s[:,1:] = solution_spatial[:n_coefs].reshape((n_features+1),n_classes-1)
                rho_sol_s = solution_spatial[-1]
                M_sol = np.identity(n_samples) - rho_sol_s*W
                mu_sol_s = dirichlet_regression.compute_mu_spatial(X, beta_sol_s, M_sol)
                metrics_s = _prediction_metrics(Y, mu_sol_s, size_test)

                # Non-spatial solution scored on the same simulated data.
                solution_no_spatial = list_solutions_no_spatial[n_samples_index][i]
                beta_sol_ns = np.zeros((n_features+1,n_classes))
                beta_sol_ns[:,1:] = solution_no_spatial[:n_coefs].reshape((n_features+1),n_classes-1)
                mu_sol_ns = dirichlet_regression.compute_mu(X, beta_sol_ns)
                metrics_ns = _prediction_metrics(Y, mu_sol_ns, size_test)

            except RuntimeError:
                print("Factor is exactly singular")
            except np.linalg.LinAlgError:
                print("Singular matrix")
            else:
                # Record a repetition only once every metric of both models has
                # been computed, so the spatial and non-spatial lists always
                # keep the same length.
                r2_s, rmse_s, crossentropy_s, similarity_s = metrics_s
                list_r2_s_temp.append(r2_s)
                list_rmse_s_temp.append(rmse_s)
                list_crossentropy_s_temp.append(crossentropy_s)
                list_similarity_s_temp.append(similarity_s)

                r2_ns, rmse_ns, crossentropy_ns, similarity_ns = metrics_ns
                list_r2_ns_temp.append(r2_ns)
                list_rmse_ns_temp.append(rmse_ns)
                list_crossentropy_ns_temp.append(crossentropy_ns)
                list_similarity_ns_temp.append(similarity_ns)

            # The counter advances even when a repetition is skipped.
            seed+=1
        list_r2_s.append(list_r2_s_temp)
        list_rmse_s.append(list_rmse_s_temp)
        list_similarity_s.append(list_similarity_s_temp)
        list_crossentropy_s.append(list_crossentropy_s_temp)
        list_r2_ns.append(list_r2_ns_temp)
        list_rmse_ns.append(list_rmse_ns_temp)
        list_similarity_ns.append(list_similarity_ns_temp)
        list_crossentropy_ns.append(list_crossentropy_ns_temp)
    return(list_r2_s, list_rmse_s, list_similarity_s, list_crossentropy_s,
           list_r2_ns, list_rmse_ns, list_similarity_ns, list_crossentropy_ns)

## rho=0.1

In [301]:
%%time
# Score the stored spatial / non-spatial solutions for rho=0.1 on simulated data.
(list_r2_s, list_rmse_s, list_similarity_s, list_crossentropy_s,
 list_r2_ns, list_rmse_ns, list_similarity_ns, list_crossentropy_ns) = analysis_results(
    list_solutions_spatial_01,
    list_solutions_no_spatial_01,
    rho=0.1,
)

Wall time: 10.6 s


In [302]:
# Report the mean and (variance) of each metric for the third sample-size
# group (index 2): first the non-spatial solutions, then the spatial ones.
print("DIRICHLET no spatial")
for metric_label, per_size_values in (('R2', list_r2_ns), ('RMSE', list_rmse_ns),
                                      ('Crossentropy', list_crossentropy_ns),
                                      ('Cos similarity', list_similarity_ns)):
    scores = per_size_values[2]
    print(metric_label, np.round(np.mean(scores),4), '(', np.round(np.var(scores),4), ')')
print("---\nDIRICHLET spatial")
for metric_label, per_size_values in (('R2', list_r2_s), ('RMSE', list_rmse_s),
                                      ('Crossentropy', list_crossentropy_s),
                                      ('Cos similarity', list_similarity_s)):
    scores = per_size_values[2]
    print(metric_label, np.round(np.mean(scores),4), '(', np.round(np.var(scores),4), ')')

DIRICHLET no spatial
R2 0.9309 ( 0.0 )
RMSE 0.0739 ( 0.0 )
Crossentropy 0.666 ( 0.0001 )
Cos similarity 0.9855 ( 0.0 )
---
DIRICHLET spatial
R2 0.9335 ( 0.0 )
RMSE 0.0723 ( 0.0 )
Crossentropy 0.6648 ( 0.0001 )
Cos similarity 0.9862 ( 0.0 )


In [303]:
%%time
# Score the stored cross-entropy solutions for rho=0.1 on simulated data.
(list_r2_s, list_rmse_s, list_similarity_s, list_crossentropy_s,
 list_r2_ns, list_rmse_ns, list_similarity_ns, list_crossentropy_ns) = analysis_results(
    list_solutions_ce_spatial_01,
    list_solutions_ce_no_spatial_01,
    rho=0.1,
)

Wall time: 10.6 s


In [304]:
# Report the mean and (variance) of each metric for the third sample-size
# group (index 2): first the non-spatial solutions, then the spatial ones.
print("CROSSENTROPY no spatial")
for metric_label, per_size_values in (('R2', list_r2_ns), ('RMSE', list_rmse_ns),
                                      ('Crossentropy', list_crossentropy_ns),
                                      ('Cos similarity', list_similarity_ns)):
    scores = per_size_values[2]
    print(metric_label, np.round(np.mean(scores),4), '(', np.round(np.var(scores),4), ')')
print("---\nCROSSENTROPY spatial")
for metric_label, per_size_values in (('R2', list_r2_s), ('RMSE', list_rmse_s),
                                      ('Crossentropy', list_crossentropy_s),
                                      ('Cos similarity', list_similarity_s)):
    scores = per_size_values[2]
    print(metric_label, np.round(np.mean(scores),4), '(', np.round(np.var(scores),4), ')')

CROSSENTROPY no spatial
R2 0.9314 ( 0.0 )
RMSE 0.0736 ( 0.0 )
Crossentropy 0.6655 ( 0.0001 )
Cos similarity 0.9856 ( 0.0 )
---
CROSSENTROPY spatial
R2 0.9338 ( 0.0 )
RMSE 0.072 ( 0.0 )
Crossentropy 0.6644 ( 0.0001 )
Cos similarity 0.9862 ( 0.0 )


## rho=0.5

In [305]:
%%time
# Score the stored spatial / non-spatial solutions for rho=0.5 on simulated data.
(list_r2_s, list_rmse_s, list_similarity_s, list_crossentropy_s,
 list_r2_ns, list_rmse_ns, list_similarity_ns, list_crossentropy_ns) = analysis_results(
    list_solutions_spatial_05,
    list_solutions_no_spatial_05,
    rho=0.5,
)

Wall time: 10.2 s


In [306]:
# Report the mean and (variance) of each metric for the third sample-size
# group (index 2): first the non-spatial solutions, then the spatial ones.
print("DIRICHLET no spatial")
for metric_label, per_size_values in (('R2', list_r2_ns), ('RMSE', list_rmse_ns),
                                      ('Crossentropy', list_crossentropy_ns),
                                      ('Cos similarity', list_similarity_ns)):
    scores = per_size_values[2]
    print(metric_label, np.round(np.mean(scores),4), '(', np.round(np.var(scores),4), ')')
print("---\nDIRICHLET spatial")
for metric_label, per_size_values in (('R2', list_r2_s), ('RMSE', list_rmse_s),
                                      ('Crossentropy', list_crossentropy_s),
                                      ('Cos similarity', list_similarity_s)):
    scores = per_size_values[2]
    print(metric_label, np.round(np.mean(scores),4), '(', np.round(np.var(scores),4), ')')

DIRICHLET no spatial
R2 0.8311 ( 0.0001 )
RMSE 0.1249 ( 0.0 )
Crossentropy 0.6845 ( 0.0001 )
Cos similarity 0.961 ( 0.0 )
---
DIRICHLET spatial
R2 0.9408 ( 0.0 )
RMSE 0.0705 ( 0.0 )
Crossentropy 0.6275 ( 0.0002 )
Cos similarity 0.9872 ( 0.0 )


In [307]:
# Score the stored cross-entropy solutions for rho=0.5 on simulated data.
(list_r2_s, list_rmse_s, list_similarity_s, list_crossentropy_s,
 list_r2_ns, list_rmse_ns, list_similarity_ns, list_crossentropy_ns) = analysis_results(
    list_solutions_ce_spatial_05,
    list_solutions_ce_no_spatial_05,
    rho=0.5,
)

In [308]:
# Report the mean and (variance) of each metric for the third sample-size
# group (index 2): first the non-spatial solutions, then the spatial ones.
print("CROSSENTROPY no spatial")
for metric_label, per_size_values in (('R2', list_r2_ns), ('RMSE', list_rmse_ns),
                                      ('Crossentropy', list_crossentropy_ns),
                                      ('Cos similarity', list_similarity_ns)):
    scores = per_size_values[2]
    print(metric_label, np.round(np.mean(scores),4), '(', np.round(np.var(scores),4), ')')
print("---\nCROSSENTROPY spatial")
for metric_label, per_size_values in (('R2', list_r2_s), ('RMSE', list_rmse_s),
                                      ('Crossentropy', list_crossentropy_s),
                                      ('Cos similarity', list_similarity_s)):
    scores = per_size_values[2]
    print(metric_label, np.round(np.mean(scores),4), '(', np.round(np.var(scores),4), ')')

CROSSENTROPY no spatial
R2 0.8421 ( 0.0001 )
RMSE 0.1207 ( 0.0 )
Crossentropy 0.6764 ( 0.0002 )
Cos similarity 0.9618 ( 0.0 )
---
CROSSENTROPY spatial
R2 0.9414 ( 0.0 )
RMSE 0.07 ( 0.0 )
Crossentropy 0.627 ( 0.0002 )
Cos similarity 0.9872 ( 0.0 )


## rho=0.9

In [309]:
%%time
# Score the stored spatial / non-spatial solutions for rho=0.9 on simulated data.
(list_r2_s, list_rmse_s, list_similarity_s, list_crossentropy_s,
 list_r2_ns, list_rmse_ns, list_similarity_ns, list_crossentropy_ns) = analysis_results(
    list_solutions_spatial_09,
    list_solutions_no_spatial_09,
    rho=0.9,
)

Wall time: 10.6 s


In [310]:
# Report the mean and (variance) of each metric for the third sample-size
# group (index 2): first the non-spatial solutions, then the spatial ones.
print("DIRICHLET no spatial")
for metric_label, per_size_values in (('R2', list_r2_ns), ('RMSE', list_rmse_ns),
                                      ('Crossentropy', list_crossentropy_ns),
                                      ('Cos similarity', list_similarity_ns)):
    scores = per_size_values[2]
    print(metric_label, np.round(np.mean(scores),4), '(', np.round(np.var(scores),4), ')')
print("---\nDIRICHLET spatial")
for metric_label, per_size_values in (('R2', list_r2_s), ('RMSE', list_rmse_s),
                                      ('Crossentropy', list_crossentropy_s),
                                      ('Cos similarity', list_similarity_s)):
    scores = per_size_values[2]
    print(metric_label, np.round(np.mean(scores),4), '(', np.round(np.var(scores),4), ')')

DIRICHLET no spatial
R2 0.2073 ( 0.0025 )
RMSE 0.3257 ( 0.0001 )
Crossentropy 0.9033 ( 0.0005 )
Cos similarity 0.7764 ( 0.0001 )
---
DIRICHLET spatial
R2 0.9011 ( 0.0026 )
RMSE 0.1097 ( 0.0008 )
Crossentropy 0.4414 ( 0.0026 )
Cos similarity 0.9776 ( 0.0001 )


In [311]:
# Score the stored cross-entropy solutions for rho=0.9 on simulated data.
(list_r2_s, list_rmse_s, list_similarity_s, list_crossentropy_s,
 list_r2_ns, list_rmse_ns, list_similarity_ns, list_crossentropy_ns) = analysis_results(
    list_solutions_ce_spatial_09,
    list_solutions_ce_no_spatial_09,
    rho=0.9,
)

In [312]:
# Report the mean and (variance) of each metric for the third sample-size
# group (index 2): first the non-spatial solutions, then the spatial ones.
print("CROSSENTROPY no spatial")
for metric_label, per_size_values in (('R2', list_r2_ns), ('RMSE', list_rmse_ns),
                                      ('Crossentropy', list_crossentropy_ns),
                                      ('Cos similarity', list_similarity_ns)):
    scores = per_size_values[2]
    print(metric_label, np.round(np.mean(scores),4), '(', np.round(np.var(scores),4), ')')
print("---\nCROSSENTROPY spatial")
for metric_label, per_size_values in (('R2', list_r2_s), ('RMSE', list_rmse_s),
                                      ('Crossentropy', list_crossentropy_s),
                                      ('Cos similarity', list_similarity_s)):
    scores = per_size_values[2]
    print(metric_label, np.round(np.mean(scores),4), '(', np.round(np.var(scores),4), ')')

CROSSENTROPY no spatial
R2 0.274 ( 0.0021 )
RMSE 0.3121 ( 0.0002 )
Crossentropy 0.8576 ( 0.0019 )
Cos similarity 0.7877 ( 0.0003 )
---
CROSSENTROPY spatial
R2 0.9761 ( 0.0 )
RMSE 0.0535 ( 0.0 )
Crossentropy 0.3716 ( 0.0009 )
Cos similarity 0.9933 ( 0.0 )
