# Importing the packages and data

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import scipy
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from scipy.special import gamma, digamma, polygamma
import math

from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import random

import scipy.optimize
from scipy.stats import ttest_ind
from scipy.optimize import Bounds
from sklearn.preprocessing import StandardScaler

from prettytable import PrettyTable

In [3]:
import multiprocessing

In [4]:
multiprocessing.cpu_count()

8

In [5]:
import sys
sys.path.append("../smote_cd/")
from smote_cd.dataset_generation import softmax

In [6]:
import dirichlet_regression

In [7]:
from scipy.optimize import fmin, newton, minimize

In [30]:
def f_spatial(x, X, Y, Z, W, epsilon=0):
    K = X.shape[-1]
    J = Y.shape[-1]
    n = X.shape[0]
    beta = x[:K*J].reshape((K,J))
    beta[:,0] = 0
    gamma_var = x[K*J:-1]
    rho = x[-1]
    M = np.identity(n) - rho*W
    mu = dirichlet_regression.compute_mu_spatial_opti(X, beta, M)
    phi = np.exp(np.matmul(Z,gamma_var))
    return -dirichlet_regression.dirichlet_loglikelihood(mu,phi,Y,epsilon=epsilon)

In [39]:
def fprime_spatial(x, X, Y, Z, W, epsilon=0):
    K = X.shape[-1]
    J = Y.shape[-1]
    n = X.shape[0]
    beta = x[:K*J].reshape((K,J))
    beta[:,0] = 0
    gamma_var = x[K*J:-1]
    rho = x[-1]
    M = np.identity(n) - rho*W
    MinvX = np.linalg.solve(M,X)
    MinvXbeta = np.matmul(MinvX,beta)
    
    mu = dirichlet_regression.compute_mu_spatial_opti(X, beta, M, MXbeta=MinvXbeta)
    phi = np.exp(np.matmul(Z,gamma_var))

    beta_grad = dirichlet_regression.dirichlet_gradient_wrt_beta(mu, phi, MinvX, Y, epsilon=epsilon)
    beta_grad[:,0] = 0
    gamma_grad = dirichlet_regression.dirichlet_derivative_wrt_gamma(mu, phi, Y, Z, epsilon=epsilon)
    rho_derivative = dirichlet_regression.dirichlet_derivative_wrt_rho(mu, phi, beta, M, W, X, Y, Z, MinvXbeta=MinvXbeta, epsilon=epsilon)
    return(-np.concatenate([beta_grad.flatten(),gamma_grad,[rho_derivative]]))

In [9]:
def f_no_spatial(x, X, Y, Z, epsilon=0):
    K = X.shape[-1]
    J = Y.shape[-1]
    beta = x[:K*J].reshape((K,J))
    beta[:,0] = 0
    gamma_var = x[K*J:]
    mu = dirichlet_regression.compute_mu_3(X, beta)
    phi = np.exp(np.matmul(Z,gamma_var))
    return -dirichlet_regression.dirichlet_loglikelihood(mu,phi,Y,epsilon=epsilon)

In [10]:
def fprime_no_spatial(x, X, Y, Z, epsilon=0):
    K = X.shape[-1]
    J = Y.shape[-1]
    beta = x[:K*J].reshape((K,J))
    beta[:,0] = 0
    gamma_var = x[K*J:]
    mu = dirichlet_regression.compute_mu_3(X, beta)
    phi = np.exp(np.matmul(Z,gamma_var))
    beta_grad = dirichlet_regression.dirichlet_gradient_wrt_beta(mu, phi, X, Y, epsilon=epsilon)
    beta_grad[:,0] = 0
    gamma_grad = dirichlet_regression.dirichlet_derivative_wrt_gamma(mu, phi, beta, X, Y, Z, epsilon=epsilon)
    return(-np.concatenate([beta_grad.flatten(),gamma_grad]))

In [77]:
# sur mon ordi perso:
scipy.__version__

'1.4.1'

# Loading Dataset

In [11]:
Y_occitanie = pd.read_csv('Data Dirichlet/occitanie/Y_occitanie.csv', sep=';')

In [12]:
X_occitanie = pd.read_csv('Data Dirichlet/occitanie/X_occitanie.csv', sep=';')

In [13]:
X_occitanie = X_occitanie.iloc[:,1:]

In [14]:
X_occitanie = X_occitanie.iloc[:,:-1]

In [15]:
W_occitanie = pd.read_csv('Data Dirichlet/occitanie/W_elections_5nn.csv', sep=' ', header=None)

In [16]:
W_occitanie_10 = pd.read_csv('Data Dirichlet/occitanie/W_elections_10nn.csv', sep=' ', header=None)

### With an intercept

In [17]:
X = np.array(X_occitanie,dtype='float')
Y = np.array(Y_occitanie)
Z = np.ones(np.shape(X))
W = np.array(W_occitanie)

In [18]:
W_10 = np.array(W_occitanie_10)

In [19]:
n_classes = 3
n_samples = X.shape[0]
n_features = X.shape[1]

In [20]:
print(n_features, n_samples)

25 207


In [21]:
#X = StandardScaler().fit(X).transform(X)

for j in range(n_features):
    X[:,j] = X[:,j]/np.max(X[:,j])

In [22]:
X_1 = np.ones((n_samples,n_features+1))
X_1[:,1:] = X

In [23]:
beta0 = np.zeros((n_features+1,n_classes))
gamma0 = np.zeros(n_features)
rho0 = 0.
params0_no_spatial = np.concatenate([beta0.flatten(),gamma0])
params0 = np.concatenate([beta0.flatten(),gamma0,[rho0]])

In [24]:
%%time
solution_ns = minimize(f_no_spatial, params0_no_spatial, args=(X_1, Y, Z), jac=fprime_no_spatial)

Wall time: 1.45 s


In [25]:
beta_1_ns = solution_ns.x[:(n_features+1)*n_classes].reshape((n_features+1,n_classes))

mu_1_ns = dirichlet_regression.compute_mu_3(X_1, beta_1_ns)

gamma_var_1_ns = solution_ns.x[(n_features+1)*n_classes:]
phi_1_ns = np.exp(np.matmul(Z,gamma_var_1_ns))

In [26]:
ll_1_ns = dirichlet_regression.dirichlet_loglikelihood(mu_1_ns,phi_1_ns,Y)

In [27]:
print('RESULTS WITHOUT SPATIAL')
print('RMSE:', mean_squared_error(Y,mu_1_ns,squared=False))
print('R2:', r2_score(Y,mu_1_ns))
print('Cross-entropy:', 1/n_samples * np.sum(Y-mu_1_ns))
print('Cosine similarity:',np.mean([np.dot(Y[i],mu_1_ns[i])/(np.linalg.norm(Y[i])*np.linalg.norm(mu_1_ns[i])) for i in range(len(Y))]))
print('AIC:', 2*len(solution_ns.x) - 2*ll_1_ns)

RESULTS WITHOUT SPATIAL
RMSE: 0.07963955132743465
R2: 0.48722499041497613
Cross-entropy: 1.9323671339920016e-10
Cosine similarity: 0.9747790819450894
AIC: -762.1476284390359


In [28]:
min_bounds_1 = -np.inf*np.ones(len(params0)) 
max_bounds_1 = np.inf*np.ones(len(params0))
min_bounds_1[-1] = -1
max_bounds_1[-1] = 1
bounds_1 = Bounds(min_bounds_1, max_bounds_1)

In [35]:
%%time
solution_s = minimize(f_spatial, params0, args=(X_1, Y, Z, W), jac=fprime_spatial, bounds=bounds_1)

Wall time: 3min


In [48]:
%%time
solution_s = minimize(f_spatial, params0, args=(X_1, Y, Z, W), bounds=bounds_1)

Wall time: 1min


In [47]:
%%time
solution_s = minimize(f_spatial, params0, args=(X_1, Y, Z, W), jac=fprime_spatial, bounds=bounds_1)

Wall time: 2min 54s


In [40]:
beta_1_s = solution_s.x[:(n_features+1)*n_classes].reshape((n_features+1,n_classes))
rho_1_s = solution_s.x[-1]

mu_1_s = dirichlet_regression.compute_mu_spatial_3(X_1, beta_1_s, rho_1_s, W)

gamma_var_1_s = solution_s.x[(n_features+1)*n_classes:-1]
phi_1_s = np.exp(np.matmul(Z,gamma_var_1_s))

In [41]:
ll_1_s = dirichlet_regression.dirichlet_loglikelihood(mu_1_s,phi_1_s,Y)

In [42]:
print('RESULTS WITH SPATIAL (5 neighbors)')
print('RMSE:', mean_squared_error(Y,mu_1_s,squared=False))
print('R2:', r2_score(Y,mu_1_s))
print('Cross-entropy:', 1/n_samples * np.sum(Y-mu_1_s))
print('Cosine similarity:',np.mean([np.dot(Y[i],mu_1_s[i])/(np.linalg.norm(Y[i])*np.linalg.norm(mu_1_s[i])) for i in range(len(Y))]))
print('AIC:', 2*len(solution_s.x) - 2*ll_1_s)

RESULTS WITH SPATIAL (5 neighbors)
RMSE: 0.07610701708508315
R2: 0.533095793501082
Cross-entropy: 1.9323671339920016e-10
Cosine similarity: 0.9772008823390392
AIC: -797.5077244063186


In [34]:
print('RESULTS WITH SPATIAL (5 neighbors)')
print('RMSE:', mean_squared_error(Y,mu_1_s,squared=False))
print('R2:', r2_score(Y,mu_1_s))
print('Cross-entropy:', 1/n_samples * np.sum(Y-mu_1_s))
print('Cosine similarity:',np.mean([np.dot(Y[i],mu_1_s[i])/(np.linalg.norm(Y[i])*np.linalg.norm(mu_1_s[i])) for i in range(len(Y))]))
print('AIC:', 2*len(solution_s.x) - 2*ll_1_s)

RESULTS WITH SPATIAL (5 neighbors)
RMSE: 0.07198101955891562
R2: 0.5822336711415361
Cross-entropy: 1.9323671460596431e-10
Cosine similarity: 0.9794439715584995
AIC: -847.2044390150529


In [37]:
%%time
solution_s = minimize(f_spatial, params0, args=(X_1, Y, Z, W_10), jac=fprime_spatial, bounds=bounds_1)

Wall time: 6min 10s


In [38]:
beta_1_s = solution_s.x[:(n_features+1)*n_classes].reshape((n_features+1,n_classes))
rho_1_s = solution_s.x[-1]

mu_1_s = dirichlet_regression.compute_mu_spatial_3(X_1, beta_1_s, rho_1_s, W)

gamma_var_1_s = solution_s.x[(n_features+1)*n_classes:-1]
phi_1_s = np.exp(np.matmul(Z,gamma_var_1_s))

In [39]:
ll_1_s = dirichlet_regression.dirichlet_loglikelihood(mu_1_s,phi_1_s,Y)

In [40]:
print('RESULTS WITH SPATIAL (10 neighbors)')
print('RMSE:', mean_squared_error(Y,mu_1_s,squared=False))
print('R2:', r2_score(Y,mu_1_s))
print('Cosine similarity:',np.mean([np.dot(Y[i],mu_1_s[i])/(np.linalg.norm(Y[i])*np.linalg.norm(mu_1_s[i])) for i in range(len(Y))]))
print('AIC:', 2*len(solution_s.x) - 2*ll_1_s)

RESULTS WITH SPATIAL (10 neighbors)
RMSE: 0.38039410240104976
R2: -11.323732095950902
Cosine similarity: 0.709724168953594
AIC: 17436.196067301942


In [114]:
%%time
solution_s_10 = minimize(f_spatial, params0, args=(X_1, Y, Z, W_10), jac=fprime_spatial, bounds=bounds_1)

Wall time: 2min 2s


In [117]:
beta_1_s_10 = solution_s_10.x[:(n_features+1)*n_classes].reshape((n_features+1,n_classes))
rho_1_s_10 = solution_s_10.x[-1]

mu_1_s_10 = dirichlet_regression.compute_mu_spatial_3(X_1, beta_1_s_10, rho_1_s_10, W)

gamma_var_1_s_10 = solution_s_10.x[(n_features+1)*n_classes:-1]
phi_1_s_10 = np.exp(np.matmul(Z,gamma_var_1_s_10))

In [118]:
ll_1_s_10 = dirichlet_regression.dirichlet_loglikelihood(mu_1_s_10,phi_1_s_10,Y)

In [121]:
print('RESULTS WITH SPATIAL (10 neighbors)')
print('RMSE:', mean_squared_error(Y,mu_1_s_10,squared=False))
print('R2:', r2_score(Y,mu_1_s_10))
print('Cosine similarity:',np.mean([np.dot(Y[i],mu_1_s_10[i])/(np.linalg.norm(Y[i])*np.linalg.norm(mu_1_s_10[i])) for i in range(len(Y))]))
print('AIC:', 2*len(solution_s_10.x) - 2*ll_1_s_10)

RESULTS WITH SPATIAL (10 neighbors)
RMSE: 0.18665124330929364
R2: -2.0323614700845245
Cosine similarity: 0.9054171827061012
AIC: 1763.0818001165512


# Tests with CV

In [54]:
import dirichlet_perf_eval

In [160]:
%%time
results_temp = dirichlet_perf_eval.eval_perf_maupiti(0, X_1, Y, Z, W, k_folds=4)

  exp_MXbeta = np.exp(MXbeta)


Wall time: 8min 15s


In [162]:
%%time
results_temp = dirichlet_perf_eval.eval_perf_maupiti(0, X_1, Y, Z, W, k_folds=4)

0


  exp_MXbeta = np.exp(MXbeta)


Wall time: 7min 45s


In [163]:
r2_ns, r2_s, rmse_ns, rmse_s, aic_ns, aic_s, crossentropy_ns, crossentropy_s, r2_test_ns, r2_test_s, rmse_test_ns, rmse_test_s, aic_test_ns, aic_test_s, crossentropy_test_ns, crossentropy_test_s = results_temp

In [164]:
columns_names = ['MODEL','$R^2$','RMSE','Cross-entropy','AIC']
pt = PrettyTable(columns_names)
pt.title = 'ELECTIONS RESULTS ON TEST SET'

pt.add_row(['NO SPATIAL', 
            str(np.round(np.mean(r2_test_ns),3))+' ('+ str(np.round(np.std(r2_test_ns),3)) +')',
            str(np.round(np.mean(rmse_test_ns),3))+' ('+ str(np.round(np.std(rmse_test_ns),3)) +')',
            str(np.round(-np.mean(crossentropy_test_ns),3))+' ('+ str(np.round(np.std(crossentropy_test_ns),3)) +')',
            str(np.round(np.mean(aic_test_ns),3))+' ('+ str(np.round(np.std(aic_test_ns),3)) +')'])
pt.add_row(['SPATIAL', 
            str(np.round(np.mean(r2_test_s),3))+' ('+ str(np.round(np.std(r2_test_s),3)) +')',
            str(np.round(np.mean(rmse_test_s),3))+' ('+ str(np.round(np.std(rmse_test_s),3)) +')',
            str(np.round(-np.mean(crossentropy_test_s),3))+' ('+ str(np.round(np.std(crossentropy_test_s),3)) +')',
            str(np.round(np.mean(aic_test_s),3))+' ('+ str(np.round(np.std(aic_test_s),3)) +')'])

print(pt)

+---------------------------------------------------------------------------------------+
|                             ELECTIONS RESULTS ON TEST SET                             |
+------------+---------------+--------------+---------------------+---------------------+
|   MODEL    |     $R^2$     |     RMSE     |    Cross-entropy    |         AIC         |
+------------+---------------+--------------+---------------------+---------------------+
| NO SPATIAL | 0.209 (0.156) | 0.095 (0.01) |    54.948 (0.801)   |   13.953 (35.728)   |
|  SPATIAL   | -2.19 (2.751) | 0.18 (0.093) | 4248.022 (7257.795) | 19620.4 (33640.383) |
+------------+---------------+--------------+---------------------+---------------------+


In [53]:
from multiprocessing import Pool
from functools import partial

In [55]:
%%time
list_iter = np.arange(0,10)
with Pool(6) as pool:
    all_res_parallel = pool.map(partial(dirichlet_perf_eval.eval_perf, X=X_1, Y=Y, Z=Z, W=W, k_folds=4), list_iter)

Wall time: 27min 24s


In [56]:
np.save('Data Dirichlet/res_elections.npy',all_res_parallel)

In [57]:
list_r2_ns, list_r2_s, list_rmse_ns, list_rmse_s, list_aic_ns, list_aic_s, list_crossentropy_ns, list_crossentropy_s, list_similarity_ns, list_similarity_s = [],[],[],[],[],[],[],[],[],[]
list_r2_test_ns, list_r2_test_s, list_rmse_test_ns, list_rmse_test_s, list_aic_test_ns, list_aic_test_s, list_crossentropy_test_ns, list_crossentropy_test_s, list_similarity_test_ns, list_similarity_test_s = [],[],[],[],[],[],[],[],[],[]

In [58]:
for res in all_res_parallel:
    r2_ns, r2_s, rmse_ns, rmse_s, aic_ns, aic_s, crossentropy_ns, crossentropy_s, similarity_ns, similarity_s, r2_test_ns, r2_test_s, rmse_test_ns, rmse_test_s, aic_test_ns, aic_test_s, crossentropy_test_ns, crossentropy_test_s, similarity_test_ns, similarity_test_s, = res
    list_r2_ns.append(r2_ns)
    list_r2_s.append(r2_s)
    list_rmse_ns.append(rmse_ns)
    list_rmse_s.append(rmse_s)
    list_aic_ns.append(aic_ns) 
    list_aic_s.append(aic_s) 
    list_crossentropy_ns.append(crossentropy_ns) 
    list_crossentropy_s.append(crossentropy_s)
    list_similarity_ns.append(similarity_ns) 
    list_similarity_s.append(similarity_s)
    list_r2_test_ns.append(r2_test_ns)
    list_r2_test_s.append(r2_test_s)
    list_rmse_test_ns.append(rmse_test_ns)
    list_rmse_test_s.append(rmse_test_s)
    list_aic_test_ns.append(aic_test_ns) 
    list_aic_test_s.append(aic_test_s) 
    list_crossentropy_test_ns.append(crossentropy_test_ns) 
    list_crossentropy_test_s.append(crossentropy_test_s)
    list_similarity_test_ns.append(similarity_test_ns) 
    list_similarity_test_s.append(similarity_test_s)

In [66]:
columns_names = ['MODEL','$R^2$','RMSE','Cross-entropy','Similarity','AIC']
pt = PrettyTable(columns_names)
pt.title = 'ELECTIONS RESULTS ON WHOLE DATASET (4-fold CV, 10 iterations)'

pt.add_row(['NO SPATIAL', 
            str(np.round(np.mean(list_r2_ns),3))+' ('+ str(np.round(np.std(list_r2_ns),3)) +')',
            str(np.round(np.mean(list_rmse_ns),3))+' ('+ str(np.round(np.std(list_rmse_ns),3)) +')',
            str(np.round(-np.mean(list_crossentropy_ns),3))+' ('+ str(np.round(np.std(list_crossentropy_ns),3)) +')',
            str(np.round(np.mean(list_similarity_ns),3))+' ('+ str(np.round(np.std(list_similarity_ns),3)) +')',
            str(np.round(np.mean(list_aic_ns),3))+' ('+ str(np.round(np.std(list_aic_ns),3)) +')'])
pt.add_row(['SPATIAL', 
            str(np.round(np.mean(list_r2_s),3))+' ('+ str(np.round(np.std(list_r2_s),3)) +')',
            str(np.round(np.mean(list_rmse_s),3))+' ('+ str(np.round(np.std(list_rmse_s),3)) +')',
            str(np.round(-np.mean(list_crossentropy_s),3))+' ('+ str(np.round(np.std(list_crossentropy_s),3)) +')',
            str(np.round(np.mean(list_similarity_s),3))+' ('+ str(np.round(np.std(list_similarity_s),3)) +')',
            str(np.round(np.mean(list_aic_s),3))+' ('+ str(np.round(np.std(list_aic_s),3)) +')'])

print(pt)

+------------------------------------------------------------------------------------------------------------+
|                       ELECTIONS RESULTS ON WHOLE DATASET (4-fold CV, 10 iterations)                        |
+------------+----------------+---------------+----------------------+---------------+-----------------------+
|   MODEL    |     $R^2$      |      RMSE     |    Cross-entropy     |   Similarity  |          AIC          |
+------------+----------------+---------------+----------------------+---------------+-----------------------+
| NO SPATIAL |  0.457 (0.01)  | 0.082 (0.001) |    217.34 (0.117)    | 0.973 (0.001) |   -734.928 (11.936)   |
|  SPATIAL   | -1.489 (3.103) | 0.151 (0.082) | 5369.113 (17475.466) | 0.915 (0.097) | 25128.774 (82567.658) |
+------------+----------------+---------------+----------------------+---------------+-----------------------+


In [65]:
columns_names = ['MODEL','$R^2$','RMSE','Cross-entropy','Similarity','AIC']
pt = PrettyTable(columns_names)
pt.title = 'ELECTIONS RESULTS ON TEST SET (4-fold CV, 10 iterations)'

pt.add_row(['NO SPATIAL', 
            str(np.round(np.mean(list_r2_test_ns),3))+' ('+ str(np.round(np.std(list_r2_test_ns),3)) +')',
            str(np.round(np.mean(list_rmse_test_ns),3))+' ('+ str(np.round(np.std(list_rmse_test_ns),3)) +')',
            str(np.round(-np.mean(list_crossentropy_test_ns),3))+' ('+ str(np.round(np.std(list_crossentropy_test_ns),3)) +')',
            str(np.round(np.mean(list_similarity_test_ns),3))+' ('+ str(np.round(np.std(list_similarity_test_ns),3)) +')',
            str(np.round(np.mean(list_aic_test_ns),3))+' ('+ str(np.round(np.std(list_aic_test_ns),3)) +')'])
pt.add_row(['SPATIAL', 
            str(np.round(np.mean(list_r2_test_s),3))+' ('+ str(np.round(np.std(list_r2_test_s),3)) +')',
            str(np.round(np.mean(list_rmse_test_s),3))+' ('+ str(np.round(np.std(list_rmse_test_s),3)) +')',
            str(np.round(-np.mean(list_crossentropy_test_s),3))+' ('+ str(np.round(np.std(list_crossentropy_test_s),3)) +')',
            str(np.round(np.mean(list_similarity_test_s),3))+' ('+ str(np.round(np.std(list_similarity_test_s),3)) +')',
            str(np.round(np.mean(list_aic_test_s),3))+' ('+ str(np.round(np.std(list_aic_test_s),3)) +')'])

print(pt)

+---------------------------------------------------------------------------------------------------------+
|                         ELECTIONS RESULTS ON TEST SET (4-fold CV, 10 iterations)                        |
+------------+---------------+---------------+---------------------+---------------+----------------------+
|   MODEL    |     $R^2$     |      RMSE     |    Cross-entropy    |   Similarity  |         AIC          |
+------------+---------------+---------------+---------------------+---------------+----------------------+
| NO SPATIAL | 0.269 (0.104) | 0.093 (0.007) |    54.833 (0.625)   | 0.965 (0.005) |    5.987 (29.994)    |
|  SPATIAL   | -1.88 (3.125) | 0.162 (0.078) | 1361.049 (4418.112) | 0.905 (0.096) | 6466.386 (20362.257) |
+------------+---------------+---------------+---------------------+---------------+----------------------+


# 5-folds

In [67]:
%%time
list_iter = np.arange(0,10)
with Pool(5) as pool:
    all_res_parallel_10cv = pool.map(partial(dirichlet_perf_eval.eval_perf, X=X_1, Y=Y, Z=Z, W=W, k_folds=10), list_iter)

Wall time: 1h 29min 6s


In [68]:
np.save('Data Dirichlet/res_elections_10cv.npy',all_res_parallel_10cv)

In [69]:
list_r2_ns, list_r2_s, list_rmse_ns, list_rmse_s, list_aic_ns, list_aic_s, list_crossentropy_ns, list_crossentropy_s, list_similarity_ns, list_similarity_s = [],[],[],[],[],[],[],[],[],[]
list_r2_test_ns, list_r2_test_s, list_rmse_test_ns, list_rmse_test_s, list_aic_test_ns, list_aic_test_s, list_crossentropy_test_ns, list_crossentropy_test_s, list_similarity_test_ns, list_similarity_test_s = [],[],[],[],[],[],[],[],[],[]

In [70]:
for res in all_res_parallel_10cv:
    r2_ns, r2_s, rmse_ns, rmse_s, aic_ns, aic_s, crossentropy_ns, crossentropy_s, similarity_ns, similarity_s, r2_test_ns, r2_test_s, rmse_test_ns, rmse_test_s, aic_test_ns, aic_test_s, crossentropy_test_ns, crossentropy_test_s, similarity_test_ns, similarity_test_s, = res
    list_r2_ns.append(r2_ns)
    list_r2_s.append(r2_s)
    list_rmse_ns.append(rmse_ns)
    list_rmse_s.append(rmse_s)
    list_aic_ns.append(aic_ns) 
    list_aic_s.append(aic_s) 
    list_crossentropy_ns.append(crossentropy_ns) 
    list_crossentropy_s.append(crossentropy_s)
    list_similarity_ns.append(similarity_ns) 
    list_similarity_s.append(similarity_s)
    list_r2_test_ns.append(r2_test_ns)
    list_r2_test_s.append(r2_test_s)
    list_rmse_test_ns.append(rmse_test_ns)
    list_rmse_test_s.append(rmse_test_s)
    list_aic_test_ns.append(aic_test_ns) 
    list_aic_test_s.append(aic_test_s) 
    list_crossentropy_test_ns.append(crossentropy_test_ns) 
    list_crossentropy_test_s.append(crossentropy_test_s)
    list_similarity_test_ns.append(similarity_test_ns) 
    list_similarity_test_s.append(similarity_test_s)

In [71]:
columns_names = ['MODEL','$R^2$','RMSE','Cross-entropy','Similarity','AIC']
pt = PrettyTable(columns_names)
pt.title = 'ELECTIONS RESULTS ON WHOLE DATASET (10-fold CV, 10 iterations)'

pt.add_row(['NO SPATIAL', 
            str(np.round(np.mean(list_r2_ns),3))+' ('+ str(np.round(np.std(list_r2_ns),3)) +')',
            str(np.round(np.mean(list_rmse_ns),3))+' ('+ str(np.round(np.std(list_rmse_ns),3)) +')',
            str(np.round(-np.mean(list_crossentropy_ns),3))+' ('+ str(np.round(np.std(list_crossentropy_ns),3)) +')',
            str(np.round(np.mean(list_similarity_ns),3))+' ('+ str(np.round(np.std(list_similarity_ns),3)) +')',
            str(np.round(np.mean(list_aic_ns),3))+' ('+ str(np.round(np.std(list_aic_ns),3)) +')'])
pt.add_row(['SPATIAL', 
            str(np.round(np.mean(list_r2_s),3))+' ('+ str(np.round(np.std(list_r2_s),3)) +')',
            str(np.round(np.mean(list_rmse_s),3))+' ('+ str(np.round(np.std(list_rmse_s),3)) +')',
            str(np.round(-np.mean(list_crossentropy_s),3))+' ('+ str(np.round(np.std(list_crossentropy_s),3)) +')',
            str(np.round(np.mean(list_similarity_s),3))+' ('+ str(np.round(np.std(list_similarity_s),3)) +')',
            str(np.round(np.mean(list_aic_s),3))+' ('+ str(np.round(np.std(list_aic_s),3)) +')'])

print(pt)

+-----------------------------------------------------------------------------------------------------+
|                    ELECTIONS RESULTS ON WHOLE DATASET (10-fold CV, 10 iterations)                   |
+------------+----------------+--------------+---------------------+---------------+------------------+
|   MODEL    |     $R^2$      |     RMSE     |    Cross-entropy    |   Similarity  |       AIC        |
+------------+----------------+--------------+---------------------+---------------+------------------+
| NO SPATIAL | 0.478 (0.004)  |  0.08 (0.0)  |   217.064 (0.051)   |  0.974 (0.0)  | -754.235 (4.424) |
|  SPATIAL   | -0.248 (2.014) | 0.11 (0.054) | 1137.771 (6833.386) | 0.955 (0.053) |    inf (nan)     |
+------------+----------------+--------------+---------------------+---------------+------------------+


  x = asanyarray(arr - arrmean)


In [72]:
columns_names = ['MODEL','$R^2$','RMSE','Cross-entropy','Similarity','AIC']
pt = PrettyTable(columns_names)
pt.title = 'ELECTIONS RESULTS ON TEST SET (10-fold CV, 10 iterations)'

pt.add_row(['NO SPATIAL', 
            str(np.round(np.mean(list_r2_test_ns),3))+' ('+ str(np.round(np.std(list_r2_test_ns),3)) +')',
            str(np.round(np.mean(list_rmse_test_ns),3))+' ('+ str(np.round(np.std(list_rmse_test_ns),3)) +')',
            str(np.round(-np.mean(list_crossentropy_test_ns),3))+' ('+ str(np.round(np.std(list_crossentropy_test_ns),3)) +')',
            str(np.round(np.mean(list_similarity_test_ns),3))+' ('+ str(np.round(np.std(list_similarity_test_ns),3)) +')',
            str(np.round(np.mean(list_aic_test_ns),3))+' ('+ str(np.round(np.std(list_aic_test_ns),3)) +')'])
pt.add_row(['SPATIAL', 
            str(np.round(np.mean(list_r2_test_s),3))+' ('+ str(np.round(np.std(list_r2_test_s),3)) +')',
            str(np.round(np.mean(list_rmse_test_s),3))+' ('+ str(np.round(np.std(list_rmse_test_s),3)) +')',
            str(np.round(-np.mean(list_crossentropy_test_s),3))+' ('+ str(np.round(np.std(list_crossentropy_test_s),3)) +')',
            str(np.round(np.mean(list_similarity_test_s),3))+' ('+ str(np.round(np.std(list_similarity_test_s),3)) +')',
            str(np.round(np.mean(list_aic_test_s),3))+' ('+ str(np.round(np.std(list_aic_test_s),3)) +')'])

print(pt)

+----------------------------------------------------------------------------------------------------+
|                     ELECTIONS RESULTS ON TEST SET (10-fold CV, 10 iterations)                      |
+------------+----------------+---------------+-------------------+---------------+------------------+
|   MODEL    |     $R^2$      |      RMSE     |   Cross-entropy   |   Similarity  |       AIC        |
+------------+----------------+---------------+-------------------+---------------+------------------+
| NO SPATIAL | 0.245 (0.202)  |  0.09 (0.013) |   21.89 (0.553)   |  0.967 (0.01) | 122.807 (18.952) |
|  SPATIAL   | -0.937 (2.149) | 0.129 (0.056) | 114.318 (675.173) | 0.941 (0.052) |    inf (nan)     |
+------------+----------------+---------------+-------------------+---------------+------------------+
