# Importing the packages and data

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
from sklearn.metrics import r2_score, mean_squared_error

from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [3]:
import sys
sys.path.insert(1, '../sar_dirichlet')
import dirichlet_regression
import dirichlet_regression_opti

In [4]:
from func_test import cos_similarity, create_features_matrices

# With two features

In [5]:
n_features = 2
n_classes = 3

In [6]:
np.random.seed(21)

beta = np.array([[0.  , 0. , .1],
                 [0.  , 1., -2.],
                 [0.  , -1., -2. ]])

gamma_var = np.array([2.,3.])

In [7]:
n_repeat = 100
list_n_samples = [50,200,1000]

In [8]:
cov_matrix = np.array([[1., 0.2], [0.2, 1.]])

# Estimation of the parameters

In [10]:
def diagonalize(M):
    eigenvalues, P = np.linalg.eig(M)
    D = np.diag(eigenvalues)
    return(P, D)

In [11]:
n_samples=200
rho=0.9

X,Z,W = create_features_matrices(n_samples,n_features,choice_W='random_distance',nneighbors=10,cov_mat=cov_matrix)
Z[:,0] = 1
M = np.identity(n_samples) - rho*W

mu = dirichlet_regression.compute_mu_spatial(X, beta, M)
#phi = np.exp(np.matmul(Z,gamma_var))
phi = 15*np.ones(n_samples)
alpha = mu*phi[:,None]

Y = np.array([np.random.dirichlet(alpha_i) for alpha_i in alpha])
Y = (Y*(n_samples-1)+1/n_classes)/n_samples

In [16]:
I = np.identity(n_samples)

If $W$ is diagonalizable, then $W = PDP^{-1}$ with $D$ a diagonal matrix.

\begin{align*}
    (I - \rho W)^{-1} &= (I - \rho PDP^{-1})^{-1}\\
    &= P P^{-1} (I - \rho PDP^{-1})^{-1} P P^{-1}\\
    &= P \left( P^{-1} (I - \rho PDP^{-1}) P \right)^{-1} P^{-1}\\
    &= P (I - \rho D)^{-1} P^{-1}
\end{align*}

We just need to compute the inverse of $(I - \rho D)$, which is straightforward because it is a diagonal matrix.

In [17]:
%%time
P, D = diagonalize(W)

Wall time: 25.9 ms


In [18]:
np.count_nonzero(D - np.diag(np.diagonal(D)))

0

In [18]:
%%time
P_inv = np.linalg.inv(P)

Wall time: 15.6 ms


In [14]:
np.linalg.inv(I - rho*W)

array([[1.39162964e+00, 6.04045923e-03, 7.92622397e-04, ...,
        5.34241201e-04, 1.03964264e-04, 1.99769206e-04],
       [2.09871330e-02, 1.37160804e+00, 1.64043479e-03, ...,
        1.89967177e-03, 4.16291544e-04, 9.57836831e-03],
       [3.59740754e-03, 6.85942667e-03, 1.17286198e+00, ...,
        4.16781218e-01, 4.94581831e-02, 3.84177592e-02],
       ...,
       [1.19203874e-03, 2.31260208e-03, 2.37565403e-01, ...,
        1.39768922e+00, 8.20240240e-02, 4.16780340e-02],
       [8.61918296e-05, 2.03139896e-04, 1.18234702e-02, ...,
        3.57132062e-02, 1.78372458e+00, 2.56267827e-02],
       [6.14163617e-05, 1.54822406e-03, 4.92316805e-03, ...,
        1.20347727e-02, 9.95711002e-03, 3.99795756e+00]])

In [21]:
np.real(np.matmul(P, np.matmul(np.linalg.inv(I-rho*D), P_inv)))

array([[1.39162964e+00, 6.04045923e-03, 7.92622397e-04, ...,
        5.34241201e-04, 1.03964264e-04, 1.99769206e-04],
       [2.09871330e-02, 1.37160804e+00, 1.64043479e-03, ...,
        1.89967177e-03, 4.16291544e-04, 9.57836831e-03],
       [3.59740754e-03, 6.85942667e-03, 1.17286198e+00, ...,
        4.16781218e-01, 4.94581831e-02, 3.84177592e-02],
       ...,
       [1.19203874e-03, 2.31260208e-03, 2.37565403e-01, ...,
        1.39768922e+00, 8.20240240e-02, 4.16780340e-02],
       [8.61918296e-05, 2.03139896e-04, 1.18234702e-02, ...,
        3.57132062e-02, 1.78372458e+00, 2.56267827e-02],
       [6.14163617e-05, 1.54822406e-03, 4.92316805e-03, ...,
        1.20347727e-02, 9.95711002e-03, 3.99795756e+00]])

In [23]:
%%time
D_inv = np.zeros(I.shape)
np.fill_diagonal(D_inv, 1/np.real(I-rho*D).diagonal())

Wall time: 0 ns


In [25]:
np.real(np.matmul(P, np.matmul(D_inv, P_inv)))

array([[ 1.39074331e+00,  4.19670823e-03,  1.21706347e-03, ...,
         1.15409611e-03,  1.53409768e-04,  5.99685601e-04],
       [ 2.46488888e-02,  1.37202962e+00,  1.30032154e-03, ...,
         2.61596271e-03,  1.84238182e-04,  9.40255350e-03],
       [-1.18978253e-03,  7.86939625e-03,  1.17415109e+00, ...,
         4.19075976e-01,  4.92451210e-02,  3.96931298e-02],
       ...,
       [-1.32839153e-03,  3.07444496e-03,  2.35786110e-01, ...,
         1.39529542e+00,  8.22684646e-02,  4.21870380e-02],
       [ 2.80791209e-04,  4.55745818e-05,  1.18203729e-02, ...,
         3.59092397e-02,  1.78380281e+00,  2.54673308e-02],
       [ 5.25149647e-05,  1.55550874e-03,  4.93138944e-03, ...,
         1.20492303e-02,  9.94326421e-03,  3.99793702e+00]])

In [24]:
%%time
for _ in range(100):
    _ = np.linalg.solve(D,P)

Wall time: 464 ms


In [31]:
%%time
D_inv = np.zeros(D.shape)
np.fill_diagonal(D_inv, 1/D.diagonal())

Wall time: 7.01 ms


  a.flat[:end:step] = val


In [31]:
%%time
for _ in range(100):
    MX1 = np.linalg.solve(M,X)

Wall time: 486 ms


In [45]:
P_inv_X = np.matmul(P_inv, X)

In [52]:
%%time
for _ in range(100):
    D_inv = np.zeros(I.shape)
    np.fill_diagonal(D_inv, 1/np.real(I-rho*D).diagonal())
    #MX2 = np.matmul( np.real(np.matmul(P, np.matmul(D_inv, P_inv))), X)
    MX2 = np.real(np.matmul(np.matmul(P, D_inv), P_inv_X))

Wall time: 126 ms


In [77]:
np.sum(Y,axis=0)

array([38.38067552, 76.35362904, 85.26569544])

# Tests time

In [19]:
%%time
reg_spatial_opti = dirichlet_regression_opti.dirichletRegressor(spatial=True, maxfun=5000)
reg_spatial_opti.fit(X, Y, parametrization='alternative', Z=Z, W=W, fit_intercept=False, verbose=1)

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH
Wall time: 990 ms


In [20]:
%%time
reg_spatial = dirichlet_regression.dirichletRegressor(spatial=True, maxfun=5000)
reg_spatial.fit(X, Y, parametrization='alternative', Z=Z, W=W, fit_intercept=False, verbose=1)


CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH
Wall time: 1.57 s
