# Importing the packages and data

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import scipy
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from scipy.special import gamma, digamma, polygamma
from scipy import sparse
import math

from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import random

import scipy.optimize
from scipy.optimize import Bounds

In [3]:
from prettytable import PrettyTable

In [4]:
import dirichlet_regression

In [5]:
from scipy.optimize import fmin, newton, minimize

In [177]:
def f_spatial(x, X, Y, Z, W, epsilon=0):
    K = X.shape[-1]
    J = Y.shape[-1]
    n = X.shape[0]
    beta = x[:K*J].reshape((K,J))
    beta[:,0] = 0
    gamma_var = x[K*J:-1]
    rho = x[-1]
    M = np.identity(n) - rho*W
    mu = dirichlet_regression.compute_mu_spatial_opti(X, beta, M)
    phi = np.exp(np.matmul(Z,gamma_var))
    return -dirichlet_regression.dirichlet_loglikelihood(mu,phi,Y,epsilon=epsilon)

In [184]:
def fprime_spatial(x, X, Y, Z, W, epsilon=0):
    K = X.shape[-1]
    J = Y.shape[-1]
    n = X.shape[0]
    beta = x[:K*J].reshape((K,J))
    beta[:,0] = 0
    gamma_var = x[K*J:-1]
    rho = x[-1]
    M = np.identity(n) - rho*W
    MinvX = np.linalg.solve(M,X)
    mu = dirichlet_regression.compute_mu_spatial_opti(X, beta, M, MinvX=MinvX)
    phi = np.exp(np.matmul(Z,gamma_var))

    beta_grad = dirichlet_regression.dirichlet_gradient_wrt_beta(mu, phi, MinvX, Y, epsilon=epsilon)
    beta_grad[:,0] = 0
    gamma_grad = dirichlet_regression.dirichlet_derivative_wrt_gamma(mu, phi, Y, Z, epsilon=epsilon)
    
    MinvW = np.linalg.solve(M,W)
    rho_derivative = dirichlet_regression.dirichlet_derivative_wrt_rho(mu, phi, beta, W, X, Y, Z, MinvX=MinvX, MinvW=MinvW, epsilon=epsilon)
    return(-np.concatenate([beta_grad.flatten(),gamma_grad,[rho_derivative]]))

In [8]:
def f_no_spatial(x, X, Y, Z, epsilon=0):
    K = X.shape[-1]
    J = Y.shape[-1]
    beta = x[:K*J].reshape((K,J))
    beta[:,0] = 0
    gamma_var = x[K*J:]
    mu = dirichlet_regression.compute_mu_3(X, beta)
    phi = np.exp(np.matmul(Z,gamma_var))
    return -dirichlet_regression.dirichlet_loglikelihood(mu,phi,Y,epsilon=epsilon)

In [9]:
def fprime_no_spatial(x, X, Y, Z, epsilon=0):
    K = X.shape[-1]
    J = Y.shape[-1]
    beta = x[:K*J].reshape((K,J))
    beta[:,0] = 0
    gamma_var = x[K*J:]
    mu = dirichlet_regression.compute_mu_3(X, beta)
    phi = np.exp(np.matmul(Z,gamma_var))
    beta_grad = dirichlet_regression.dirichlet_gradient_wrt_beta(mu, phi, X, Y, epsilon=epsilon)
    beta_grad[:,0] = 0
    gamma_grad = dirichlet_regression.dirichlet_derivative_wrt_gamma(mu, phi, beta, X, Y, Z, epsilon=epsilon)
    return(-np.concatenate([beta_grad.flatten(),gamma_grad]))

In [132]:
def create_features_matrices(n_samples,n_features,choice_W='X_dependent',threshold_neighbors=0.3,nneighbors=10):
    X = np.random.uniform(size=(n_samples,n_features))
    X = (X-X.min())/(X.max()-X.min())
    X = np.array([np.concatenate(([1],x)) for x in X])
    Z = np.random.uniform(size=(n_samples,n_features))
    
    if choice_W == 'random':
        neighbors = NearestNeighbors(n_neighbors=nneighbors).fit(np.arange(n_samples).reshape(-1,1))
        W = neighbors.kneighbors_graph(np.arange(n_samples).reshape(-1,1)).toarray()
    else:
        distance_matrix = scipy.spatial.distance_matrix(X,X)
        W = np.zeros(np.shape(distance_matrix))
        W[distance_matrix < threshold_neighbors] = 1
    # replace the 1 on the diagonal by 0
    np.fill_diagonal(W,0)
    # scaling the matrix, so that the sum of each row is 1
    W = W/W.sum(axis=1)[:,None]
    return(X,Z,W)

# With two features

In [188]:
n_features = 2
n_classes = 3

In [189]:
np.random.seed(21)

beta = np.array([[0.  , -1.6 , 1.],
                 [0.  , 1.8, -1.4],
                 [0.  , 1.4, -1.1 ]])

gamma_var = np.round(np.random.normal(size=(n_features)),2)

In [190]:
beta0 = np.zeros((n_features+1,n_classes))
gamma0 = [0.,0.]
rho0 = [0.]
params0_spatial = np.concatenate([beta0.flatten(),gamma0, rho0])
params0_no_spatial = np.concatenate([beta0.flatten(),gamma0])

min_bounds_1 = -np.inf*np.ones(len(params0_spatial)) 
max_bounds_1 = np.inf*np.ones(len(params0_spatial))
min_bounds_1[-1] = -1
max_bounds_1[-1] = 1
bounds_1 = Bounds(min_bounds_1, max_bounds_1)

In [191]:
rho = 0.5
n = 200

In [192]:
X,Z,W = create_features_matrices(n,n_features)

In [193]:
Xbeta = np.matmul(X,beta)

In [194]:
M = np.identity(n) - rho*W

In [195]:
%%time
for _ in range(10):
    Minv = np.linalg.inv(M)
    np.matmul(Minv,Xbeta)

Wall time: 21 ms


In [196]:
%%time
for _ in range(10):
    np.linalg.solve(M,Xbeta)

Wall time: 312 ms


In [197]:
%%time
for _ in range(10):
    sparse.linalg.spsolve(sparse.csc_matrix(M),Xbeta)

Wall time: 25 ms


In [203]:
%%time
for _ in range(10):
    Minv = np.linalg.inv(M)

Wall time: 21.6 ms


In [204]:
%%time
for _ in range(10):
    np.linalg.solve(M,np.identity(n))

Wall time: 18.2 ms


In [206]:
%%time
for _ in range(10):
    sparse.linalg.spsolve(sparse.csc_matrix(M),np.identity(n))

Wall time: 58.2 ms


## With more features and classes

In [207]:
n_features = 16
n_classes = 15

In [208]:
np.random.seed(3)

beta = np.round(np.random.normal(size=(n_features+1,n_classes)),2)

gamma_var = np.round(np.random.normal(size=(n_features)),2)

In [209]:
rho = 0.5
n = 2000

In [210]:
X,Z,W = create_features_matrices(n,n_features,choice_W='random')

In [211]:
%%time
Xbeta = np.matmul(X,beta)

Wall time: 51.6 ms


In [174]:
M = np.identity(n) - rho*W

In [161]:
%%time
for _ in range(10):
    Minv = np.linalg.inv(M)
    np.matmul(Minv,Xbeta)

Wall time: 32 ms


In [162]:
%%time
for _ in range(10):
    np.linalg.solve(M,Xbeta)

Wall time: 25.5 ms


In [163]:
%%time
for _ in range(10):
    sparse.linalg.spsolve(sparse.csc_matrix(M),Xbeta)

Wall time: 7.96 ms


### Time to compute mu

In [175]:
%%time
for _ in range(10):
    Minv = np.linalg.inv(M)
    _ = dirichlet_regression.compute_mu_spatial_2(X, beta, rho=rho, W=W, Minv=Minv)

Wall time: 2.82 s


In [178]:
%%time
for _ in range(10):
    _ = dirichlet_regression.compute_mu_spatial_opti(X, beta, M)

Wall time: 293 ms


In [179]:
MinvX = np.matmul(Minv,X)

In [180]:
%%time
for _ in range(10):
    _ = dirichlet_regression.compute_mu_spatial_opti(X, beta, M, MinvX = MinvX)

Wall time: 7.97 ms


# Neumann series

In [19]:
T = rho*W

In [20]:
%%time
power = T
serie = np.identity(n) + power
for _ in range(2,6):
    power = np.matmul(power,T)
    serie += power

Wall time: 497 ms


In [21]:
serie

array([[1.00107357e+00, 1.16987222e-04, 1.10301872e-05, ...,
        6.75779688e-05, 3.72933442e-06, 7.96013811e-05],
       [1.10987877e-04, 1.00102097e+00, 6.06472962e-05, ...,
        2.66455599e-06, 1.01696052e-07, 7.87002504e-04],
       [6.00171949e-06, 3.47830081e-05, 1.00082369e+00, ...,
        2.99422075e-05, 1.59294583e-05, 3.35404561e-04],
       ...,
       [5.20913510e-05, 2.16495174e-06, 4.24181272e-05, ...,
        1.00090339e+00, 1.79117250e-03, 1.52101877e-05],
       [4.65773413e-06, 1.33878348e-07, 3.65638198e-05, ...,
        2.90215291e-03, 1.00107408e+00, 3.47194755e-06],
       [4.19252826e-05, 4.36912422e-04, 3.24662066e-04, ...,
        1.03927261e-05, 1.46414870e-06, 1.00086996e+00]])

In [22]:
np.linalg.norm(Minv-serie)

0.03652929979247122

In [23]:
%%time
power = T
serie = np.identity(n) + power
for k in range(2,6):
    serie += np.linalg.matrix_power(T,k)

Wall time: 994 ms


In [24]:
%%time
serie = np.identity(n) + T
power = sparse.csr_matrix(T)
sparseT = sparse.csr_matrix(T)
for _ in range(2,6):
    power = power.multiply(sparseT)
    serie += power.todense()

Wall time: 230 ms


# Other

In [None]:
solution = 