In [1]:
# Move from the notebook's folder to the project root, so that the relative
# paths used further down ("./notebooks/...", "spatial_mix/resources/...")
# resolve. The guard makes the cell safe to re-run: once the working directory
# already contains the `spatial_mix` folder we do not climb any further.
import os

if not os.path.isdir("spatial_mix"):
    os.chdir("../../../")

In [2]:
## *******************************************************************************

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import subprocess
import pymc3 as pm
import seaborn as sns

import spatial_mix.hdp_utils as hdp_utils
from spatial_mix.utils import *
from spatial_mix.protos.py.univariate_mixture_state_pb2 import UnivariateState, UnivariateMixtureState, HdpState

# Palette returned by seaborn; the (currently commented-out) overlay plots at
# the end of the notebook index into it to pick their colors.
current_palette = sns.color_palette()

ModuleNotFoundError: No module named 'spatial_mix.protos.py.sampler_params_pb2'

## Simulate data

We simulate data as follows: for each little square $i$, the observations are drawn from a 3-component mixture
$$ y_{ij} \sim w_{i1} N(-5, 1) + w_{i2} N(0, 1) + w_{i3} N(5, 1)$$

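The weights depend on the location $(x_i, y_i)$ of the center of the square (the orange dot), measured relative to the mean center $(\bar{x}, \bar{y})$ of all the squares:
$$ \widetilde{w}_{il} = \alpha_l (x_i - \bar{x}) + \beta_l (y_i - \bar{y}) \quad l=1, 2$$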

Finally, $w_i = \operatorname{alr}^{-1} ([\widetilde{w}_{i1}, \widetilde{w}_{i2}])$, where $\operatorname{alr}^{-1}$ is the inverse additive log-ratio transform.

In [4]:
def inv_alr(x):
    """Inverse additive log-ratio (alr) transform.

    Maps D-1 unconstrained values to a point of the D-dimensional simplex,
    taking the last component as the reference one.

    Parameters
    ----------
    x : array-like
        The D-1 log-ratios (1-D sequence or scalar).

    Returns
    -------
    numpy.ndarray
        Vector of D positive weights summing to one.
    """
    # The reference component has log-ratio 0 (exp(0) = 1), so the vector is
    # padded with 0. Padding with 1 would give the reference a weight of `e`.
    logits = np.hstack((x, 0.0))
    # Subtracting the maximum cancels out in the normalization and prevents
    # overflow in exp() for large inputs.
    out = np.exp(logits - np.max(logits))
    return out / np.sum(out)

In [5]:
def simulate_from_mixture(weights, rng=None):
    """Draw one observation from the 3-component Gaussian mixture.

    The components are N(-5, 1), N(0, 1) and N(5, 1).

    Parameters
    ----------
    weights : array-like of length 3
        Mixture weights; passed as the `p` argument of `choice`, so they
        must be non-negative and sum to one.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, the global `np.random` state is
        used, exactly as before, so `np.random.seed` still controls the draw.

    Returns
    -------
    float
        The simulated value.
    """
    means = [-5, 0, 5]
    # `np.random` (module), RandomState and Generator all expose the
    # `choice` and `normal` methods used here.
    gen = np.random if rng is None else rng
    comp = gen.choice(len(means), p=weights)
    return gen.normal(loc=means[comp], scale=1)

In [6]:
# Grid layout: Nx x Ny unit squares, N areas in total.
Nx, Ny = 4, 4
N = Nx * Ny

# Coordinates of the square centers, stored so that area i*Ny + j sits at (i, j).
centers = np.zeros((N, 2))
for area in range(N):
    centers[area, :] = divmod(area, Ny)

# Slopes of the two transformed weights along the first and second coordinate.
alpha1, alpha2 = 0.2, -0.2
beta1, beta2 = 0.1, -0.1

# Evaluate both linear predictors on the centered coordinates of every area
# and map each pair to mixture weights through inv_alr.
mean_centers = np.mean(centers, axis=0)
weights = [
    inv_alr([alpha1 * dx + beta1 * dy, alpha2 * dx + beta2 * dy])
    for dx, dy in centers - mean_centers
]

In [7]:
# Sanity check: show the working directory against which the relative
# paths used in the following cells are resolved.
print(os.getcwd())

/home/riccardo/Desktop/spatial_lda


In [8]:
# Adjacency matrix of the grid. Areas are numbered k = i*Ny + j, so the
# +/-1 diagonals link consecutive values of j and the +/-Ny diagonals link
# consecutive values of i.
G = np.diag(np.ones(N-1), 1) + np.diag(np.ones(N-1), -1) +\
     np.diag(np.ones(N-Ny), Ny) + np.diag(np.ones(N-Ny), -Ny)
# Remove the borders: the +/-1 diagonals also connect the last area of one
# block of Ny areas with the first area of the next block. Those blocks start
# at multiples of Ny, which matters as soon as the grid is not square.
border_indices = Ny*np.arange(1, Nx)
G[border_indices, border_indices - 1] = 0
G[border_indices - 1, border_indices] = 0

print(G)
np.savetxt("./notebooks/SIS/linear_weights/data/simulated_linear_G.csv", G, delimiter=",")

[[0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0.]]


In [9]:
# Seed the global NumPy generator so that the simulated dataset (and the CSV
# written from it) is identical after every "Restart & Run All".
SEED = 42
np.random.seed(SEED)

# Rows are [group index, simulated value]; every area gets numSamples draws
# from its own mixture.
data = []
for i in range(N):
    numSamples = 100
    # Optional experiment: give one area fewer observations.
    #if i == 5:
    #    numSamples = 20
    for j in range(numSamples):
        data.append([i, simulate_from_mixture(weights[i])])

In [10]:
# Long-format table: one row per observation, with the area index in "group"
# and the simulated value in "data".
df = pd.DataFrame(data, columns=["group", "data"])
# Written without the index column; the sampler cell below reads this file back.
df.to_csv("./notebooks/SIS/linear_weights/data/simulated_linear_data.csv", index=False)

## Run via the Python interface

In [None]:
from spatial_mix.protos.py.sampler_params_pb2 import SamplerParams
from google.protobuf import text_format

df = pd.read_csv("notebooks/SIS/linear_weights/data/simulated_linear_data.csv")
# One list of observations per area, ordered by group index. All N simulated
# groups are loaded (the loop used to stop at 8, dropping half of the areas).
datas = [df[df.group == g].data.to_list() for g in range(N)]

# MCMC settings, passed positionally to the sampler in this order.
burnin = 10000
niter = 10000
thin = 10

# NOTE(review): the original call passed `Dmat2`, which is not defined anywhere
# in this notebook. G (the grid adjacency matrix built above) is presumably the
# intended proximity matrix -- confirm against runSpatialMixtureSamplerFromData.
chains = runSpatialMixtureSamplerFromData(burnin, niter, thin, datas, G,
                                          "spatial_mix/resources/sampler_params.asciipb")

## Plot density estimates

In [None]:
# Evaluation grid for the densities: 1000 evenly spaced points on [-10, 10].
xgrid = np.linspace(start=-10, stop=10, num=1000)
# Posterior density estimates are currently switched off:
# dens = estimateDensities(chains, xgrid)

In [None]:
from scipy.stats import norm

# One panel per area, four panels per row. Ceiling division gives exactly the
# number of rows needed (floor(N/4) + N%4 over-allocates when N % 4 > 1).
ncols = 4
nrows = int(np.ceil(N / ncols))
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 8))
axes = np.ravel(axes)

for g in range(N):
    # True density of area g: the three unit-variance Gaussian components
    # weighted by the simulated mixture weights.
    axes[g].plot(xgrid,
                 weights[g][0] * norm.pdf(xgrid, -5, 1.0) +
                 weights[g][1] * norm.pdf(xgrid, 0.0, 1.0) +
                 weights[g][2] * norm.pdf(xgrid, 5.0, 1.0))
    #axes[g].plot(xgrid, np.mean(dens[g], 0))
    axes[g].set_ylim([0, 0.3])
    #intervals = np.array([pm.stats.hpd(dens[g][:, i], 0.05) for i in range(dens[g].shape[1])])
    #axes[g].fill_between(xgrid, intervals[:, 0], intervals[:, 1], alpha=0.3, color=current_palette[2])
    axes[g].set_title("Group: {0}".format(g))
#     sns.kdeplot(datas[g], ax=axes[g], color=current_palette[3])

# Hide the panels of the last row that have no area to show.
for ax in axes[N:]:
    ax.set_visible(False)

plt.tight_layout()