# Multivariate Gaussian Mixture Example

In [None]:
import sys
sys.path.append("../")

import numpy as np
import matplotlib.pyplot as plt
import time

import multiprocessing as mp

from ebc.sequential.iterative_with_convexification import SensitivityBasedFW

from ebc.gaussian import gaussian_multivariate_log_likelihood, gaussian_KL

from splitting import split_based_on_ML, split_based_on_sensitivities, split_randomly, distribute
from parallelization import parallelize

from sklearn.mixture import GaussianMixture

import warnings
warnings.filterwarnings(action = "ignore", category = FutureWarning)

import pickle

from analyze_distribution import plot_coresets

## Data Generation

In [None]:
np.random.seed(123)

# Parameters of the two Gaussian components (means kept as column vectors).
mu0 = np.array([0, 0]).reshape((-1, 1))
mu1 = np.array([-1, 1]).reshape((-1, 1))
sigma0 = np.array([[1, 0.9], [0.9, 1]])
sigma1 = np.array([[1, -0.9], [-0.9, 1]])

full_means = [mu0, mu1]
full_sigmas = [sigma0, sigma1]

# Component label (0 or 1) for each of the 3000 observations.
mixture_inds = np.random.choice([0, 1], size=3000, replace=True)

# Draw one point per label, in label order, from the matching component.
mixture = np.array([
    np.random.multivariate_normal(full_means[component].flatten(), full_sigmas[component])
    for component in mixture_inds
]).reshape(-1, 2)

plt.scatter(mixture[:, 0], mixture[:, 1])

In [None]:
from sklearn.cluster import KMeans

# Two-cluster k-means on the sample, used only to colour the scatter plot.
km = KMeans(n_clusters=2)
cl = km.fit_predict(mixture)
plt.scatter(*mixture.T, c=cl)

In [None]:
# Two-component Gaussian mixture on the sample, used to colour the scatter plot.
# fit_predict already fits the model before labelling, so a separate
# gm.fit(mixture) beforehand would only repeat the estimation.
gm = GaussianMixture(2)
cl = gm.fit_predict(mixture)
plt.scatter(mixture[:, 0], mixture[:, 1], c = cl)

## Log-likelihood Definition

In [None]:
# Define log-likelihood

def log_likelihood(params, X, y, weights):
    '''
    Per-observation value p * ll_0 + (1 - p) * ll_1, where ll_c is the diagonal
    of gaussian_multivariate_log_likelihood(X.T, mean_c, sigma_c) and
    sigma0 / sigma1 are the module-level covariance matrices.

    Parameters:
    ----------
    params: np.ndarray; params[0] is p, params[1:3] the first mean and
            params[3:5] the second mean. Any further entries are not read.
    X: np.ndarray; passed transposed to gaussian_multivariate_log_likelihood
       (presumably one observation per row - confirm against ebc.gaussian).
    y, weights: not used here.

    Returns:
    ----------
    log_lik: np.ndarray column vector (shape = (n, 1)), one row per diagonal entry
    '''
    mix_weight = params[0]
    mean_a = params[1:3].reshape(-1, 1)
    mean_b = params[3:5].reshape(-1, 1)
    points = X.T

    def component_column(mean, cov):
        # Keep only the diagonal of the helper's output, as a column.
        return np.diag(gaussian_multivariate_log_likelihood(points, mean, cov)).reshape(-1, 1)

    term_a = mix_weight * component_column(mean_a, sigma0)
    term_b = (1 - mix_weight) * component_column(mean_b, sigma1)
    return term_a + term_b

def summed_log_likelihood(params, X, y, weights):
    '''Sum of all entries returned by log_likelihood for the same arguments.'''
    per_observation = log_likelihood(params, X, y, weights)
    return per_observation.sum()

def negative_summed_log_likelihood(params, X, y, weights):
    '''Negated summed_log_likelihood, i.e. a quantity to minimise instead of maximise.'''
    total = summed_log_likelihood(params, X, y, weights)
    return -total

# https://stats.stackexchange.com/questions/90134/gradient-of-multivariate-gaussian-log-likelihood
def grad_log_likelihood(params, X, y, weights):
    '''
    Placeholder for the gradient of the log-likelihood.

    Returns:
    ----------
    None: no gradient is implemented; all arguments are ignored.
    '''
    return None

def log_posterior(params, X, y, weights):
    '''Weighted combination weights.T @ log_likelihood(...) of the per-observation values.'''
    per_observation = log_likelihood(params, X, y, weights)
    return weights.T @ per_observation

In [None]:
# Coreset sizes 100, 110, ..., 300 evaluated in the experiments below.
coreset_sizes = np.arange(start=100, stop=310, step=10)
len(coreset_sizes)

In [None]:
# https://stats.stackexchange.com/questions/7440/kl-divergence-between-two-univariate-gaussians
# https://math.stackexchange.com/questions/2614267/can-we-solve-kl-divergence-between-gaussian-mixtures-by-thinking-conditional-cas
def mixture_kl_element(mu0, sigma0, mu1, sigma1, w):
    '''Return w * gaussian_KL(sigma0, sigma1, mu0, mu1): one weighted term of the mixture KL sum.'''
    divergence = gaussian_KL(sigma0, sigma1, mu0, mu1)
    return w * divergence

## Testing

In [None]:
x = mixture

# Sequential
# Result layout: <metric>_sequential[repetition][coreset size index].
fkl_sequential = []
bkl_sequential = []
time_sequential = []

# Options forwarded to SensitivityBasedFW.run as norm_attributes.
na = {"log_likelihood": log_likelihood,
      "log_likelihood_start_value": np.ones(9),
      "S": int(0.3 * len(x)),
      "log_likelihood_gradient": None,
      "approx": "MCMC",
      "MCMC_subs_size": int(0.7 * len(x)),
      "log_posterior": log_posterior,
      "log_posterior_start_value": np.ones(9)}

for k in range(10):
    # One fixed seed per repetition.
    np.random.seed(120 + k)
    fkl_sequential_k = []
    bkl_sequential_k = []
    time_sequential_k = []

    for i in coreset_sizes:
        print(k, i)

        # Only the coreset construction is timed.
        start = time.time()
        sbfw = SensitivityBasedFW(x)
        w, I = sbfw.run(k = i, likelihood_gram_matrix = None, norm = "2", norm_attributes = na)
        time_sequential_k.append(time.time() - start)

        # Calculate posterior approximation: fit a mixture to the weighted
        # points, keeping only rows with a positive weight.
        gm = GaussianMixture(2)
        gm.fit((w * mixture)[w.flatten() > 0])
        means = gm.means_
        sigmas = gm.covariances_
        ps = gm.weights_

        # Order the fitted components by the norm of their mean so that
        # index j lines up with full_means[j] / full_sigmas[j].
        means_inds = np.argsort(np.linalg.norm(means, axis = 1))
        means = means[means_inds]
        sigmas = sigmas[means_inds]
        ps = ps[means_inds]

        fkl = 0
        bkl = 0
        for j in range(2):
            # gm.means_ has shape (n_components, n_features): row j is the
            # mean of component j, matching sigmas[j] and ps[j]. Column j
            # would mix one coordinate of both components.
            fkl += mixture_kl_element(full_means[j], full_sigmas[j], means[j, :], sigmas[j], ps[j])
            bkl += mixture_kl_element(means[j, :], sigmas[j], full_means[j], full_sigmas[j], ps[j])

        fkl_sequential_k.append(fkl)
        bkl_sequential_k.append(bkl)

    fkl_sequential.append(fkl_sequential_k)
    bkl_sequential.append(bkl_sequential_k)
    time_sequential.append(time_sequential_k)

print(f"FKL: {fkl_sequential}")
print(f"BKL: {bkl_sequential}")
print(f"Time: {time_sequential}")

In [None]:
# Turn the nested result lists of the sequential run into arrays.
fkl_sequential, bkl_sequential, time_sequential = map(
    np.array, (fkl_sequential, bkl_sequential, time_sequential)
)

In [None]:
x = mixture

# Parallel
# Result layout: <metric>_parallel[repetition][coreset size index][strategy index],
# where strategy 0 is split_randomly and strategy 1 is split_based_on_ML.
fkl_parallel = []
bkl_parallel = []
time_parallel = []

# Options handed to parallelize via its na argument.
na = {"log_likelihood": log_likelihood,
      "log_likelihood_start_value": np.ones(9),
      "S": int(0.3 * 0.1 * len(x)),
      "log_likelihood_gradient": None,
      "approx": "MCMC",
      "MCMC_subs_size": int(0.1 * len(x)),
      "log_posterior": log_posterior,
      "log_posterior_start_value": np.ones(9)}

for k in range(10):
      # One fixed seed per repetition.
      np.random.seed(120 + k)
      fkl_parallel_k = []
      bkl_parallel_k = []
      time_parallel_k = []

      for i in coreset_sizes:
            print(f"{k}: {i}")
            fkl_parallel_i = []
            bkl_parallel_i = []
            time_parallel_i = []
            for ind, strat in enumerate([split_randomly, split_based_on_ML]):
                  # The timer covers both the splitting step and the parallel run.
                  start = time.time()

                  # Step 1: distribute
                  if ind == 0:
                        full_inds = strat(x)
                  elif ind == 1:
                        # NOTE(review): strat (split_based_on_ML) is not called in this
                        # branch; the split is built from a mixture fit and distribute().
                        gm = GaussianMixture(2, n_init = 40)
                        gm.fit(mixture)

                        # Get probability estimates
                        # params = [first mixing weight, flattened component means];
                        # the absolute per-point values are normalised to sum to one.
                        params = np.hstack((gm.weights_.flatten()[0], gm.means_.flatten()))
                        log_liks = log_likelihood(params, mixture, None, None)
                        probs = np.abs(log_liks) / np.sum(np.abs(log_liks))
                        probs = probs.flatten()
                        full_inds = distribute(probs)

                  # Step 2: run
                  # k is the requested size integer-divided by the CPU count
                  # (presumably the per-worker coreset size - confirm in parallelize).
                  w, output = parallelize(alg = SensitivityBasedFW, x = mixture, k = int(i // mp.cpu_count()), norm = "2", na = na, distributed_indices = full_inds)

                  time_parallel_i.append(time.time() - start)

                  # Calculate posterior approximation
                  # Fit a mixture to the weighted points with a positive weight.
                  gm = GaussianMixture(2, n_init = 40)
                  gm.fit((w * mixture)[w.flatten() > 0])
                  means = gm.means_
                  sigmas = gm.covariances_
                  ps = gm.weights_

                  # Order fitted components by the norm of their mean so that
                  # index j lines up with full_means[j] / full_sigmas[j].
                  means_inds = np.argsort(np.linalg.norm(means, axis = 1))
                  means = means[means_inds]
                  sigmas = sigmas[means_inds]
                  ps = ps[means_inds]

                  fkl = 0
                  bkl = 0
                  for j in range(2):
                        # Row j of means is the mean of fitted component j.
                        fkl += mixture_kl_element(full_means[j], full_sigmas[j], means[j, :], sigmas[j], ps[j])
                        bkl += mixture_kl_element(means[j, :], sigmas[j], full_means[j], full_sigmas[j], ps[j])

                  fkl_parallel_i.append(fkl)
                  bkl_parallel_i.append(bkl)

            fkl_parallel_k.append(fkl_parallel_i)
            bkl_parallel_k.append(bkl_parallel_i)
            time_parallel_k.append(time_parallel_i)

      fkl_parallel.append(fkl_parallel_k)
      bkl_parallel.append(bkl_parallel_k)
      time_parallel.append(time_parallel_k)

print(f"FKL: {fkl_parallel}")
print(f"BKL: {bkl_parallel}")
print(f"Time: {time_parallel}")

In [None]:
# Turn the nested result lists of the parallel run into arrays.
fkl_parallel, bkl_parallel, time_parallel = map(
    np.array, (fkl_parallel, bkl_parallel, time_parallel)
)

In [None]:
import pickle

# Bundle every result array under its own name and persist the bundle.
data = dict(zip(
    ("fkl_sequential", "bkl_sequential", "time_sequential",
     "fkl_parallel", "bkl_parallel", "time_parallel"),
    (fkl_sequential, bkl_sequential, time_sequential,
     fkl_parallel, bkl_parallel, time_parallel),
))

with open('../data/multivariate_gaussian_mixture.pickle', 'wb') as file:
    pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)

## Plot

In [None]:
# Reload the stored results. pickle.load executes arbitrary code from the
# file, so only open pickles produced by this notebook.
with open('../data/multivariate_gaussian_mixture.pickle', 'rb') as file:
    data = pickle.load(file)

(fkl_sequential, bkl_sequential, time_sequential,
 fkl_parallel, bkl_parallel, time_parallel) = (
    np.array(data[key])
    for key in ('fkl_sequential', 'bkl_sequential', 'time_sequential',
                'fkl_parallel', 'bkl_parallel', 'time_parallel')
)

In [None]:
# Symmetrised KL: the average of the forward and backward divergences.
klsym_sequential = (fkl_sequential + bkl_sequential) * 0.5
klsym_parallel = (fkl_parallel + bkl_parallel) * 0.5

# Coreset sizes 100, 110, ..., 300 used on the x-axis of the plots.
coreset_sizes = np.arange(start=100, stop=310, step=10)

In [None]:
# Log of the NaN-ignoring median over axis 0 of the sequential symmetric KL.
np.log(
    np.nanmedian(klsym_sequential, axis=0)
)

In [None]:
plt.rcParams.update({'font.size': 22})
fig = plt.figure(figsize=(20, 7))

# Left half: KL panel. Right half: two stacked panels sharing the x-axis,
# drawn as a broken y-axis for the run times.
ax12 = fig.add_subplot(121)
ax13 = fig.add_subplot(222)
ax14 = fig.add_subplot(224, sharex=ax13)

# One line style per method, reused for the KL and the timing curves.
sequential_style = dict(label='Sequential', linestyle="solid", linewidth=2, color='black')
random_style = dict(label='Random split', linestyle="dashed", linewidth=2, color='dimgray')
ml_style = dict(label='ML split', linestyle="solid", marker="o", linewidth=2, color='maroon')

# Log of the median symmetric KL over axis 0, per coreset size.
median_kl_parallel = np.nanmedian(klsym_parallel, axis=0)
ax12.plot(coreset_sizes, np.log(np.nanmedian(klsym_sequential, axis=0)), **sequential_style)
ax12.plot(coreset_sizes, np.log(median_kl_parallel[:, 0]), **random_style)
ax12.plot(coreset_sizes, np.log(median_kl_parallel[:, 1]), **ml_style)

# Hide the facing spines of the two timing panels.
ax13.spines['bottom'].set_visible(False)
ax13.xaxis.tick_top()
ax13.tick_params(labeltop=False)
ax14.spines['top'].set_visible(False)
ax14.ticklabel_format(useOffset=False)

for labelled_axis in (ax12, ax14):
    labelled_axis.set_xlabel("Coreset size")

fig.text(0.04, 0.5, 'Log KL', va='center', rotation='vertical')
fig.text(0.494, 0.5, 'Seconds', va='center', rotation='vertical')

# Short diagonal marks at the axis break, drawn in axes coordinates.
d = .015
low_span, high_span = (-d, +d), (1 - d, 1 + d)
for broken_axis, y_span in ((ax13, low_span), (ax14, high_span)):
    kwargs = dict(transform=broken_axis.transAxes, color='k', clip_on=False)
    broken_axis.plot(low_span, y_span, **kwargs)
    broken_axis.plot(high_span, y_span, **kwargs)

# The legend is created before the timing curves are drawn, so each label
# appears only once (from the KL panel).
fig.legend()
fig.suptitle('Multivariate Gaussian Mixture')

median_time_parallel = np.nanmedian(time_parallel, axis=0)
ax13.plot(coreset_sizes, np.nanmedian(time_sequential, axis=0), **sequential_style)
ax14.plot(coreset_sizes, median_time_parallel[:, 0], **random_style)
ax14.plot(coreset_sizes, median_time_parallel[:, 1], **ml_style)

for panel in (ax12, ax13, ax14):
    panel.grid()

# plt.savefig("../plots/multivariate_gaussian_mixture.eps")
plt.show()

In [None]:
import pyx

# Place both mixture figures on one canvas and write the combined EPS.
# Both input files must already exist under ../plots/.
c = pyx.canvas.canvas()
for eps_path, anchor in (("../plots/multivariate_gaussian_mixture.eps", "tc"),
                         ("../plots/univariate_gaussian_mixture.eps", "bc")):
    c.insert(pyx.epsfile.epsfile(0, 0, eps_path, align=anchor))
c.writeEPSfile("../plots/all_mixture.eps")

## Distribution

In [None]:
np.random.seed(123)

# Parameters of the two Gaussian components (means kept as column vectors).
mu0 = np.array([3, 0]).reshape((-1, 1))
mu1 = np.array([0, 0]).reshape((-1, 1))
sigma0 = np.array([[1, 0.9], [0.9, 1]])
sigma1 = np.array([[1, -0.9], [-0.9, 1]])

full_means = [mu0, mu1]
full_sigmas = [sigma0, sigma1]

# Component label (0 or 1) for each of the 1000 observations.
mixture_inds = np.random.choice([0, 1], size=1000, replace=True)

# Draw one point per label, in label order, from the matching component.
mixture = np.array([
    np.random.multivariate_normal(full_means[component].flatten(), full_sigmas[component])
    for component in mixture_inds
]).reshape(-1, 2)

plt.scatter(mixture[:, 0], mixture[:, 1])

In [None]:
x = mixture

# Sequential
# Result layout: <metric>_sequential[repetition][coreset size index].
fkl_sequential = []
bkl_sequential = []
time_sequential = []

# Options forwarded to SensitivityBasedFW.run as norm_attributes.
na = {"log_likelihood": log_likelihood,
      "log_likelihood_start_value": np.ones(9),
      "S": int(0.3 * len(x)),
      "log_likelihood_gradient": None,
      "approx": "MCMC",
      "MCMC_subs_size": int(0.7 * len(x)),
      "log_posterior": log_posterior,
      "log_posterior_start_value": np.ones(9)}

# The fitted components are sorted by the norm of their mean below, so the
# reference components must be put in the same order before they are paired
# by index (full_means itself is not necessarily norm-sorted).
ref_order = np.argsort([np.linalg.norm(m) for m in full_means])
ref_means = [full_means[r] for r in ref_order]
ref_sigmas = [full_sigmas[r] for r in ref_order]

for k in range(1):
    np.random.seed(120 + k)
    fkl_sequential_k = []
    bkl_sequential_k = []
    time_sequential_k = []

    # Coreset weights, one entry per size, kept for the plot_coresets calls.
    w_seq = []

    for i in [100, 200, 300]:
        print(k, i)

        # Only the coreset construction is timed.
        start = time.time()
        sbfw = SensitivityBasedFW(x)
        w, I = sbfw.run(k = i, likelihood_gram_matrix = None, norm = "2", norm_attributes = na)
        time_sequential_k.append(time.time() - start)

        # Calculate posterior approximation
        # NOTE(review): zero-weight rows are not filtered out before fitting
        # here, so they enter the fit as points at the origin - confirm
        # whether that is intended.
        gm = GaussianMixture(2)
        gm.fit((w * mixture))
        means = gm.means_
        sigmas = gm.covariances_
        ps = gm.weights_

        # Resolve label switching: order fitted components by mean norm.
        means_inds = np.argsort(np.linalg.norm(means, axis = 1))
        means = means[means_inds]
        sigmas = sigmas[means_inds]
        ps = ps[means_inds]

        fkl = 0
        bkl = 0
        for j in range(2):
            # gm.means_ has shape (n_components, n_features): row j is the
            # mean of component j, matching sigmas[j] and ps[j].
            fkl += mixture_kl_element(ref_means[j], ref_sigmas[j], means[j, :], sigmas[j], ps[j])
            bkl += mixture_kl_element(means[j, :], sigmas[j], ref_means[j], ref_sigmas[j], ps[j])

        fkl_sequential_k.append(fkl)
        bkl_sequential_k.append(bkl)

        w_seq.append(w)

    fkl_sequential.append(fkl_sequential_k)
    bkl_sequential.append(bkl_sequential_k)
    time_sequential.append(time_sequential_k)

print(f"FKL: {fkl_sequential}")
print(f"BKL: {bkl_sequential}")
print(f"Time: {time_sequential}")

In [None]:
x = mixture

# Parallel
# Result layout: <metric>_parallel[repetition][coreset size index][strategy index],
# where strategy 0 is split_randomly and strategy 1 is the ML-based split.
fkl_parallel = []
bkl_parallel = []
time_parallel = []

# Options handed to parallelize via its na argument.
na = {"log_likelihood": log_likelihood,
      "log_likelihood_start_value": np.ones(9),
      "S": int(0.3 * 0.1 * len(x)),
      "log_likelihood_gradient": None,
      "approx": "MCMC",
      "MCMC_subs_size": int(0.1 * len(x)),
      "log_posterior": log_posterior,
      "log_posterior_start_value": np.ones(9)}

# The fitted components are sorted by the norm of their mean below, so the
# reference components must be put in the same order before they are paired
# by index (full_means itself is not necessarily norm-sorted).
ref_order = np.argsort([np.linalg.norm(m) for m in full_means])
ref_means = [full_means[r] for r in ref_order]
ref_sigmas = [full_sigmas[r] for r in ref_order]

for k in range(1):
    np.random.seed(120 + k)
    fkl_parallel_k = []
    bkl_parallel_k = []
    time_parallel_k = []

    # Flat lists with one entry per (size, strategy) pair, in the order
    # (100, random), (100, ML), (200, random), (200, ML), (300, random), (300, ML).
    full_inds_i = []
    output_i = []
    w_i = []

    for i in [100, 200, 300]:
        print(f"{k}: {i}")
        fkl_parallel_i = []
        bkl_parallel_i = []
        time_parallel_i = []
        for ind, strat in enumerate([split_randomly, split_based_on_ML]):
            # The timer covers both the splitting step and the parallel run.
            start = time.time()

            # Step 1: distribute
            if ind == 0:
                full_inds = strat(x)
            elif ind == 1:
                # Estimate the mixture on the full data set. Fitting on
                # w * mixture would reuse the weights of whichever coreset
                # happened to be computed last.
                gm = GaussianMixture(2, n_init = 20)
                gm.fit(mixture)

                # Get probability estimates
                # params = [first mixing weight, flattened component means];
                # the absolute per-point values are normalised to sum to one.
                params = np.hstack((gm.weights_.flatten()[0], gm.means_.flatten()))
                log_liks = log_likelihood(params, mixture, None, None)
                probs = np.abs(log_liks) / np.sum(np.abs(log_liks))
                probs = probs.flatten()
                full_inds = distribute(probs)

            full_inds_i.append(full_inds)

            # Step 2: run
            # k is the requested size integer-divided by the CPU count
            # (presumably the per-worker coreset size - confirm in parallelize).
            w, output = parallelize(alg = SensitivityBasedFW, x = x, k = int(i // mp.cpu_count()), norm = "2", na = na, distributed_indices = full_inds)

            time_parallel_i.append(time.time() - start)

            # Calculate posterior approximation
            # NOTE(review): zero-weight rows are not filtered out before
            # fitting here, so they enter the fit as points at the origin -
            # confirm whether that is intended.
            gm = GaussianMixture(2, n_init = 40)
            gm.fit((w * mixture))
            means = gm.means_
            sigmas = gm.covariances_
            ps = gm.weights_

            # Resolve label switching: order fitted components by mean norm.
            means_inds = np.argsort(np.linalg.norm(means, axis = 1))
            means = means[means_inds]
            sigmas = sigmas[means_inds]
            ps = ps[means_inds]

            fkl = 0
            bkl = 0
            for j in range(2):
                # gm.means_ has shape (n_components, n_features): row j is
                # the mean of component j, matching sigmas[j] and ps[j].
                fkl += mixture_kl_element(ref_means[j], ref_sigmas[j], means[j, :], sigmas[j], ps[j])
                bkl += mixture_kl_element(means[j, :], sigmas[j], ref_means[j], ref_sigmas[j], ps[j])

            fkl_parallel_i.append(fkl)
            bkl_parallel_i.append(bkl)

            output_i.append(output)
            w_i.append(w)

        fkl_parallel_k.append(fkl_parallel_i)
        bkl_parallel_k.append(bkl_parallel_i)
        time_parallel_k.append(time_parallel_i)

    fkl_parallel.append(fkl_parallel_k)
    bkl_parallel.append(bkl_parallel_k)
    time_parallel.append(time_parallel_k)

print(f"FKL: {fkl_parallel}")
print(f"BKL: {bkl_parallel}")
print(f"Time: {time_parallel}")

In [None]:
# Size 100: sequential weights next to the first parallel run (random split).
plot_coresets(
    mixture,
    w_seq[0],
    full_inds_i[0],
    output_i[0],
    w_i[0],
    100,
    "multivariate_gaussian_mixture",
)

In [None]:
# Size 200: full_inds_i, output_i and w_i are flat over (size, strategy) pairs,
# i.e. (100, random), (100, ML), (200, random), ... - so the random-split run
# for size 200 sits at index 2 (index 1 is the size-100 ML split).
plot_coresets(mixture, w_seq[1], full_inds_i[2], output_i[2], w_i[2], 200, "multivariate_gaussian_mixture")

In [None]:
# Size 300: full_inds_i, output_i and w_i are flat over (size, strategy) pairs,
# i.e. (100, random), (100, ML), (200, random), (200, ML), (300, random), ... -
# so the random-split run for size 300 sits at index 4 (index 2 is size 200).
plot_coresets(mixture, w_seq[2], full_inds_i[4], output_i[4], w_i[4], 300, "multivariate_gaussian_mixture")