In [6]:
%matplotlib inline
import numpy as np
from scipy.stats import multivariate_normal, norm
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.ticker
from matplotlib import rc
rc('text', usetex=True)

import seaborn as sns

import os

from copy import deepcopy

import tqdm

import pickle


from sklearn.mixture import GaussianMixture
from sklearn.datasets import make_spd_matrix

pd.options.display.max_rows = 1000

In [7]:
outdir = './output/synthetic/univariate/abrupt_mixture'
if not os.path.exists(outdir):
    os.makedirs(outdir)

In [8]:
class Theta:
    def __init__(self, pi, mu, sigma):
        self.pi = pi
        self.mu = mu
        self.sigma = sigma

In [9]:
class SDEM(GaussianMixture):
    def __init__(self, r, alpha, n_components=2, **kwargs):
        super().__init__(n_components=n_components, **kwargs)
        
        self.r = r
        self.alpha = alpha
        self.t = 0
        
        self.means_list = []
        self.means_bar_list = []
        self.covariances_list = []
        self.covariances_bar_list = []
        
    def fit(self, X, y=None):
        super().fit(X, y)
        self.means_bar = np.copy(self.means_)
        self.covariances_bar = np.copy(self.covariances_)
        
    def update(self, x):
        """Employ SDEM algorithm."""
        
        # E-step for weights
        gamma = (1-self.alpha*self.r[t]) * self.predict_proba(x.reshape(1, -1)) + self.alpha*self.r[t]/self.weights_.shape[0]
        gamma = gamma.ravel()
        self.weights_ = (1-self.r[t])*self.weights_ + self.r[t]*gamma
        
        x = x.ravel()
        means, means_bar, covariances, covariances_bar = [], [], [], []
        for i, (w, m, cov) in enumerate(zip(self.weights_, self.means_bar, self.covariances_bar)):
            
            # E-step for mean and covariance
            m = (1-self.r[t])*m + self.r[t]*gamma[i]*x
            cov = (1-self.r[t])*cov + self.r[t]*gamma[i]*np.outer(x,x)
            means_bar.append(m)
            covariances_bar.append(cov)

            # M-step for mean and covariance
            m = m/w
            cov = cov/w - np.outer(m, m)
            means.append(m)
            covariances.append(cov)

        
        self.means_ = np.vstack(means)
        self.covariances_ = np.vstack(covariances)
        self.means_bar = np.vstack(means_bar)
        self.covariances_bar = np.vstack(covariances_bar)

        self.means_list.append(self.means_)
        self.covariances_list.append(self.covariances_)
        self.means_bar_list.append(self.means_bar)
        self.covariances_bar_list.append(self.covariances_bar)
        
        self.t += 1

In [10]:
def compute_tau(X, theta):
    N = X.shape[0]
    K = len(theta.pi)

    tau = np.zeros((N, K))
    for k in range(K):
        d_k = norm(theta.mu[k], theta.sigma[k])
        for i in range(N):
            tau[i, k] = theta.pi[k] * d_k.pdf(X[i])
    
    tau /= np.sum(tau, axis=1, keepdims=True)
    
    return tau

In [11]:
def compute_stat(X, Z):
    K = Z.shape[0]
    D = X.shape[1]

    s1 = np.sum(Z, axis=0)
    s2 = X.T.dot(Z).ravel()
    s3 = (X**2).T.dot(Z).ravel()
    
    return s1, s2, s3

In [12]:
def step_M(s, x, eps=1e-8):
    pi = (s.s1 + eps) / (1.0 + eps * len(s.s1))
    mu = s.s2 / (s.s1 + eps)
    sigma = np.sqrt(s.s3 / (s.s1 + eps) - mu**2)
    return pi, mu, sigma

In [13]:
class SuffStat:
    def __init__(self):
        pass

In [14]:
class SuffStatGMM(SuffStat):
    def __init__(self, K, D, seed=0):
        super().__init__()
        np.random.seed(seed)
        self.s1 = np.random.random(K)
        self.s2 = np.random.random(K)
        self.s3 = np.random.random(K)

In [15]:
def sra(X, theta0, rho, gamma, start=50):
    K = len(theta0.mu)
    N = X.shape[0]
    
    theta_est = np.zeros((N, 3*K+1))
    theta = deepcopy(theta0)
    
    s = SuffStatGMM(len(theta0.pi), 1)
    
    # initialization
    tau = compute_tau(X[:10, :], theta)
    s.s1[:], s.s2[:], s.s3[:] = compute_stat(X[:10, :], tau)
    
    inds = np.arange(N)
    
    for n in range(N):
        ind_n = inds[n]
        
        tau_indiv_new = compute_tau(X[ind_n, :], theta).ravel()
        s2_indiv_new = X[ind_n, :] * tau_indiv_new
        s3_indiv_new = X[ind_n, :]**2 * tau_indiv_new

        if ( np.any(np.isnan(tau_indiv_new))  | ((np.linalg.norm(s2_indiv_new - s.s2) >= gamma) & (n >= start)) ):
            theta_est[n, :] = np.hstack((n, theta.pi, theta.mu, theta.sigma))
            continue
            
        # update the statistics
        s.s1 += rho[n] * (tau_indiv_new - s.s1)
        s.s2 += rho[n] * (s2_indiv_new - s.s2)
        s.s3 += rho[n] * (s3_indiv_new - s.s3)
                
        # M-step
        theta.pi[:], theta.mu[:], theta.sigma[:] = step_M(s, X[ind_n, :])
        theta_est[n, :] = np.hstack((n, theta.pi, theta.mu, theta.sigma))

    return theta_est

In [16]:
def generate_data(N, pi, mu, sigma):
    
    z_list = np.random.choice(np.arange(len(pi)), N, replace=True)
    
    X = np.zeros((N, 1))
    for i, z in enumerate(z_list):
        X_i = np.random.normal(mu[z], sigma[z])
        X[i, :] = X_i
    
    return X

In [17]:
def generate_data_abrupt_single(N, 
                                pi=np.array([1.0]), 
                                mu1=np.array([0.5]), 
                                mu2=np.array([1.0]), 
                                sigma=np.array([1.0]), 
                                alpha=0.01):
    X = np.zeros((2*N, 1))
    Xj_former = generate_data(N, pi, mu1, sigma)
    X[:N, 0] = Xj_former.ravel()
    Xj_latter = generate_data(N, pi, mu2, sigma)
    X[N:, 0] = Xj_latter.ravel()

    idxes_ol = np.random.choice(np.arange(2*N), int(alpha*2*N), replace=False)
    X[idxes_ol, :] = np.random.uniform(-5, 5, len(idxes_ol)).reshape(-1, 1)    
    
    return X

In [18]:
def generate_data_abrupt_mixture(N, 
                                 pi=np.array([0.5, 0.5]),
                                 mu1=np.array([0.5, -0.5]),
                                 mu2=np.array([1.0, -1.0]),
                                 sigma=np.array([0.1, 0.1]),
                                 alpha=0.01):
    X = np.zeros((2*N, 1))
    Xj_former = generate_data(N, pi, mu1, sigma)
    X[:N, 0] = Xj_former.ravel()
    Xj_latter = generate_data(N, pi, mu2, sigma)
    X[N:, 0] = Xj_latter.ravel()

    idxes_ol = np.random.choice(np.arange(2*N), int(alpha*2*N), replace=False)
    X[idxes_ol, :] = np.random.uniform(-5, 5, len(idxes_ol)).reshape(-1, 1)    

    return X

In [19]:
def generate_data_gradual_single(N, 
                                 pi=np.array([1.0]), 
                                 mu1=np.array([0.5]), 
                                 mu2=np.array([1.0]), 
                                 sigma=np.array([1.0]), 
                                 alpha=0.01):
    X = np.zeros((2*N, 1))
    Xj_former = generate_data(N, pi, mu1, sigma)
    X[:N, 0] = Xj_former.ravel()
    Xj_trans = np.linspace(mu1, mu2, 300).ravel() + np.random.normal(0, sigma, 300)
    X[N:(N+300), 0] = Xj_trans
    Xj_latter = generate_data(N-300, pi, mu2, sigma)
    X[N+300:, 0] = Xj_latter.ravel()

    idxes_ol = np.random.choice(np.arange(2*N), int(alpha*2*N), replace=False)
    X[idxes_ol, :] = np.random.uniform(-5, 5, len(idxes_ol)).reshape(-1, 1)    
    
    return X

In [20]:
def generate_data_gradual_mixture(N, 
                                  pi=np.array([0.5, 0.5]),
                                  mu1=np.array([0.5, -0.5]),
                                  mu2=np.array([1.0, -1.0]),
                                  sigma = np.array([0.1, 0.1]),
                                  alpha=0.01):
    X = np.zeros((2*N, 1))
    Xj_former = generate_data(N, pi, mu1, sigma)
    X[:N, 0] = Xj_former.ravel()
    Xj_trans = np.linspace(mu1, mu2, 300).ravel() + np.random.normal(0, sigma, 300)
    X[N:(N+300), 0] = Xj_trans
    Xj_latter = generate_data(N-300, pi, mu2, sigma)
    X[N+300:, 0] = Xj_latter.ravel()

    idxes_ol = np.random.choice(np.arange(2*N), int(alpha*2*N), replace=False)
    X[idxes_ol, :] = np.random.uniform(-5, 5, len(idxes_ol)).reshape(-1, 1)    

    return X

In [21]:
N = 10000

In [22]:
Nsim = 1

In [23]:
pi = np.array([0.2, 0.8])
pi0 = pi.copy()

In [24]:
mean1 = 0.5
mu1 = np.array([mean1, -mean1])

mean2 = 1.0
mu2 = np.array([mean2, -mean2])

In [25]:
sigma = np.array([0.1, 0.1])
sigma0 = np.array([0.2, 0.2])

In [26]:
seed0 = 1

In [27]:
mu0 = [0.1, -0.1]

In [28]:
theta = Theta(pi, mu1, sigma)
theta0 = Theta(pi0, mu0, sigma0)

In [29]:
seed = seed0
np.random.seed(seed)
X = np.zeros((2*N, 1))
Xj_former = generate_data(N, pi, mu1, sigma)
X[:N, 0] = Xj_former.ravel()
Xj_latter = generate_data(N, pi, mu2, sigma)
X[N:, 0] = Xj_latter.ravel()

idxes_ol = np.random.choice(np.arange(N), int(0.01*N))
X[idxes_ol, :] = np.random.uniform(-5, 5, len(idxes_ol)).reshape(-1, 1)

# choosing optimal rho

In [30]:
ratio_list = [0.05, 0.1, 0.5]
M_list = [1.0, 5.0, 10.0]
n_trial = 10
u = 20
alpha = 0.01

In [None]:
results_optrho = []
for trial in tqdm.tqdm(range(n_trial)):
    np.random.seed(trial)
    X = np.zeros((2*N, 1))
    Xj_former = generate_data(N, pi, mu1, sigma)
    X[:N, 0] = Xj_former.ravel()
    Xj_latter = generate_data(N, pi, mu2, sigma)
    X[N:, 0] = Xj_latter.ravel()
    
    idxes_ol = np.random.choice(np.arange(2*N), int(alpha*2*N), replace=False)
    X[idxes_ol, :] = np.random.uniform(-u, u, len(idxes_ol)).reshape(-1, 1)

    for gamma in [1, 3, 5, 10, 15]:
        print('gamma:', gamma)
        for ratio in ratio_list:
            print('\tratio:', ratio)
            for M in M_list:
                print('\t\tM:', M)
                rho = ratio * np.exp(-gamma**2/M**2) / (2.0*gamma)
                print('\t\t\trho:', rho)
                rho_const = np.repeat(rho, X.shape[0])
                theta_est_sra = sra(X, theta0, rho_const, gamma)
                mse_pc = np.mean(np.sum((theta_est_sra[500:1000, 3:5] - mu1)**2, axis=1))
                mse_former = np.sum((theta_est_sra[1000:10000, 3:5] - mu1)**2, axis=1)
                mse_latter = np.sum((theta_est_sra[10000:, 3:5] - mu2)**2, axis=1)
                mse_overall = np.hstack((mse_former, mse_latter))
                print('\t\t\t\tmse:', np.mean(mse_former), np.mean(mse_latter), np.mean(mse_overall))
                results_optrho.append([gamma, ratio, M, rho, mse_pc, np.mean(mse_former), np.mean(mse_latter), np.mean(mse_overall)])

  # This is added back by InteractiveShellApp.init_path()


gamma: 1
	ratio: 0.05
		M: 1.0
			rho: 0.009196986029286059
				mse: 0.027907578678442513 0.7245561410260891 0.39456471675615123
		M: 5.0
			rho: 0.02401973597880808
				mse: 0.00043271915195634077 0.001575684855864951 0.0010342800487503463
		M: 10.0
			rho: 0.024751245843729203
				mse: 0.0004462005925503985 0.0015587095850996358 0.0010317316412605236
	ratio: 0.1
		M: 1.0
			rho: 0.018393972058572117
				mse: 0.0022440253488947306 0.0017832242183856459 0.0020014984381004756
		M: 5.0
			rho: 0.04803947195761616
				mse: 0.0008908513343960751 0.9206440817257647 0.48497149890880076
		M: 10.0
			rho: 0.049502491687458405
				mse: 0.0009203454778150664 0.9227599580438512 0.4860990889336237
	ratio: 0.5
		M: 1.0
			rho: 0.09196986029286058
				mse: 0.0018273787677678345 0.9554608714798613 0.5037397433530802
		M: 5.0
			rho: 0.2401973597880808


  return np.exp(-x**2/2.0) / _norm_pdf_C


				mse: 0.26603789766628516 4.9824601939844815 2.7483654220442837
		M: 10.0
			rho: 0.24751245843729203
				mse: 0.28361769054884756 4.970908758589165 2.750612989517436
gamma: 3
	ratio: 0.05
		M: 1.0
			rho: 1.028415034055663e-06
				mse: 0.038395640910420754 0.7759550142911856 0.42658478479503387
		M: 5.0
			rho: 0.005813969383925258
				mse: 0.00015203757138437465 0.0044937146372529425 0.0024371307639467787
		M: 10.0
			rho: 0.007616093210593569
				mse: 0.00022143909787111833 0.003518122910415823 0.0019565358413156995
	ratio: 0.1
		M: 1.0
			rho: 2.056830068111326e-06


In [None]:
results_optrho_df = pd.DataFrame(results_optrho, columns=['gamma', 'ratio', 'M', 'rho', 'pc', 'former', 'latter', 'overall'])

In [None]:
results_optrho_df

In [None]:
results_optrho_df.groupby(['gamma', 'ratio', 'M', 'rho'])['pc'].agg([np.mean, np.std])

In [None]:
with open(os.path.join(outdir, 'results_optrho_df.pkl'), 'wb') as f:
    pickle.dump(results_optrho_df, f)

In [None]:
results_best = results_optrho_df.loc[
    ((results_optrho_df['gamma'] == 1) & (results_optrho_df['ratio'] == 0.05) & (results_optrho_df['M'] == 5.0) ) | 
    ((results_optrho_df['gamma'] == 3) & (results_optrho_df['ratio'] == 0.1) & (results_optrho_df['M'] == 5.0) ) |
    ((results_optrho_df['gamma'] == 5) & (results_optrho_df['ratio'] == 0.5) & (results_optrho_df['M'] == 5.0) ) | 
    ((results_optrho_df['gamma'] == 10) & (results_optrho_df['ratio'] == 0.5) & (results_optrho_df['M'] == 10.0) ) |
    ((results_optrho_df['gamma'] == 15) & (results_optrho_df['ratio'] == 0.5) & (results_optrho_df['M'] == 10.0) ) 
]

In [None]:
fig, axes = plt.subplots(5, 1, sharex=True, figsize=(6, 7), dpi=200)

sns.pointplot(data=results_best, x='gamma', y='rho', ax=axes[0])
sns.pointplot(data=results_best, x='gamma', y='pc', ax=axes[1])
sns.pointplot(data=results_best, x='gamma', y='former', ax=axes[2])
sns.pointplot(data=results_best, x='gamma', y='latter', ax=axes[3])
sns.pointplot(data=results_best, x='gamma', y='overall', ax=axes[4])

axes[0].set_title('(a)', fontsize=12)
axes[0].set_xlabel(r'$\gamma$', fontsize=12)
axes[0].set_ylabel(r'$\rho$', fontsize=12)
axes[0].grid()

axes[1].set_title('(b)', fontsize=12)
axes[1].set_yscale('log')
axes[1].set_xlabel(r'$\gamma$', fontsize=12)
axes[1].set_ylabel(r'$S_{\mathrm{eval}}$', fontsize=12)
axes[1].grid()

axes[2].set_title('(c)', fontsize=12)
axes[2].set_yscale('log')
axes[2].set_xlabel(r'$\gamma$', fontsize=12)
axes[2].set_ylabel(r'$S_{\mathrm{bc}}$', fontsize=12)
axes[2].grid()

axes[3].set_title('(d)', fontsize=12)
axes[3].set_yscale('log')
axes[3].set_xlabel(r'$\gamma$', fontsize=12)
axes[3].set_ylabel(r'$S_{\mathrm{ac}}$', fontsize=12)
axes[3].grid()

axes[4].set_title('(e)', fontsize=12)
axes[4].set_yscale('log')
axes[4].set_xlabel(r'$\gamma$', fontsize=12)
axes[4].set_ylabel(r'$S_{\mathrm{tot}}$', fontsize=12)


plt.rc('font', family='serif')
axes[4].set_xticklabels([1, 3, 5, 10, 15])

axes[4].grid()


plt.close(2)
plt.close(3)
plt.close(4)
plt.close(5)
plt.close(6)

plt.tight_layout()
plt.savefig(os.path.join(outdir, 'gamma_rho_tradeoff.pdf'), dpi=200, bbox_inches='tight')
plt.savefig(os.path.join(outdir, 'gamma_rho_tradeoff.eps'), dpi=200, bbox_inches='tight')
plt.savefig(os.path.join(outdir, 'gamma_rho_tradeoff.png'), dpi=200, bbox_inches='tight')