# Bayesian linear and GP regression

## Load the NOAA CO2 file

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read all lines, then drop comment lines starting with '#'
df = pd.read_fwf(
    "/work/scj-gdrive/co2.txt",
    widths=[6, 6, 12, 10, 10],
    header=None,
    comment="#",
    names=["year", "month", "t", "y", "trend"]
)

# Drop any non-numeric rows (e.g. leftover 'year month decimal ...' line)
df = df[pd.to_numeric(df["t"], errors="coerce").notna()].copy()
df["t"] = df["t"].astype(float)
df["y"] = df["y"].astype(float)

# Time and data
t = df["t"].values
y = df["y"].values

# Training mask and splits
mask_train = t <= 2007 + 8/12         # shape (N,)
t_train = t[mask_train]               # shape (N_train,)


## Mean and covariance

In [4]:
Phi = np.c_[t,np.ones(len(t))]
m0, S0 = np.array([0.,360.]), np.diag([1e4,1e4])
beta = 1.0
SN=np.linalg.inv(np.linalg.inv(S0)+beta*Phi.T@Phi) 
mN=SN@(np.linalg.inv(S0)@m0 + beta*Phi.T@y)

print('Mean:',mN)
print('Cov:',SN)
print('Std:',np.sqrt(np.diag(SN)))

## MAP estimates 

In [7]:
sigma2 = 1.0
beta = 1.0 / sigma2

a_MAP, b_MAP = mN
print("a_MAP:", a_MAP)
print("b_MAP:", b_MAP)

# Compute residuals g_obs(t)
g_obs = y - (a_MAP * t + b_MAP)
g_train = g_obs[mask_train]               # shape (N_train,)

# Quick diagnostics (optional)
print("Residual mean:", np.mean(g_obs))
print("Residual std:", np.std(g_obs))

# Plot residuals over time
plt.figure(figsize=(10, 4))
plt.plot(df["t"].values, g_obs, "b-", markersize=3)
plt.axhline(0.0, color="r", linestyle="--", linewidth=1)
plt.xlabel("Calendar time (decimal year)")
plt.ylabel("Residual g_obs(t) [ppm]")
plt.title("CO2 residuals g_obs(t) = y(t) - (a_MAP t + b_MAP)")
plt.grid(True)
plt.tight_layout()
plt.show()

## Generate samples drawn from a GP

In [10]:
def sample_gp(x, kernel, jitter=1e-6, random_state=None):
    """
    Sample from a zero-mean Gaussian process prior on given inputs x.

    Parameters
    ----------
    x : array-like, shape (N,) or (N, d)
        Input locations.
    kernel : callable
        Function k(x1, x2) returning the covariance between x1 and x2.
        Must accept two arrays of shape (N, d) and (M, d) and return (N, M).
    jitter : float
        Small diagonal term added for numerical stability.
    random_state : int or np.random.RandomState, optional

    Returns
    -------
    f : ndarray, shape (N,)
        One sample of function values f(x) from GP(0, k).
    """
    x = np.atleast_2d(x)
    if x.shape[0] == 1 and x.shape[1] > 1:
        x = x.T   # ensure shape (N, d)

    N = x.shape[0]

    # 1. Build covariance matrix K_ij = k(x_i, x_j)
    K = kernel(x, x)          # shape (N, N)

    # 2. Add jitter for numerical stability
    K = K + jitter * np.eye(N)

    # 3. Sample from multivariate normal with mean 0 and covariance K
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    f = rng.multivariate_normal(mean=np.zeros(N), cov=K)

    return f

## Test hyperparameters

In [13]:
def kernel_st(s, t, theta, tau, sigma, phi, eta, zeta):
    """
    s, t : arrays of shape (N,) and (M,) (1D inputs)
    returns K of shape (N,M)
    """
    s = np.atleast_1d(s)
    t = np.atleast_1d(t)
    S, T = np.meshgrid(s, t, indexing="ij")   # S_ij = s_i, T_ij = t_j
    diff = S - T

    # Periodic part
    arg_periodic = np.pi * diff / tau
    K_per = np.exp(-2.0 * np.sin(arg_periodic)**2 / sigma**2)

    # RBF part
    K_rbf = np.exp(-0.5 * (diff**2) / eta**2)

    # Combine with scales
    K = theta**2 * (K_per + phi**2 * K_rbf)

    # Add noise on the diagonal if shapes match (square matrix when s==t)
    if s.shape == t.shape and np.allclose(s, t):
        K = K + (zeta**2) * np.eye(len(s))

    return K

def make_kernel(theta, tau, sigma, phi, eta, zeta):
    return lambda s, t: kernel_st(s, t, theta, tau, sigma, phi, eta, zeta)

# ----- plot several sample functions -----
x = np.linspace(0, 5.0, 400)  # input grid

theta = 1.0
tau   = 1.0    # period
sigma = 0.3    # periodic lengthscale
phi   = 0.5    # weight of RBF term
eta   = 1.0    # RBF lengthscale
zeta  = 0.1    # noise scale

ker = make_kernel(theta, tau, sigma, phi, eta, zeta)

plt.figure(figsize=(8, 4))
for m in range(5):
    f = sample_gp(x, ker)
    plt.plot(x, f, alpha=0.7)

plt.xlabel("x")
plt.ylabel("f(x)")
plt.title("Samples from GP with periodic + RBF kernel")
plt.grid(True)
plt.tight_layout()
plt.show()

### Minimise the negative log marginal likelihood to optimise hyperparameters

In [14]:
def nlml(p, t_train, g_train, jitter=1e-6):
    log_theta, log_tau, log_sigma, log_phi, log_eta, log_zeta = p
    theta = np.exp(log_theta)
    tau   = np.exp(log_tau)
    sigma = np.exp(log_sigma)
    phi   = np.exp(log_phi)
    eta   = np.exp(log_eta)
    zeta  = np.exp(log_zeta)

    K = kernel_st(t_train, t_train, theta, tau, sigma, phi, eta, zeta)
    K = K + jitter * np.eye(len(t_train))

    # Cholesky for stability: K = L L^T
    L = np.linalg.cholesky(K)
    # Solve K^{-1} g via two triangular solves
    alpha = np.linalg.solve(L.T, np.linalg.solve(L, g_train))

    data_fit = 0.5 * g_train @ alpha
    complexity = np.sum(np.log(np.diag(L)))  # 0.5 * log|K|, up to factor
    const = 0.5 * len(t_train) * np.log(2 * np.pi)
    return data_fit + complexity + const

from scipy.optimize import minimize  # if available in your environment

# initial guess in log-space (hand-tuned)
p0 = np.log([2.0, 1.0, 0.5, 0.5, 3.0, 0.4])  # [theta, tau, sigma, phi, eta, zeta]

res = minimize(
    nlml,
    p0,
    args=(t_train, g_train),
    method="L-BFGS-B",
    options={"maxiter": 100}
)

p_opt = res.x
theta_opt, tau_opt, sigma_opt, phi_opt, eta_opt, zeta_opt = np.exp(p_opt)
print("Optimized hyperparameters:")
print("theta, tau, sigma, phi, eta, zeta =", theta_opt, tau_opt, sigma_opt, phi_opt, eta_opt, zeta_opt)

Optimized hyperparameters:
theta, tau, sigma, phi, eta, zeta = 2.589086891839971 0.9990816088980068 1.7718435610262389 0.6563166765210973 1.2616660220940161 0.33558552328185726

## 0-mean GP

In [15]:
import numpy as np

def build_K(t1, t2, theta, tau, sigma, phi, eta, zeta):
    """Covariance matrix using your kernel k(s,t)."""
    t1 = np.atleast_1d(t1)
    t2 = np.atleast_1d(t2)
    S, T = np.meshgrid(t1, t2, indexing="ij")
    diff = S - T

    # periodic term
    K_per = np.exp(-2.0 * np.sin(np.pi * diff / tau)**2 / sigma**2)
    # RBF term
    K_rbf = np.exp(-0.5 * diff**2 / eta**2)

    K = theta**2 * (K_per + phi**2 * K_rbf)
    if t1.shape == t2.shape and np.allclose(t1, t2):
        K = K + zeta**2 * np.eye(len(t1))
    return K

def fit_gp_residuals(t_train, g_train, theta, tau, sigma, phi, eta, zeta, jitter=1e-6):
    """Precompute training covariance inverse etc. for residual GP."""
    K = build_K(t_train, t_train, theta, tau, sigma, phi, eta, zeta)
    K = K + jitter * np.eye(len(t_train))
    K_inv = np.linalg.inv(K)
    alpha = K_inv @ g_train  # used for predictive mean
    return {"theta": theta, "tau": tau, "sigma": sigma,
            "phi": phi, "eta": eta, "zeta": zeta,
            "t_train": t_train, "K_inv": K_inv, "alpha": alpha}

def gp_mean(t_test, gp_params):
    """Predictive mean of residual g(t) at test points."""
    th = gp_params
    t_train = th["t_train"]
    K_star = build_K(t_test, t_train,
                     th["theta"], th["tau"], th["sigma"],
                     th["phi"], th["eta"], th["zeta"])
    # m_g = K_* K^{-1} g = K_* alpha
    return K_star @ th["alpha"]

def gp_std(t_test, gp_params):
    """Predictive std of residual g(t) at test points."""
    th = gp_params
    t_train = th["t_train"]
    K_inv = th["K_inv"]

    K_star = build_K(t_test, t_train,
                     th["theta"], th["tau"], th["sigma"],
                     th["phi"], th["eta"], th["zeta"])
    K_starstar = build_K(t_test, t_test,
                         th["theta"], th["tau"], th["sigma"],
                         th["phi"], th["eta"], th["zeta"])
    # Σ_g = K** - K_* K^{-1} K_*^T
    cov = K_starstar - K_star @ K_inv @ K_star.T
    var = np.clip(np.diag(cov), 0.0, np.inf)   # numerical safety
    return np.sqrt(var)

In [16]:
ker = lambda s, u: kernel_st(s, u, theta_opt, tau_opt, sigma_opt, phi_opt, eta_opt, zeta_opt)

# zero-mean prior sample
fig, axes = plt.subplots(1, 3, figsize=(12,4), sharey=True)

for ax, (t0, t1) in zip(
    axes,
    [(1980,1990), (1990,2000), (2000,2010)]
):
    mask = (t >= t0) & (t < t1)
    t_win = t[mask]
    for _ in range(3):
        ax.plot(t_win, sample_gp(t_win, ker), alpha=0.7)
    ax.axhline(0, color="g", ls="--")
    ax.set_title(f"{t0}-{t1}")
    ax.grid(True)

axes[0].set_ylabel("g(t)")
for ax in axes:
    ax.set_xlabel("Year")
plt.tight_layout()
plt.show()


## Extrapolation

In [48]:
# Define test times: Sept 2007 to Dec 2020 (monthly)
t_test = np.arange(2007 + 8/12, 2020 + 12/12 + 1e-6, 1/12)

# Compute predictive mean / std of residual g(t) at t_test
gp_params = fit_gp_residuals(t_train, g_train,
                             theta_opt, tau_opt, sigma_opt, phi_opt, eta_opt, zeta_opt)
m_g = gp_mean(t_test, gp_params)
s_g = gp_std(t_test, gp_params)

# Turn residual predictions into CO2 predictions
f_mean = a_MAP * t_test + b_MAP + m_g
f_std  = s_g                   # var[f] = var[g] under this model

# Plot observed CO2 and extrapolated mean ± 1 std
plt.figure(figsize=(10, 4))
plt.plot(t, y, "m-", markersize=2, label="Observed CO$_2$")

# extrapolated GP mean
plt.plot(t_test, f_mean, "C0-", label="GP extrapolated mean")

# 1 std error bars as a shaded band
plt.fill_between(
    t_test,
    f_mean - f_std,
    f_mean + f_std,
    color="C0",
    alpha=0.2,
    label="±1 std (GP)"
)

plt.xlabel("Year")
plt.ylabel("CO$_2$ concentration (ppm)")
plt.title("CO$_2$ with linear trend + GP residual extrapolation")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# =========================
# Define untrained vs trained hyperparameters
# =========================
# hand-chosen "untrained" values
theta_u, tau_u, sigma_u, phi_u, eta_u, zeta_u = 2.0, 1.0, 0.5, 0.5, 3.0, 0.4

# "trained" values = optimised hyperparameters

# =========================
# Fit GP for untrained and trained cases
# =========================
gp_un = fit_gp_residuals(t_train, g_train,
                         theta_u, tau_u, sigma_u, phi_u, eta_u, zeta_u)
gp_tr = fit_gp_residuals(t_train, g_train,
                         theta_opt, tau_opt, sigma_opt, phi_opt, eta_opt, zeta_opt)

# Predict residuals at test times
m_g_un = gp_mean(t_test, gp_un)
s_g_un = gp_std(t_test, gp_un)

m_g_tr = gp_mean(t_test, gp_tr)
s_g_tr = gp_std(t_test, gp_tr)

# Convert to CO2
f_un = a_MAP * t_test + b_MAP + m_g_un
f_tr = a_MAP * t_test + b_MAP + m_g_tr

# =========================
# Plot: trained vs untrained extrapolation in one block
# =========================
fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)

# LEFT: untrained hyperparameters
ax = axes[0]
ax.plot(t, y, "b.", ms=2, label="Observed CO$_2$")
ax.plot(t_test, f_un, "c-", label="Untrained mean")
ax.fill_between(t_test, f_un - s_g_un, f_un + s_g_un,
                color="C0", alpha=0.2, label="±1 std")
ax.set_title("Untrained hyperparameters")
ax.set_xlabel("Year")
ax.set_ylabel("CO$_2$ (ppm)")
ax.grid(True)
ax.legend()

# RIGHT: trained hyperparameters
ax = axes[1]
ax.plot(t, y, "b.", ms=2, label="Observed CO$_2$")
ax.plot(t_test, f_tr, "m-", label="Trained mean")
ax.fill_between(t_test, f_tr - s_g_tr, f_tr + s_g_tr,
                color="C1", alpha=0.2, label="±1 std")
ax.set_title("Trained hyperparameters")
ax.set_xlabel("Year")
ax.grid(True)
ax.legend()

plt.tight_layout()
plt.show()

# Mean-field learning

In [6]:
import numpy as np
import matplotlib.pyplot as plt

N=400  # number of data points - you can increase this if you want to
       # learn better features (but it will take longer).
D=16   # dimensionality of the data

np.random.seed(0)

# Define the basic shapes of the features

m1 = [0, 0, 1, 0,
      0, 1, 1, 1,
      0, 0, 1, 0,
      0, 0, 0, 0]

m2 = [0, 1, 0, 0,
      0, 1, 0, 0,
      0, 1, 0, 0,
      0, 1, 0, 0]

m3 = [1, 1, 1, 1,
      0, 0, 0, 0,
      0, 0, 0, 0,
      0, 0, 0, 0]

m4 = [1, 0, 0, 0,
      0, 1, 0, 0,
      0, 0, 1, 0,
      0, 0, 0, 1] 

m5 = [0, 0, 0, 0,
      0, 0, 0, 0,
      1, 1, 0, 0,
      1, 1, 0, 0] 

m6 = [1, 1, 1, 1,
      1, 0, 0, 1,
      1, 0, 0, 1,
      1, 1, 1, 1] 

m7 = [0, 0, 0, 0,
      0, 1, 1, 0,
      0, 1, 1, 0,
      0, 0, 0, 0]

m8 = [0, 0, 0, 1,
      0, 0, 0, 1,
      0, 0, 0, 1,
      0, 0, 0, 1]

nfeat = 8 # number of features
rr = 0.5 + np.random.rand(nfeat, 1) * 0.5 # weight of each feature between 0.5 and 1
mut = np.array([rr[0] * m1, rr[1] * m2, rr[2] * m3, rr[3] * m4, rr[4] * m5,
                rr[5] * m6, rr[6] * m7, rr[7] * m8])
s = np.random.rand(N, nfeat) < 0.3 # each feature occurs with prob 0.3 independently 

# Generate Data - The Data is stored in Y

Y = np.dot(s, mut) + np.random.randn(N, D) * 0.1 # some Gaussian noise is added 

nrows = 13
for k in range(16):
    plt.subplot(4, 4, k + 1)
    plt.imshow(np.reshape(Y[k], (4, 4)), cmap=plt.gray(), interpolation='none')
    plt.axis('off')

plt.show()

## E-Step

In [7]:
def log_gauss_pdf(x, mu, sigma2):
    """Log multivariate Gaussian density N(x | mu, sigma2 * I)"""
    D = len(x)
    return -0.5 * D * np.log(2 * np.pi * sigma2) - 0.5 / sigma2 * np.sum((x - mu)**2)

def compute_lower_bound(lambda_n, x_n, mu_kd, sigma2, pie):
    """
    Compute variational lower bound F for one data point
    mu_kd: K x D matrix of factor means
    """
    K, D = mu_kd.shape
    
    # Entropy term H[q(s)] = -E[log q(s)]
    entropy = np.sum(lambda_n * np.log(lambda_n + 1e-12) + 
                     (1-lambda_n) * np.log(1-lambda_n + 1e-12))
    
    # E[log p(s|pie)]
    log_prior_exp = np.sum(lambda_n * np.log(pie + 1e-12) + 
                          (1-lambda_n) * np.log(1-pie + 1e-12))
    
    # E[mu_s] = lambda_n @ mu_kd  (K -> D)
    E_mu_s = np.dot(lambda_n, mu_kd)
    
    # Var(mu_s_d) = sum_k lambda_k*(1-lambda_k)*mu_kd[k,d]^2
    var_mu_s = np.sum((lambda_n * (1 - lambda_n))[:, np.newaxis] * mu_kd**2, axis=0)
    
    # E[log p(x|s)] using total variance sigma2 + Var(mu_s)
    log_like_exp = log_gauss_pdf(x_n, E_mu_s, sigma2 + np.mean(var_mu_s))
    
    return log_prior_exp + log_like_exp + entropy

def mean_field_update(lambda_n, x_n, mu_kd, sigma2, pie):
    """Fixed-point update lambda_i <- argmax F[lambda]"""
    K, D = mu_kd.shape
    new_lambda = np.zeros(K)

    for i in range(K):
        # pie[i] must be scalar, not array of shape (1,)
        log_prior_odds = float(np.log(pie[i] / (1.0 - pie[i] + 1e-12)))

        lambda_minus_i = lambda_n.copy()
        lambda_minus_i[i] = 0.0
        E_mu_minus_i = np.dot(lambda_minus_i, mu_kd)      # (D,)

        residual = x_n - E_mu_minus_i                    # (D,)
        mui = mu_kd[i]                                   # (D,)

        llr = (residual @ mui) / float(sigma2) \
              - 0.5 * (mui @ mui) / float(sigma2)

        logit = float(log_prior_odds + llr)
        new_lambda[i] = 1.0 / (1.0 + np.exp(-logit))

    return new_lambda


def EStep(X, mu, sigma2, pie, lambda0, maxsteps=100, tol=1e-8):
    """
    Mean-field variational inference (E-step of variational EM).

    Args:
        X: N x D data matrix
        mu: D x K matrix of factor means
        sigma2: scalar noise variance
        pie: 1 x K vector of Bernoulli priors π_i
        lambda0: N x K initial variational parameters
        maxsteps: maximum coordinate ascent iterations
        tol: convergence tolerance on change in total F

    Returns:
        lambda: N x K final variational parameters q_n(s_i=1) = λ_{n,i}
        F: array of lower bound values over iterations
    """
    N, D = X.shape
    K = mu.shape[1]

    # Internally use mu_kd: K x D
    mu_kd = mu.T

    # Initialize
    lambda_n = lambda0.copy().astype(np.float64)
    F_history = np.zeros(maxsteps)

    for step in range(maxsteps):
        # Old lower bound
        F_old = sum(
            compute_lower_bound(lambda_n[n], X[n], mu_kd, sigma2, pie)
            for n in range(N)
        )

        # Coordinate ascent: cycle through data points
        for n in range(N):
            x_n = X[n]  # n is defined here
            lambda_n[n] = mean_field_update(lambda_n[n], x_n, mu_kd, sigma2, pie)

        # New lower bound
        F_new = sum(
            compute_lower_bound(lambda_n[n], X[n], mu_kd, sigma2, pie)
            for n in range(N)
        )

        F_history[step] = F_new

        # Convergence check
        delta_F = abs(F_new - F_old)
        if delta_F < tol:
            print(f"Converged after {step+1} steps (ΔF = {delta_F:.2e})")
            break

        if step % 20 == 0 or step < 5:
            print(f"Step {step}: F = {F_new:.4f} (ΔF = {delta_F:.2e})")

    return lambda_n, F_history[:step+1]

## M-Step

In [8]:
def MStep(X, ES, ESS, eps=1e-6):
    """
    mu, sigma, pie = MStep(X,ES,ESS)

    Inputs:
    -----------------
           X: shape (N, D) data matrix
          ES: shape (N, K) E_q[s]
         ESS: shape (K, K) sum over data points of E_q[ss'] (N, K, K)
                           if E_q[ss'] is provided, the sum over N is done for you.

    Outputs:
    --------
          mu: shape (D, K) matrix of means in p(y|{s_i},mu,sigma)
       sigma: shape (,)    standard deviation in same
         pie: shape (1, K) vector of parameters specifying generative distribution for s
    """
    N, D = X.shape
    if ES.shape[0] != N:
        raise TypeError("ES must have the same number of rows as X")
    K = ES.shape[1]

    if ESS.shape == (N, K, K):
        ESS = np.sum(ESS, axis=0)
    if ESS.shape != (K, K):
        raise TypeError("ESS must be square and have the same number of columns as ES")

    # Add small ridge to avoid singularity
    ESS_reg = ESS + eps * np.eye(K)

    # μ = (ESS^-1 ES^T X)^T  -> use solve instead of explicit inverse
    mu = np.linalg.solve(ESS_reg, ES.T @ X).T   # (D,K)

    # σ update (safe version)
    num = (
        np.trace(X.T @ X) +
        np.trace(mu.T @ mu @ ESS) -
        2.0 * np.trace(ES.T @ X @ mu)
    )
    sigma2 = max(num / (N * D), 1e-12)
    sigma = float(np.sqrt(sigma2))

    pie = ES.mean(axis=0)                       # (K,)

    return mu, sigma, pie

Write two or three sentences discussing how the solution relates to linear regression and why.

## Combined EM Function

In [9]:
def LearnBinFactors(X, K, iterations=100, tol=1e-8, verbose=True):
    """
    Full variational EM for binary latent factor model.
    Returns:
        mu : (D, K)
        sigma : scalar
        pie : (K,)
        F_history : ELBO over EM iterations
    """
    N, D = X.shape

    # Initialise parameters
    mu = 0.1 * np.random.randn(D, K)
    sigma = 1.0
    pie = 0.5 * np.ones(K)
    lambda_n = 0.5 * np.ones((N, K))

    F_history = np.zeros(iterations)
    em_done = 0  # how many EM steps actually run

    for em_iter in range(iterations):
        # === E-STEP via EStep() ===
        lambda_n, F_path = EStep(
            X, mu, sigma**2, pie, lambda_n,
            maxsteps=50, tol=1e-8
        )
        F_new = F_path[-1]                 # current ELBO
        F_history[em_iter] = F_new

        # === M-STEP ===
        ESS_diag = np.sum(lambda_n * (1 - lambda_n), axis=0)
        ESS = np.diag(ESS_diag)

        mu_old, sigma_old, pie_old = mu.copy(), sigma, pie.copy()
        mu, sigma, pie = MStep(X, lambda_n, ESS)

        # === Monotonicity check ===
        if em_iter > 0 and F_new < F_history[em_iter - 1]:
            if verbose:
                print(f"WARNING: F decreased from {F_history[em_iter-1]:.4f} to {F_new:.4f}")
                print("  Reverting to previous parameters")
            mu, sigma, pie = mu_old, sigma_old, pie_old
            F_history[em_iter] = F_history[em_iter - 1]
        else:
            if em_iter > 0:
                delta_F = abs(F_new - F_history[em_iter - 1])
                if delta_F < tol and verbose:
                    print(f"Converged at EM iteration {em_iter+1}: ΔF = {delta_F:.2e}")
                    em_done = em_iter + 1
                    break

        if verbose and em_iter % 10 == 0:
            print(f"EM {em_iter}: F={F_history[em_iter]:.4f}, σ={sigma:.4f}, π={pie.round(3)}")

        em_done = em_iter + 1  # update if we didn't break

    if verbose:
        print("\n=== Final Results ===")
        print(f"Converged in {em_done} EM iterations")
        print(f"Final F: {F_history[em_done-1]:.4f}")
        print(f"σ: {sigma:.4f}")
        print(f"π: {pie}")
        print(f"μ shape: {mu.shape}")

    return mu, sigma, pie, F_history[:em_done]

## Run on genimages.py

In [10]:
X = Y     # your data matrix, shape (N, 16)
K_true = nfeat
N, D = X.shape                    # D should be 16
K = 8                            

# 2. Learn binary latent factor model
mu, sigma, pie, F_history = LearnBinFactors(X, K, iterations=100)

# 3. Visualise each μ as a 4×4 image
for k in range(K):
    plt.subplot(2, K//2, k+1)
    plt.imshow(mu[:, k].reshape(4, 4), cmap="gray", vmin=mu.min(), vmax=mu.max())
    plt.axis("off")
plt.suptitle("Learned μ features (4×4)")
plt.tight_layout()
plt.show()

Step 0: F = -12808.4334 (ΔF = 2.31e+02)
Step 1: F = -12814.6780 (ΔF = 6.24e+00)
Step 2: F = -12814.4817 (ΔF = 1.96e-01)
Step 3: F = -12814.4878 (ΔF = 6.17e-03)
Step 4: F = -12814.4876 (ΔF = 2.49e-04)
Converged after 9 steps (ΔF = 6.58e-10)
EM 0: F=-12814.4876, σ=0.0000, π=[0.436 0.542 0.318 0.509 0.494 0.526 0.458 0.351]
Step 0: F = -2448358160419534.5000 (ΔF = 2.45e+15)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
/tmp/ipykernel_853/4288592019.py:52: RuntimeWarning: overflow encountered in exp
  new_lambda[i] = 1.0 / (1.0 + np.exp(-logit))
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
EM 10: F=-12814.4876, σ=0.0000, π=[0.436 0.542 0.318 0.509 0.494 0.526 0.458 0.351]
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
EM 20: F=-12814.4876, σ=0.0000, π=[0.436 0.542 0.318 0.509 0.494 0.526 0.458 0.351]
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
EM 30: F=-12814.4876, σ=0.0000, π=[0.436 0.542 0.318 0.509 0.494 0.526 0.458 0.351]
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
EM 40: F=-12814.4876, σ=0.0000, π=[0.436 0.542 0.318 0.509 0.494 0.526 0.458 0.351]
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
EM 50: F=-12814.4876, σ=0.0000, π=[0.436 0.542 0.318 0.509 0.494 0.526 0.458 0.351]
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
EM 60: F=-12814.4876, σ=0.0000, π=[0.436 0.542 0.318 0.509 0.494 0.526 0.458 0.351]
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
EM 70: F=-12814.4876, σ=0.0000, π=[0.436 0.542 0.318 0.509 0.494 0.526 0.458 0.351]
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
EM 80: F=-12814.4876, σ=0.0000, π=[0.436 0.542 0.318 0.509 0.494 0.526 0.458 0.351]
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
EM 90: F=-12814.4876, σ=0.0000, π=[0.436 0.542 0.318 0.509 0.494 0.526 0.458 0.351]
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters
Step 0: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 1: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 2: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 3: F = -99868699963097168.0000 (ΔF = 9.74e+16)
Step 4: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 20: F = -2439559147201384.0000 (ΔF = 9.74e+16)
Step 40: F = -2439559147201384.0000 (ΔF = 9.74e+16)
WARNING: F decreased from -12814.4876 to -99868699963097168.0000
  Reverting to previous parameters

=== Final Results ===
Converged in 100 EM iterations
Final F: -12814.4876
σ: 0.0000
π: [0.43637231 0.54201043 0.31785741 0.50949573 0.4943884  0.52550078
 0.45752202 0.35095216]
μ shape: (16, 8)

## Variational approximation for just the first data point

In [145]:
def stable_sigmoid(x):
        # handles large ±x safely
        return np.where(
            x >= 0,
            1.0 / (1.0 + np.exp(-x)),
            np.exp(x) / (1.0 + np.exp(x))
        )

def MeanField_single(x, mu, sigma, pie, lambda0, maxsteps=200, tol=1e-10):
    """
    Run mean-field VI for a single data point x (N=1).
    Returns:
        lambda_hist: T x K
        F_hist     : length-T array of ELBO values
    """
    X = x.reshape(1, -1)   # 1 x D
    N, D = X.shape
    K = mu.shape[1]
    sigma2 = max(sigma**2, 1e-6)
    
    # Internal helpers
    def log_gauss_pdf(x, m, s2):
        D = len(x)
        return -0.5*D*np.log(2*np.pi*s2) - 0.5/s2 * np.sum((x - m)**2)

    def compute_F(lmbda):
        mu_kd = mu.T                     # K x D
        # Entropy
        H = np.sum(lmbda*np.log(lmbda+1e-12) +
                   (1-lmbda)*np.log(1-lmbda+1e-12))
        # Prior
        log_prior = np.sum(lmbda*np.log(pie+1e-12) +
                           (1-lmbda)*np.log(1-pie+1e-12))
        # Likelihood term
        E_mu = lmbda @ mu_kd             # D
        var_mu = np.sum((lmbda*(1-lmbda))[:, None] * mu_kd**2, axis=0)
        log_like = log_gauss_pdf(x, E_mu, sigma2 + np.mean(var_mu))
        return log_prior + log_like + H

    def mf_update(lmbda):
        mu_kd = mu.T
        new_lmbda = np.zeros_like(lmbda)
        for i in range(K):
            log_prior_odds = np.log(pie[i] / (1-pie[i] + 1e-12))
            lmbda_minus = lmbda.copy()
            lmbda_minus[i] = 0.0
            E_mu_minus = lmbda_minus @ mu_kd
            resid = x - E_mu_minus
            llr = (resid @ mu_kd[i]) / sigma2 - 0.5*(mu_kd[i] @ mu_kd[i]) / sigma2
            logit = log_prior_odds + llr
            new_lmbda[i] = stable_sigmoid(logit)
        return new_lmbda

    # Coordinate ascent
    lmbda = lambda0.copy()
    F_hist = []
    lambda_hist = []
    for t in range(maxsteps):
        F_old = compute_F(lmbda)
        lambda_hist.append(lmbda.copy())
        F_hist.append(F_old)
        new_lmbda = mf_update(lmbda)
        if np.max(np.abs(new_lmbda - lmbda)) < tol:
            lmbda = new_lmbda
            F_hist.append(compute_F(lmbda))
            lambda_hist.append(lmbda.copy())
            break
        lmbda = new_lmbda

    return np.array(lambda_hist), np.array(F_hist)

In [133]:
x1 = X[0]                          # first data point from genimages
K  = mu.shape[1]
lambda0 = 0.5 * np.ones(K)

lambda_hist, F_hist = MeanField_single(
    x1, mu, sigma, pie,
    lambda0=lambda0, maxsteps=200
)

T = len(F_hist)
Ft = F_hist
dF = Ft[1:] - Ft[:-1]              # F(t) - F(t-1)
log_dF = np.log(np.clip(dF, 1e-12, None))

plt.figure(figsize=(10,4))

plt.subplot(1,2,1)
plt.plot(range(T), Ft, 'o-')
plt.xlabel('Iteration t')
plt.ylabel('F_t')
plt.title('ELBO F for single data point')
plt.grid(True)

plt.subplot(1,2,2)
plt.plot(range(1, T), log_dF, 'o-')
plt.xlabel('Iteration t')
plt.ylabel('log(F_t − F_{t-1})')
plt.title('Log increment of ELBO')
plt.grid(True)

plt.tight_layout()
plt.show()

new_lmbda[i] = 1.0 / (1.0 + np.exp(-logit))

### Effects of sigmas

In [136]:
sigmas = [0.05, 0.2, 1.0] 
all_F = []

for s in sigmas:
    _, Fh = MeanField_single(
        x1, mu, s, pie, lambda0, maxsteps=200
    )
    all_F.append(Fh)

plt.figure(figsize=(6,4))
for s, Fh in zip(sigmas, all_F):
    plt.plot(Fh, label=f"sigma={s:.3f}")
plt.xlabel("Iteration t")
plt.ylabel("F_t")
plt.title("ELBO F for different σ")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Variational Bayes for binary factors

##  Hyperparameter optimisation algorithm

In [121]:
def vb_binary_factors(X, K_max, max_iters=200, tol=1e-4, a0=1e-3, b0=1e-3):
    """
    VB (no ARD shrinkage in m) for binary latent factor model, starting with K_max factors.

    Returns:
      F_hist    : array of ELBO values over iterations
      Keff_hist : array of effective factor counts over iterations
      params    : dict of final variational params (lambda, m, tau, pie, sigma2)
    """
    N, D = X.shape
    K = K_max

    # --- Initialise variational parameters ---
    lam = np.clip(0.5 + 0.1*np.random.randn(N, K), 1e-3, 1-1e-3)  # N x K
    m   = 0.1 * np.random.randn(K, D)                              # K x D
    tau = np.ones(K)                                               # precision
    sigma2 = np.var(X)                                             # fixed noise
    pie = 0.3 * np.ones(K)                                         # Bernoulli prior

    F_hist    = []
    Keff_hist = []

    # --- ELBO (simplified) ---
    def compute_F():
        mu_kd = m
        ll = 0.0
        ent = 0.0
        prior_s = 0.0
        for n in range(N):
            ln = lam[n]                    # (K,)
            E_mu_s = ln @ mu_kd            # (D,)

            # Likelihood term
            ll -= 0.5 / sigma2 * np.sum((X[n] - E_mu_s)**2)

            # Entropy of q(S)
            ent -= np.sum(
                ln * np.log(ln + 1e-12) +
                (1 - ln) * np.log(1 - ln + 1e-12)
            )

            # Prior over S
            prior_s += np.sum(
                ln * np.log(pie + 1e-12) +
                (1 - ln) * np.log(1 - pie + 1e-12)
            )
        return ll + prior_s + ent

    # --- VB iterations ---
    for t in range(max_iters):
        mu_kd = m.copy()                  # K x D

        # === E-step: update lambda (mean-field for S) ===
        for n in range(N):
            x_n = X[n]
            for i in range(K):
                lam_minus = lam[n].copy()
                lam_minus[i] = 0.0
                E_mu_minus = lam_minus @ mu_kd
                resid = x_n - E_mu_minus

                E_mu_i = m[i]
                log_prior_odds = np.log(pie[i] / (1 - pie[i] + 1e-12))
                llr = (resid @ E_mu_i) / sigma2 - 0.5 * (E_mu_i @ E_mu_i) / sigma2
                logit = log_prior_odds + llr
                lam[n, i] = 1.0 / (1.0 + np.exp(-logit))

        # === M-step: update m, tau (NO ARD on m) ===
        for i in range(K):
            r_i = lam[:, i]                     # (N,)
            R_i = np.sum(r_i)

            contrib_others = lam @ mu_kd        # N x D
            contrib_others -= r_i[:, None] * m[i][None, :]
            resid_i = X - contrib_others        # N x D

            tau_i = 1.0 + R_i / sigma2          # no alpha term
            tau[i] = tau_i
            m[i] = (r_i[:, None] * resid_i).sum(axis=0) / (sigma2 * max(tau_i, 1e-6))

        # Update π from lam
        pie = lam.mean(axis=0)

        # === ELBO and effective K ===
        F_t = compute_F()
        F_hist.append(F_t)

        norm_m = np.linalg.norm(m, axis=1)
        Keff_t = np.sum(norm_m > 1e-3)
        Keff_hist.append(Keff_t)

        if t > 5 and abs(F_hist[-1] - F_hist[-2]) < tol:
            break

    params = dict(lambda_=lam, m=m, tau=tau, pie=pie, sigma2=sigma2)
    return np.array(F_hist), np.array(Keff_hist), params

## Results

In [124]:
K_values = [2, 4, 8, 16]   # half to 4× true

results = {}
for Kmax in K_values:
    F_hist, Keff_hist, params = vb_binary_factors(
        X, K_max=Kmax, max_iters=200, tol=1e-4
    )
    results[Kmax] = dict(F=F_hist, Keff=Keff_hist, params=params)

In [127]:
plt.figure(figsize=(10,4))

# Left: ELBO vs iteration
plt.subplot(1,2,1)
for Kmax in K_values:
    F = results[Kmax]["F"]
    plt.plot(range(len(F)), F, label=f"Kmax={Kmax}")
plt.xlabel("VB iteration")
plt.ylabel("Free energy F")
plt.title("VB free energy vs iteration")
plt.legend()
plt.grid(True)

# Right: effective number of factors vs iteration
plt.subplot(1,2,2)
for Kmax in K_values:
    Keff = results[Kmax]["Keff"]
    plt.plot(range(len(Keff)), Keff, label=f"Kmax={Kmax}")
plt.xlabel("VB iteration")
plt.ylabel("Effective K")
plt.title("Effective number of factors vs iteration")
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()

# EP & Loopy BP

In [139]:
def loopyBP(h, J, max_iters=50, tol=1e-6):
    """
    Loopy BP on binary Ising model with fields h and couplings J.

    h: shape (K,)
    J: shape (K, K), with zeros on the diagonal

    Returns:
        m  : shape (K,) approximate node marginals E[s_i]
        nu : shape (K, K) message natural params nu_{i->j}
    """
    K = len(h)
    nu = np.zeros((K, K))  # messages in natural-parameter form

    sigma = lambda x: 1.0 / (1.0 + np.exp(-x))

    for it in range(max_iters):
        nu_old = nu.copy()
        for i in range(K):
            for j in range(K):
                if i == j:
                    continue
                # cavity field at i excluding j
                eta_i_ex = h[i] + sum(nu[k, i] for k in range(K) if k != j)
                m_i_ex = stable_sigmoid(eta_i_ex)

                # BP message update i -> j for factor exp(J_ij s_i s_j)
                J_ij = np.clip(J[i, j], -20.0, 20.0)
                num = np.exp(J_ij) * m_i_ex + (1.0 - m_i_ex)
                den = m_i_ex + (1.0 - m_i_ex)
                nu[i, j] = np.log(num / den)

        if np.max(np.abs(nu - nu_old)) < tol:
            # print(f"Loopy BP converged in {it+1} iterations")
            break

    # Node beliefs from local fields + incoming messages
    eta = h + np.sum(nu, axis=0)
    m = sigma(eta)
    return m, nu


## Run on data

In [151]:
N, D = X.shape
K = mu.shape[1]
sigma2 = sigma**2

posterior_mf = np.zeros((N, K))
posterior_bp = np.zeros((N, K))

for n in range(N):
    x_n = X[n]

    # Fields and couplings for this data point
    # h_i = μ_i^T x / σ² − ½||μ_i||²/σ² + log(π_i / (1-π_i))
    h_n = (mu.T @ x_n) / sigma2 \
          - 0.5 * np.sum(mu**2, axis=0) / sigma2 \
          + np.log(pie / (1 - pie))

    # J_ij = μ_i^T μ_j / σ², diagonal = 0
    J = (mu.T @ mu) / sigma2
    np.fill_diagonal(J, 0.0)

    # Mean-field for this point
    lambda0 = 0.5 * np.ones(K)
    lambda_hist, F_hist = MeanField_single(
        x_n, mu, sigma, pie, lambda0,
        maxsteps=200, tol=1e-10
    )
    posterior_mf[n] = lambda_hist[-1]  # final λ_n

    # Loopy BP for this point
    m_bp, _ = loopyBP(h_n, J, max_iters=50, tol=1e-6)
    posterior_bp[n] = m_bp


## MF vs BP

In [154]:
# Mean absolute difference per factor
mad_per_factor = np.mean(np.abs(posterior_mf - posterior_bp), axis=0)
print("Mean |MF - BP| per factor:", mad_per_factor)

# Overall mean absolute difference
mad_overall = np.mean(np.abs(posterior_mf - posterior_bp))
print("Overall mean |MF - BP|:", mad_overall)

# Scatter plot over all N×K entries
plt.figure(figsize=(4,4))
plt.scatter(
    posterior_mf.flatten(),
    posterior_bp.flatten(),
    alpha=0.3, s=10
)
plt.plot([0, 1], [0, 1], 'k--', linewidth=1)
plt.xlabel('Mean-field E[s_i | x_n]')
plt.ylabel('Loopy BP E[s_i | x_n]')
plt.title('MF vs BP posterior means (all points)')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=cb182644-878e-48cb-992b-68a78a5afe3d' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>