<a href="https://colab.research.google.com/github/polrgn/biomedical_clustering_topic_modeling/blob/main/_appendix_DBLBM_M3_syntheticdatasets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Library imports

In [None]:
import numpy as np
import random
from sklearn.metrics.cluster import adjusted_rand_score, adjusted_mutual_info_score

## DBLBM $\mathcal{M}_3$ implementation

### Helper functions

In [None]:
def initialize(x,n,d,g):
  """Draw a random starting point for the co-clustering algorithm.

  Rows and columns are each assigned uniformly at random to one of ``g``
  clusters (one-hot encoded). The draw is repeated until every row cluster
  contains at least one row.

  Parameters:
    x: data matrix indexed as ``x[i,j]`` for ``i < n`` and ``j < d``.
    n: number of rows.
    d: number of columns.
    g: number of clusters (shared by rows and columns).

  Returns:
    ``(z, w, pi, rho, eps)`` where ``z`` is the ``(n, g)`` one-hot row
    partition, ``w`` the ``(d, g)`` one-hot column partition, ``pi`` and
    ``rho`` the row/column cluster proportions, and ``eps`` the value
    returned by ``eps_hat_funct`` for the accepted partitions.
  """
  while True:
    # One-hot row partition: each row picks a cluster uniformly at random.
    z = np.zeros(shape=(n,g))
    for i in range(0,n):
      z[i,np.random.randint(g)] = 1

    # One-hot column partition, drawn the same way.
    w = np.zeros(shape=(d,g))
    for j in range(0,d):
      w[j,np.random.randint(g)] = 1

    pi = np.sum(z,axis=0)/n
    rho = np.sum(w,axis=0)/d

    # Accept the draw only when no row cluster is empty.
    if not np.any(pi==0):
      break

  # eps only depends on the accepted partitions, so it is computed once here
  # instead of on every rejected draw.
  eps = eps_hat_funct(z,w,x,n,d,g)

  print('initial z:\n',z[0:min(10,n),],'...')
  print('initial w:\n',w[0:min(10,d),],'...')
  print('initial pi',pi)
  print('initial rho',rho)
  print('initial eps',eps)
  return z,w,pi,rho,eps

In [None]:
# Get epsilon hat
def eps_hat_funct (z,w,x,n,d,g):
  """Estimate the noise parameter epsilon for given row/column partitions.

  Counts the cells that disagree with a block-diagonal pattern: for every
  diagonal block ``(k,k)`` the absolute gap between the block's sum of ``x``
  and the product of the cluster sizes, plus the sum of ``x`` over all
  off-diagonal blocks ``(k,l)``, ``k != l``. The count is divided by ``n*d``
  and floored at 0.05.

  Parameters:
    z: ``(n, g)`` row partition matrix.
    w: ``(d, g)`` column partition matrix.
    x: ``(n, d)`` data matrix.
    n, d, g: number of rows, columns and clusters.

  Returns:
    The estimate ``max(disagreements/(n*d), 0.05)``.
  """
  z_arr = np.asarray(z, dtype=float)
  w_arr = np.asarray(w, dtype=float)
  x_arr = np.asarray(x, dtype=float)

  # block_sums[k,l] = sum_ij z[i,k]*w[j,l]*x[i,j]; one matrix product replaces
  # the nested Python loops over (k, l, i, j).
  block_sums = z_arr[:n,:g].T @ x_arr[:n,:d] @ w_arr[:d,:g]
  diagonal = np.diag(block_sums)

  term_1 = np.sum(np.abs(diagonal - np.sum(z_arr,axis=0)*np.sum(w_arr,axis=0)))
  off_diagonal = np.sum(block_sums) - np.sum(diagonal)

  eps_hat = max((term_1 + off_diagonal)/(n*d),0.05)
  return(eps_hat)

In [None]:
# Get Aik's
def get_Aiks(z,w,x,pi,eps,n,d,g):
  """Score every (row, cluster) pair for the row-assignment step.

  For row ``i`` and cluster ``k`` the score is
  ``(log(eps) - log(1-eps)) * (|sum_j w[j,k]*x[i,j] - sum_j w[j,k]|
  + sum_{l != k} sum_j w[j,l]*x[i,j]) + log(pi[k] + 0.001)``.

  Parameters:
    z: current row partition (not used by the computation; kept for
       interface compatibility).
    w: ``(d, g)`` column partition matrix.
    x: ``(n, d)`` data matrix.
    pi: length-``g`` row cluster proportions.
    eps: noise parameter.
    n, d, g: number of rows, columns and clusters.

  Returns:
    ``(n, g)`` array ``A`` of scores.
  """
  w_arr = np.asarray(w, dtype=float)[:d,:g]
  x_arr = np.asarray(x, dtype=float)[:n,:d]
  pi_arr = np.asarray(pi, dtype=float)[:g]

  # row_cluster_sum[i,k] = sum_j w[j,k]*x[i,j]
  row_cluster_sum = x_arr @ w_arr
  cluster_sizes = np.sum(w_arr,axis=0)

  in_cluster_gap = np.abs(row_cluster_sum - cluster_sizes)
  # Sum over the other clusters = row total minus the cluster's own sum.
  out_of_cluster = np.sum(row_cluster_sum,axis=1,keepdims=True) - row_cluster_sum

  log_odds = np.log(eps)-np.log(1-(eps))
  # The 0.001 offset keeps the log finite for empty clusters.
  A = log_odds*(in_cluster_gap + out_of_cluster) + np.log(pi_arr+0.001)
  return A

def update_z(z,A,n):
  """Return a copy of ``z`` whose first ``n`` rows are re-assigned by score.

  Each row becomes one-hot on the column where the matching row of ``A`` is
  largest; on ties the lowest column index wins.
  """
  assignments = np.copy(z)
  for row in range(n):
    scores = A[row,:]
    # First position attaining the row maximum.
    best = np.flatnonzero(scores==np.max(scores))[0]
    assignments[row,:] = 0
    assignments[row,int(best)] = 1
  return assignments

In [None]:
def maximize_classlikelihood_wrt_z(z,w,x,pi,eps,n,d,g):
  """Iterate the row-assignment step with the column partition held fixed.

  Each pass scores the rows with ``get_Aiks``, re-assigns them with
  ``update_z``, then refreshes ``pi`` and ``eps``. Iteration stops once the
  relative changes of ``z``, ``pi`` and ``eps`` are all at most 0.01.

  Returns:
    ``(z, pi, eps)`` after the last pass.
  """
  col_partition = np.copy(w)
  row_partition = np.copy(z)
  proportions = np.copy(pi)
  noise = np.copy(eps)
  tol = 0.01

  print('starting eps',noise)
  converged = False
  while not converged:
    scores = get_Aiks(row_partition,col_partition,x,proportions,noise,n,d,g)
    new_partition = update_z(row_partition,scores,n)
    new_proportions = np.sum(new_partition,axis=0)/n
    new_noise = eps_hat_funct (new_partition,col_partition,x,n,d,g)

    # Relative changes against the state before this pass.
    change_z = np.sum(np.abs(new_partition-row_partition))/np.sum(row_partition)
    change_pi = np.sum(np.abs(new_proportions-proportions))/np.sum(proportions)
    change_eps = np.abs(new_noise-noise)/noise
    converged = (change_z<=tol) and (change_pi<=tol) and (change_eps<=tol)

    row_partition, proportions, noise = new_partition, new_proportions, new_noise
    print('updated eps',noise)

  return row_partition, proportions, noise
  

In [None]:
# Get Bjl's
def get_Bjls(z,w,x,rho,eps,n,d,g):
  """Score every (column, cluster) pair for the column-assignment step.

  For column ``j`` and cluster ``l`` the score is
  ``(log(eps) - log(1-eps)) * (|sum_i z[i,l]*x[i,j] - sum_i z[i,l]|
  + sum_{k != l} sum_i z[i,k]*x[i,j]) + log(rho[l] + 0.001)``.

  Parameters:
    z: ``(n, g)`` row partition matrix.
    w: current column partition (not used by the computation; kept for
       interface compatibility).
    x: ``(n, d)`` data matrix.
    rho: length-``g`` column cluster proportions.
    eps: noise parameter.
    n, d, g: number of rows, columns and clusters.

  Returns:
    ``(d, g)`` array ``B`` of scores.
  """
  z_arr = np.asarray(z, dtype=float)[:n,:g]
  x_arr = np.asarray(x, dtype=float)[:n,:d]
  rho_arr = np.asarray(rho, dtype=float)[:g]

  # col_cluster_sum[j,l] = sum_i z[i,l]*x[i,j]
  col_cluster_sum = x_arr.T @ z_arr
  cluster_sizes = np.sum(z_arr,axis=0)

  in_cluster_gap = np.abs(col_cluster_sum - cluster_sizes)
  # Sum over the other clusters = column total minus the cluster's own sum.
  out_of_cluster = np.sum(col_cluster_sum,axis=1,keepdims=True) - col_cluster_sum

  log_odds = np.log(eps)-np.log(1-(eps))
  # The 0.001 offset keeps the log finite for empty clusters.
  B = log_odds*(in_cluster_gap + out_of_cluster) + np.log(rho_arr+0.001)
  return B

def update_w(w,B,d):
  """Return a copy of ``w`` whose first ``d`` rows are re-assigned by score.

  Each row becomes one-hot on the column where the matching row of ``B`` is
  largest; on ties the lowest column index wins.
  """
  assignments = np.copy(w)
  for col in range(d):
    scores = B[col,:]
    # First position attaining the row maximum.
    best = np.flatnonzero(scores==np.max(scores))[0]
    assignments[col,:] = 0
    assignments[col,int(best)] = 1
  return assignments

In [None]:
def maximize_classlikelihood_wrt_w(z,w,x,rho,eps,n,d,g):
  """Iterate the column-assignment step with the row partition held fixed.

  Each pass scores the columns with ``get_Bjls``, re-assigns them with
  ``update_w``, then refreshes ``rho`` and ``eps``. Iteration stops once the
  relative changes of ``w``, ``rho`` and ``eps`` are all at most 0.01.

  Returns:
    ``(w, rho, eps)`` after the last pass.
  """
  col_partition = np.copy(w)
  row_partition = np.copy(z)
  proportions = np.copy(rho)
  noise = np.copy(eps)
  tol = 0.01

  print('starting eps',noise)
  converged = False
  while not converged:
    scores = get_Bjls(row_partition,col_partition,x,proportions,noise,n,d,g)
    new_partition = update_w(col_partition,scores,d)
    new_proportions = np.sum(new_partition,axis=0)/d
    new_noise = eps_hat_funct(row_partition,new_partition,x,n,d,g)

    # Relative changes against the state before this pass.
    change_w = np.sum(np.abs(new_partition-col_partition))/np.sum(col_partition)
    change_rho = np.sum(np.abs(new_proportions-proportions))/np.sum(proportions)
    change_eps = np.abs(new_noise-noise)/noise
    converged = (change_w<=tol) and (change_rho<=tol) and (change_eps<=tol)

    col_partition, proportions, noise = new_partition, new_proportions, new_noise
    print('updated eps',noise)

  return col_partition, proportions, noise

### Wrap up DBLBM M3 function

In [None]:
def maximize_classlikelihood(z,w,x,pi,rho,eps,n,d,g):
  """Alternate row and column optimisation until the state stabilises.

  Each outer pass runs ``maximize_classlikelihood_wrt_z`` and then
  ``maximize_classlikelihood_wrt_w`` (fed with the fresh row partition and
  eps). The loop ends when the relative changes of ``z``, ``w``, ``pi``,
  ``rho`` and ``eps`` over one outer pass are all at most 0.01.

  Returns:
    ``(z, w, pi, rho, eps)`` after the last outer pass.
  """
  def relative_change(new, old):
    # L1 change relative to the total mass of the previous value.
    return np.sum(np.abs(new-old))/np.sum(old)

  rows, cols = np.copy(z), np.copy(w)
  row_props, col_props = np.copy(pi), np.copy(rho)
  noise = np.copy(eps)

  while True:
    print('\nMaximizing with respect to z\n')
    new_rows, new_row_props, new_noise = maximize_classlikelihood_wrt_z(rows,cols,x,row_props,noise,n,d,g)
    print('\nMaximizing with respect to w\n')
    new_cols, new_col_props, new_noise = maximize_classlikelihood_wrt_w(new_rows,cols,x,col_props,new_noise,n,d,g)

    changes = (
      relative_change(new_rows,rows),
      relative_change(new_cols,cols),
      relative_change(new_row_props,row_props),
      relative_change(new_col_props,col_props),
      np.abs(new_noise-noise)/np.sum(noise),
    )

    rows, cols = new_rows, new_cols
    row_props, col_props = new_row_props, new_col_props
    noise = new_noise
    print('updated eps',noise)

    if all(change<=0.01 for change in changes):
      break

  return rows, cols, row_props, col_props, noise

## Experiments

### Helper functions for experiment

In [None]:
def compute_loglikelihood(z,w,x,pi,rho,eps,n,d,g):
  """Evaluate the classification log-likelihood of a co-clustering.

  The value is the sum of
    * ``n*d*log(1-eps)``,
    * ``sum_ik z[i,k]*log(pi[k])`` and ``sum_jl w[j,l]*log(rho[l])``,
    * ``(log(eps)-log(1-eps))`` times the number of mismatching cells:
      ``sum_ijk z[i,k]*w[j,k]*|x[i,j]-1|`` on diagonal blocks plus
      ``sum_{k != l} sum_ij z[i,k]*w[j,l]*x[i,j]`` on off-diagonal blocks.

  Undefined products such as ``0*log(0)`` are treated as 0 (they are skipped
  rather than propagated as NaN).

  Parameters:
    z: ``(n, g)`` row partition matrix.
    w: ``(d, g)`` column partition matrix.
    x: ``(n, d)`` data matrix.
    pi, rho: length-``g`` row/column cluster proportions.
    eps: noise parameter.
    n, d, g: number of rows, columns and clusters.

  Returns:
    The log-likelihood as a float.
  """
  z_arr = np.asarray(z, dtype=float)[:n,:g]
  w_arr = np.asarray(w, dtype=float)[:d,:g]
  x_arr = np.asarray(x, dtype=float)[:n,:d]
  pi_arr = np.asarray(pi, dtype=float)[:g]
  rho_arr = np.asarray(rho, dtype=float)[:g]

  log_odds = np.log(eps)-np.log(1-eps)

  # nansum drops the 0*log(0) = NaN terms produced by empty clusters.
  with np.errstate(divide='ignore', invalid='ignore'):
    row_prior = np.nansum(z_arr*np.log(pi_arr))
    col_prior = np.nansum(w_arr*np.log(rho_arr))

  # Block sums via matrix products instead of loops over (i, j, k, l).
  ones_by_block = z_arr.T @ x_arr @ w_arr
  zeros_on_diagonal = np.trace(z_arr.T @ np.abs(x_arr-1) @ w_arr)
  ones_off_diagonal = np.sum(ones_by_block) - np.trace(ones_by_block)

  L_C = np.log(1-eps)*n*d + row_prior + col_prior
  for mismatches in (zeros_on_diagonal, ones_off_diagonal):
    # A zero count contributes nothing, even when log_odds is infinite.
    if mismatches != 0:
      L_C = L_C + log_odds*mismatches
  return L_C

In [None]:
def get_predicted_lab(z,n):
  """Convert a one-hot partition matrix into a column of cluster indices.

  Parameters:
    z: partition matrix whose first ``n`` rows each contain exactly one
       entry equal to 1.
    n: number of rows to convert.

  Returns:
    Float array of shape ``(n, 1)`` holding, for each row, the index of the
    column equal to 1.

  Raises:
    ValueError: if one of the first ``n`` rows does not contain exactly one
      entry equal to 1.
  """
  is_member = np.asarray(z)[:n,:]==1
  if is_member.shape[0] != n or np.any(np.sum(is_member,axis=1) != 1):
    raise ValueError('each of the first n rows of z must contain exactly one 1')
  # argmax on the boolean mask returns the position of the single 1 per row.
  labels_pred = np.argmax(is_member,axis=1).astype(float).reshape(n,1)
  return labels_pred

### Experiment 1

A purely random binary matrix with no cluster structure. The algorithm is expected to assign clusters by chance, so the adjusted Rand score and the adjusted mutual information against the (randomly drawn) reference labels are both expected to be close to 0.

In [None]:
# Experiment 1: uniformly random binary matrix scored against random labels.
n, d, g = 100, 200, 4
x = np.random.randint(2, size=(n, d))
print('A glance at the matrix')
print(x)

# Reference labels are drawn independently of x.
labels = np.empty(shape=(n, 1))
for i in range(n):
  labels[i,0] = random.randint(0,g-1)

z0,w0,pi0,rho0,eps0 = initialize(x,n,d,g)
optim_z, optim_w, optim_pi, optim_rho, optim_eps = maximize_classlikelihood(
  z0,w0,x,pi0,rho0,eps0,n,d,g)
print('Optimized row partition z:\n',optim_z[0:min(10,n),],'...')
print('Optimized column partition w:\n',optim_w[0:min(10,d),],'...')

# Compare the recovered row clusters with the reference labels.
labels_pred = get_predicted_lab(optim_z,n)
print('Adj. Rand score',adjusted_rand_score(labels[:,0],labels_pred[:,0]))
print('Adj. Mutual Information',adjusted_mutual_info_score(labels[:,0],labels_pred[:,0]))

A glance at the matrix
[[1 0 0 ... 1 0 0]
 [1 0 0 ... 0 1 1]
 [0 1 1 ... 0 1 0]
 ...
 [1 1 1 ... 1 1 1]
 [1 1 0 ... 1 1 0]
 [0 0 0 ... 1 0 0]]
initial z:
 [[1. 0. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 1. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]] ...
initial w:
 [[1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 1. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]] ...
initial pi [0.23 0.21 0.31 0.25]
initial rho [0.265 0.215 0.275 0.245]
initial eps 0.50585

Maximizing with respect to z

starting eps 0.50585
updated eps 0.51905
updated eps 0.50185
updated eps 0.49635
updated eps 0.49635

Maximizing with respect to w

starting eps 0.49635
updated eps 0.46125
updated eps 0.46005
updated eps 0.46005
updated eps 0.46005

Maximizing with respect to z

starting eps 0.46005
updated eps 0.46005

Maximizing with respect to w

starting eps 0.46005
updated eps 0.46005
updated eps 0.46005
Opti

### Experiment 2

Perfectly block-diagonal matrices with varying dimensions and numbers of blocks/clusters. The algorithm is expected to recover the correct labels; performance is measured with the adjusted Rand score and the adjusted mutual information, both expected to be close to 1.

#### Experiment 2.1
Perfectly block diagonal matrix 100 $\times$ 500 and 3 blocks/clusters

In [None]:
# Experiment 2.1: 100 x 500 block-diagonal matrix with 3 blocks.
n, d, g = 100, 500, 3
row_dim = [[0,25],[25,55],[55,100]]
col_dim = [[0,225],[225,375],[375,500]]

# Fill each diagonal block with ones and label its rows with the block index.
x = np.zeros(shape=(n, d))
labels = np.empty(shape=(n, 1))
for i in range(g):
  (row_lo,row_hi),(col_lo,col_hi) = row_dim[i],col_dim[i]
  x[row_lo:row_hi,col_lo:col_hi] = 1
  labels[row_lo:row_hi,0] = i

print('A glance at the matrix')
print(x)
print('A glance at the labels')
print('First rows',labels[:10,0])
print('Last rows',labels[90:100,0])

print('Sparsity',np.sum(x==0)/(np.shape(x)[0]*np.shape(x)[1]))

# the matrix and the labels are shuffled
index_rows = list(range(n))
index_cols = list(range(d))
random.shuffle(index_rows)
random.shuffle(index_cols)
x = x[np.ix_(index_rows,index_cols)]
labels = labels[index_rows,:]

z0,w0,pi0,rho0,eps0 = initialize(x,n,d,g)
optim_z, optim_w, optim_pi, optim_rho, optim_eps = maximize_classlikelihood(
  z0,w0,x,pi0,rho0,eps0,n,d,g)
labels_pred = get_predicted_lab(optim_z,n)

print('Adj. Rand score',adjusted_rand_score(labels[:,0],labels_pred[:,0]))
print('Adj. Mutual Information',adjusted_mutual_info_score(labels[:,0],labels_pred[:,0]))

A glance at the matrix
[[1. 1. 1. ... 0. 0. 0.]
 [1. 1. 1. ... 0. 0. 0.]
 [1. 1. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 1. 1. 1.]
 [0. 0. 0. ... 1. 1. 1.]
 [0. 0. 0. ... 1. 1. 1.]]
A glance at the labels
First rows [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Last rows [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
Sparsity 0.685
initial z:
 [[0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]] ...
initial w:
 [[1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]] ...
initial pi [0.28 0.35 0.37]
initial rho [0.328 0.336 0.336]
initial eps 0.43952

Maximizing with respect to z

starting eps 0.43952
updated eps 0.418
updated eps 0.418

Maximizing with respect to w

starting eps 0.418
updated eps 0.175
updated eps 0.175
updated eps 0.175

Maximizing with respect to z

starting eps 0.175
updated eps 0.05
updated eps 0.05

Maximizing with respect to w

starting eps 0.05
updated eps 0.05
up

#### Experiment 2.2
Perfectly block diagonal matrix 100 $\times$ 500 and 6 blocks/clusters

In [None]:
# Experiment 2.2: 100 x 500 block-diagonal matrix with 6 blocks.
n, d, g = 100, 500, 6
row_dim = [[0,10],[10,25],[25,50],[50,60],[60,75],[75,100]]
col_dim = [[0,125],[125,175],[175,250],[250,375],[375,425],[425,500]]

# Fill each diagonal block with ones and label its rows with the block index.
x = np.zeros(shape=(n, d))
labels = np.empty(shape=(n, 1))
for i in range(g):
  (row_lo,row_hi),(col_lo,col_hi) = row_dim[i],col_dim[i]
  x[row_lo:row_hi,col_lo:col_hi] = 1
  labels[row_lo:row_hi,0] = i

print('A glance at the matrix')
print(x)
print('A glance at the labels')
print('First rows',labels[:10,0])
print('Last rows',labels[90:100,0])

print('Sparsity',np.sum(x==0)/(np.shape(x)[0]*np.shape(x)[1]))

# the matrix and the labels are shuffled
index_rows = list(range(n))
index_cols = list(range(d))
random.shuffle(index_rows)
random.shuffle(index_cols)
x = x[np.ix_(index_rows,index_cols)]
labels = labels[index_rows,:]

z0,w0,pi0,rho0,eps0 = initialize(x,n,d,g)
optim_z, optim_w, optim_pi, optim_rho, optim_eps = maximize_classlikelihood(
  z0,w0,x,pi0,rho0,eps0,n,d,g)
labels_pred = get_predicted_lab(optim_z,n)

print('Adj. Rand score',adjusted_rand_score(labels[:,0],labels_pred[:,0]))
print('Adj. Mutual Information',adjusted_mutual_info_score(labels[:,0],labels_pred[:,0]))

A glance at the matrix
[[1. 1. 1. ... 0. 0. 0.]
 [1. 1. 1. ... 0. 0. 0.]
 [1. 1. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 1. 1. 1.]
 [0. 0. 0. ... 1. 1. 1.]
 [0. 0. 0. ... 1. 1. 1.]]
A glance at the labels
First rows [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Last rows [5. 5. 5. 5. 5. 5. 5. 5. 5. 5.]
Sparsity 0.845
initial z:
 [[0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]] ...
initial w:
 [[0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]] ...
initial pi [0.14 0.2  0.16 0.17 0.16 0.17]
initial rho [0.134 0.178 0.184 0.17  0.182 0.152]
initial eps 0.26974

Maximizing with respect to z

starting eps 0.26974
updated eps 0.2407
updated eps 0.2407

Maximizing with respect to w

starting e

#### Experiment 2.3
Perfectly block diagonal matrix 1000 $\times$ 5000 and 3 blocks/clusters

In [None]:
# Experiment 2.3: 1000 x 5000 block-diagonal matrix with 3 blocks.
n = 1000
d = 5000
g = 3
x = np.zeros(shape=(n, d))
row_dim = [[0,250],[250,550],[550,1000]]
col_dim = [[0,2250],[2250,3750],[3750,5000]]
labels = np.empty(shape=(n, 1))
# Fill each diagonal block with ones and label its rows with the block index.
for i in range(0,g):
  x[row_dim[i][0]:row_dim[i][1],col_dim[i][0]:col_dim[i][1]] = 1
  labels[row_dim[i][0]:row_dim[i][1],0] = i

print('A glance at the matrix')
print(x)
print('A glance at the labels')
print('First rows',labels[0:10,0])
# Slice relative to n so the last ten rows are shown whatever the matrix size
# (a fixed 90:100 slice shows rows of the first block when n = 1000).
print('Last rows',labels[n-10:n,0])

print('Sparsity',np.sum(x==0)/(np.shape(x)[0]*np.shape(x)[1]))

# the matrix and the labels are shuffled
index_rows = list(range(0,n))
index_cols = list(range(0,d))
random.shuffle(index_rows)
random.shuffle(index_cols)
x = x[index_rows,:]
x = x[:,index_cols]
labels = labels[index_rows,:]

z0,w0,pi0,rho0,eps0= initialize(x,n,d,g)
optim_z, optim_w, optim_pi, optim_rho, optim_eps = maximize_classlikelihood(z0,w0,x,pi0,rho0,eps0,n,d,g)
labels_pred = get_predicted_lab(optim_z,n)

print('Adj. Rand score',adjusted_rand_score(labels[:,0],labels_pred[:,0]))
print('Adj. Mutual Information',adjusted_mutual_info_score(labels[:,0],labels_pred[:,0]))

A glance at the matrix
[[1. 1. 1. ... 0. 0. 0.]
 [1. 1. 1. ... 0. 0. 0.]
 [1. 1. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 1. 1. 1.]
 [0. 0. 0. ... 1. 1. 1.]
 [0. 0. 0. ... 1. 1. 1.]]
A glance at the labels
First rows [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Last rows [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Sparsity 0.685
initial z:
 [[0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]] ...
initial w:
 [[0. 0. 1.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]] ...
initial pi [0.342 0.337 0.321]
initial rho [0.3392 0.3308 0.33  ]
initial eps 0.4385716

Maximizing with respect to z

starting eps 0.4385716
updated eps 0.43346
updated eps 0.43346

Maximizing with respect to w

starting eps 0.43346
updated eps 0.175
updated eps 0.175
updated eps 0.175

Maximizing with respect to z

starting eps 0.175
updated eps 0.05
updated eps 0.05

Maximizing with respect to w

starting eps 0.05
upd

#### Experiment 2.4
Perfectly block diagonal matrix 1000 $\times$ 5000 and 6 blocks/clusters

In [None]:
# Experiment 2.4: 1000 x 5000 block-diagonal matrix with 6 blocks.
n = 1000
d = 5000
g = 6
x = np.zeros(shape=(n, d))
row_dim = [[0,100],[100,250],[250,500],[500,600],[600,750],[750,1000]]
col_dim = [[0,1250],[1250,1750],[1750,2500],[2500,3750],[3750,4250],[4250,5000]]
labels = np.empty(shape=(n, 1))
# Fill each diagonal block with ones and label its rows with the block index.
for i in range(0,g):
  x[row_dim[i][0]:row_dim[i][1],col_dim[i][0]:col_dim[i][1]] = 1
  labels[row_dim[i][0]:row_dim[i][1],0] = i

print('A glance at the matrix')
print(x)
print('A glance at the labels')
print('First rows',labels[0:10,0])
# Slice relative to n so the last ten rows are shown whatever the matrix size
# (a fixed 90:100 slice shows rows of the first block when n = 1000).
print('Last rows',labels[n-10:n,0])

print('Sparsity',np.sum(x==0)/(np.shape(x)[0]*np.shape(x)[1]))

# the matrix and the labels are shuffled
index_rows = list(range(0,n))
index_cols = list(range(0,d))
random.shuffle(index_rows)
random.shuffle(index_cols)
x = x[index_rows,:]
x = x[:,index_cols]
labels = labels[index_rows,:]

z0,w0,pi0,rho0,eps0= initialize(x,n,d,g)
optim_z, optim_w, optim_pi, optim_rho, optim_eps = maximize_classlikelihood(z0,w0,x,pi0,rho0,eps0,n,d,g)
labels_pred = get_predicted_lab(optim_z,n)

print('Adj. Rand score',adjusted_rand_score(labels[:,0],labels_pred[:,0]))
print('Adj. Mutual Information',adjusted_mutual_info_score(labels[:,0],labels_pred[:,0]))

A glance at the matrix
[[1. 1. 1. ... 0. 0. 0.]
 [1. 1. 1. ... 0. 0. 0.]
 [1. 1. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 1. 1. 1.]
 [0. 0. 0. ... 1. 1. 1.]
 [0. 0. 0. ... 1. 1. 1.]]
A glance at the labels
First rows [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Last rows [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Sparsity 0.845
initial z:
 [[0. 0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]] ...
initial w:
 [[0. 0. 0. 0. 1. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 1.]] ...
initial pi [0.161 0.17  0.176 0.165 0.151 0.177]
initial rho [0.1698 0.1636 0.1684 0.1766 0.1616 0.16  ]
initial eps 0.2698224

Maximizing with respect to z

starting eps 0.2698224
updated eps 0.26358
updated eps 0.26358

Maximizing with respec