In [55]:
from typing import Optional
import pandas as pd
import numpy as np
import plotly.express as px


class EM:
  def __init__(self) -> None:

    np.random.seed(42)
    self.sample_size = 20
    self.mu = [5, -1]
    self.sigma = np.array(
      [
        [1, 0.5],
        [0.5, 1],
      ]
    )
    self.na_prop = 0.2

    # Create multivariate normal
    self.multiv_norm_full = np.random.multivariate_normal(
      mean=self.mu,
      cov=self.sigma,
      size=self.sample_size,
    )

    # Create mask
    self.missing_mask = np.random.binomial(
      n=1, p=1 - self.na_prop, size=self.sample_size
    )
    self.multiv_norm = self.multiv_norm_full  # store full data
    self.multiv_norm[:, 1] *= self.missing_mask  # apply mask on column 1
    self.multiv_norm[self.multiv_norm == 0] = np.nan

  def e_step(self, x: Optional[np.array] = None) -> dict:
    """
    theta = (mu, sigma)
    """
    if x is None:
      x = self.multiv_norm
    s1 = np.sum(x[:, 0])
    s11 = np.sum(x[:, 0] ** 2)

    x_obs = x[~np.isnan(x[:, 1])]

    s2 = np.nansum(x[:, 1])
    s12 = np.nansum(np.prod(x, axis=1))

    for x_val in x[np.isnan(x[:, 1])][:, 0]:
      # Update s2
      s2 += (
        (self.mu[1] + self.sigma[1, 0] * (x_val - self.mu[0]) / self.sigma[0, 0]) ** 2
        + self.sigma[1, 1]
        - self.sigma[1, 0] ** 2 / self.sigma[0, 0]
      )

      # Update s12
      s12 += x_obs * (
        self.mu[1] + self.sigma[1, 0] * (x_obs - self.mu[1]) / self.sigma[0, 0]
      )

    print(s12)

    # return {"mu_init": mu, "sigma_init": sigma}


em = EM()
em.e_step()


[[-27.43563299 -56.86543007]
 [ 12.42594547 -60.40925127]
 [ 33.58703836 -37.89505979]
 [ 75.0382433  -62.48603255]
 [ 32.59493524 -62.01577513]
 [ 62.91122435 -55.71975679]
 [-23.6461776  -33.70066022]
 [ 30.19417447 -44.72622728]
 [ 35.87200907 -62.42688486]
 [ 30.63202756 -58.33993712]
 [ -4.08469716 -62.765224  ]
 [ 26.1498381  -48.53360675]
 [ 35.50122337 -37.9970313 ]]
