In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv("faithful.csv")
df = df.drop(['Unnamed: 0'], axis=1)

In [3]:
df

Unnamed: 0,eruptions,waiting
0,3.600,79
1,1.800,54
2,3.333,74
3,2.283,62
4,4.533,85
...,...,...
267,4.117,81
268,2.150,46
269,4.417,90
270,1.817,46


In [4]:
x = df.values

In [5]:
x

array([[ 3.6  , 79.   ],
       [ 1.8  , 54.   ],
       [ 3.333, 74.   ],
       [ 2.283, 62.   ],
       [ 4.533, 85.   ],
       [ 2.883, 55.   ],
       [ 4.7  , 88.   ],
       [ 3.6  , 85.   ],
       [ 1.95 , 51.   ],
       [ 4.35 , 85.   ],
       [ 1.833, 54.   ],
       [ 3.917, 84.   ],
       [ 4.2  , 78.   ],
       [ 1.75 , 47.   ],
       [ 4.7  , 83.   ],
       [ 2.167, 52.   ],
       [ 1.75 , 62.   ],
       [ 4.8  , 84.   ],
       [ 1.6  , 52.   ],
       [ 4.25 , 79.   ],
       [ 1.8  , 51.   ],
       [ 1.75 , 47.   ],
       [ 3.45 , 78.   ],
       [ 3.067, 69.   ],
       [ 4.533, 74.   ],
       [ 3.6  , 83.   ],
       [ 1.967, 55.   ],
       [ 4.083, 76.   ],
       [ 3.85 , 78.   ],
       [ 4.433, 79.   ],
       [ 4.3  , 73.   ],
       [ 4.467, 77.   ],
       [ 3.367, 66.   ],
       [ 4.033, 80.   ],
       [ 3.833, 74.   ],
       [ 2.017, 52.   ],
       [ 1.867, 48.   ],
       [ 4.833, 80.   ],
       [ 1.833, 59.   ],
       [ 4.783, 90.   ],


In [31]:
# Exponent function for estimating e
exponent_func = np.vectorize(lambda x, std: np.exp((-1.0*x)/(2 * std)))

# Performs the E step give a list of x values, a list of means and a matrix of estimated values 
def E_step(x_list, mean_list, e_matrix):
	# Compute the standard deviation
	std_list = get_std(x_list, mean_list, e_matrix)
	estimated = list()
	for i,std in enumerate(std_list):
		estimated.append(exponent_func(np.square(x_list - mean_list[i]), std))
	# Calculating denominator
	estimated = np.array(estimated).transpose()
	for i, n in enumerate(estimated):
		estimated[i] /= n.sum()
	return estimated

# Performs the M step give a list of x values, a list of means
def M_step(x_list, e_matrix):
	# Calculating numerator
	numerator = np.dot(x_list, e_matrix)
	# Calculating denominator
	denominator = e_matrix.sum(axis=0)
	return np.divide(numerator, denominator)

# Compute the standard deviation
def get_std(x_list, mean_list, e_matrix):
	x_vector = x_list[np.newaxis].transpose()[0]
	print(x_vector)
	var = np.square(mean_list - x_vector) * e_matrix
	return np.sqrt(var.sum(axis=0)/e_matrix.sum(axis=0))

# Computes theta
def get_theta(e_matrix):
	return e_matrix.sum(axis=0) / len(e_matrix)

# Performs E-M for a number of steps
def simulate_E_M(x_list, e_matrix ,steps):
	mean_matrix = list()
	for i in range(steps):
		mean_list = M_step(x_list, e_matrix)
		mean_matrix.append(mean_list)
		e_matrix  = E_step(x_list, mean_list, e_matrix)
	return np.array(mean_matrix).transpose()

In [32]:
prob = np.array([0.4411765,0.5588235])

In [33]:
prob

array([0.4411765, 0.5588235])

In [34]:
simulate_E_M(x, prob ,10)

[[3.6  ]
 [1.8  ]
 [3.333]
 [2.283]
 [4.533]
 [2.883]
 [4.7  ]
 [3.6  ]
 [1.95 ]
 [4.35 ]
 [1.833]
 [3.917]
 [4.2  ]
 [1.75 ]
 [4.7  ]
 [2.167]
 [1.75 ]
 [4.8  ]
 [1.6  ]
 [4.25 ]
 [1.8  ]
 [1.75 ]
 [3.45 ]
 [3.067]
 [4.533]
 [3.6  ]
 [1.967]
 [4.083]
 [3.85 ]
 [4.433]
 [4.3  ]
 [4.467]
 [3.367]
 [4.033]
 [3.833]
 [2.017]
 [1.867]
 [4.833]
 [1.833]
 [4.783]
 [4.35 ]
 [1.883]
 [4.567]
 [1.75 ]
 [4.533]
 [3.317]
 [3.833]
 [2.1  ]
 [4.633]
 [2.   ]
 [4.8  ]
 [4.716]
 [1.833]
 [4.833]
 [1.733]
 [4.883]
 [3.717]
 [1.667]
 [4.567]
 [4.317]
 [2.233]
 [4.5  ]
 [1.75 ]
 [4.8  ]
 [1.817]
 [4.4  ]
 [4.167]
 [4.7  ]
 [2.067]
 [4.7  ]
 [4.033]
 [1.967]
 [4.5  ]
 [4.   ]
 [1.983]
 [5.067]
 [2.017]
 [4.567]
 [3.883]
 [3.6  ]
 [4.133]
 [4.333]
 [4.1  ]
 [2.633]
 [4.067]
 [4.933]
 [3.95 ]
 [4.517]
 [2.167]
 [4.   ]
 [2.2  ]
 [4.333]
 [1.867]
 [4.817]
 [1.833]
 [4.3  ]
 [4.667]
 [3.75 ]
 [1.867]
 [4.9  ]
 [2.483]
 [4.367]
 [2.1  ]
 [4.5  ]
 [4.05 ]
 [1.867]
 [4.7  ]
 [1.783]
 [4.85 ]
 [3.683]
 [4.733]
 

ValueError: operands could not be broadcast together with shapes (272,272) (2,) 