In [15]:
import pandas as pd
import numpy as np
# import seaborn as sns
from operator import attrgetter

In [17]:
# importing R dependencies
import rpy2
import readline
import rpy2.robjects as robjects

In [18]:
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [19]:
robjects.r('pi')

array([ 3.14159265])

In [22]:
%R require(ggplot2)




array([0], dtype=int32)

In [26]:
robjects.r.array([1])

R object with classes: ('array',) mapped to:
<Array - Python:0x7fa50d6e4dc8 / R:0x30a3b38>
[IntVector]

We begin with the following mixture model $f$
$$f(x) = \pi \phi(x|\mu_{1},\sigma_{1}^{2}) + (1-\pi)\phi(x|\mu_{2},\sigma_{2}^{2})$$

where $\pi \in [0,1]$.


Given a dataset of $n$ observations we have that the likelihood function is

$$
L(\mu_{1},\sigma_{1}^{2},\mu_{2},\sigma_{2}^{2}|x) = 
\prod_{i=1}^{n}[\pi\phi(x_{i}|\mu_{1},\sigma_{1}^{2}) + (1-\pi)\phi(x_{i}|\mu_{2},\sigma_{2}^{2})]
$$

# Algorithm

In [381]:
class Particle:
    def __init__(self,position,velocity):
        self.position = position
        self.velocity = velocity
        self.pbest = position
        self.current_fitness = 0
        self.best_fitness = 0
        self.num_parameters = len(self.position)
    def update_position(self,gbest,
                        use_boundary=False,
                        lower_bound=None,
                        upper_bound=None):
                        
        c1 = 2
        c2 = 2
        r1 = np.random.rand(1)
        r2 = np.random.rand(1)
        delta = 2*r1*(self.pbest - self.position) + 2*r2*(gbest - self.position)
        
        upper_bound=np.zeros(self.num_parameters),
        lower_bound=np.zeros(self.num_parameters)
        
        cond1 = delta <= upper_bound
        cond2 = delta >= lower_bound
        
        if (use_boundary and cond1.all() and cond2.all()) or use_boundary==False:
            self.velocity = self.velocity + delta
            self.position = self.position + self.velocity
    def calculate_fitness(self):
        self.current_fitness = self._gaussian_likelihood(self.position)
        if self.current_fitness > self.best_fitness:
            self.pbest = self.position
            self.best_fitness = self.current_fitness
    def _gaussian_likelihood(self,position):
        return np.sum(self.position)
        

In [349]:
# test particle
a = Particle(np.array([0,0]),np.array([0.1,0.2]))
a.calculate_fitness()
a.update_position(np.array([1,1]),False)
a.calculate_fitness()
a.position

array([ 0.98228976,  1.08228976])

In [379]:
# Algorithm

N = 2000
iterations = 10000
    
data_min = -2
data_max = 199
lower_boundary = np.array([0,data_min])
upper_boundary = np.array([1,data_max])    
particles = [0] * N
tolerance = 0.001

# random intialization of particles

# estimating two parameters for each of the two gaussian components
# posn[0] = weight
# posn[1] = mean

for p in range(N):    
    rand_posn = np.array([np.random.uniform(0,1),
                         np.random.uniform(data_min,data_max)])
    rand_velocity = np.array([np.random.uniform(0,1),
                             np.random.uniform(0,1)])
    particles[p] = Particle(rand_posn,rand_velocity)
    particles[p].calculate_fitness()
    
# repeat until convergence
gbest = max(particles,key=attrgetter('best_fitness'))    

for p in particles:
    p.update_position(gbest.position,False,lower_boundary,upper_boundary)
    gbest = max(particles,key=attrgetter('best_fitness'))    
print(gbest.pbest)
print(gbest.best_fitness)



[   0.73268781  198.90201298]
199.634700791
