# 高斯混合模型

In [1]:
import numpy as np
import matplotlib.pyplot as plt

### 生成一些数据用来做实验
### 1、生成均值为1.71，标准差为0.056的男生身高数据

In [3]:
# Seed the legacy global NumPy RNG so the male sample below is reproducible.
np.random.seed(0)

# Parameters of the male height distribution: mean, standard deviation, sample count.
mu_m, sigma_m, num_m = 1.71, 0.056, 10000

# Draw num_m values from N(mu_m, sigma_m^2).
rand_data_m = np.random.normal(loc=mu_m, scale=sigma_m, size=num_m)
# Ground-truth labels: every male sample is tagged 1.0.
y_m = np.ones(shape=num_m)

### 2、生成均值为1.58，标准差为0.051的女生身高数据

In [4]:
# NOTE(review): this re-seeds the global RNG with the same value used for the
# male sample, so the female draws presumably replay the same underlying random
# stream (only shifted/scaled by mu_w/sigma_w) and are not independent of the
# male draws. Kept as-is so the recorded outputs stay reproducible -- confirm
# whether independent samples were intended.
np.random.seed(0)

mu_w = 1.58  # mean of the female height distribution
sigma_w = 0.051  # standard deviation of the female height distribution
num_w = 10000  # number of female samples to draw
rand_data_w = np.random.normal(mu_w, sigma_w, num_w)  # draw the samples
# Ground-truth labels: every female sample is tagged 0.0. The label array is
# sized by num_w (not num_m) so it always matches rand_data_w in length.
y_w = np.zeros(num_w)

### 3、把男生数据和女生数据合在一起

In [5]:
# Stack the male samples first, then the female samples, and turn the result
# into a single-column 2-D array (one row per sample).
data = np.concatenate((rand_data_m, rand_data_w)).reshape(-1, 1)
# Labels are joined in the same order so that y[k] belongs to data[k].
y = np.concatenate((y_m, y_w))
print(data)
print(y)

[[1.80878693]
 [1.7324088 ]
 [1.76480933]
 ...
 [1.60636048]
 [1.57832104]
 [1.64620368]]
[1. 1. 1. ... 0. 0. 0.]


In [6]:
from scipy.stats import multivariate_normal

# EM for a two-component Gaussian mixture fitted to `data`.
# The loop always runs num_iter times; there is no convergence check.
num_iter = 1000
n, d = data.shape

# Initial guesses: the component means start at the per-column minimum and
# maximum of the data, both covariances at the d x d identity, and the weight
# of component 1 at 0.5 (component 2 implicitly gets 1 - pi).
mu1 = data.min(axis=0)
mu2 = data.max(axis=0)
sigma1 = np.identity(d)
sigma2 = np.identity(d)
pi = 0.5

for i in range(num_iter):
    # E-step: weighted densities of every sample under each component, then
    # gamma = responsibility of component 1 for each sample.
    norm1 = multivariate_normal(mu1, sigma1)
    norm2 = multivariate_normal(mu2, sigma2)
    tau1 = pi * norm1.pdf(data)
    tau2 = (1 - pi) * norm2.pdf(data)
    gamma = tau1 / (tau1 + tau2)

    # Responsibility of component 2 and the total responsibility mass of each
    # component; these are reused by every update below.
    gamma_rest = 1 - gamma
    mass1 = np.sum(gamma)
    mass2 = np.sum(gamma_rest)

    # M-step, means: responsibility-weighted averages of the samples.
    mu1 = np.dot(gamma, data) / mass1
    mu2 = np.dot(gamma_rest, data) / mass2

    # M-step, covariances: responsibility-weighted scatter around the means
    # that were just updated.
    centered1 = data - mu1
    centered2 = data - mu2
    sigma1 = np.dot(gamma * centered1.T, centered1) / mass1
    sigma2 = np.dot(gamma_rest * centered2.T, centered2) / mass2

    # M-step, mixing weight of component 1: its average responsibility.
    pi = mass1 / n

# Report the fitted mixing weight, means and covariance matrices.
print(u'类别概率:\t', pi)
print(u'均值:\t', mu1, mu2)
print(u'方差:\n', sigma1, '\n\n', sigma2, '\n')

类别概率:	 0.487388464507264
均值:	 [1.57749047] [1.70726384]
方差:
 [[0.00244834]] 

 [[0.00315184]] 

