#### 计算特征值

In [26]:
import os
import numpy as np
import numpy.matlib

# 获取原始数据路径
path_train_input = "eigenfaces\\faceR"
# 读取训练数据集
data_train_input = np.loadtxt(path_train_input)
data_train_label_raw = np.load('./FaceDataset/Label/train.npy')

In [27]:
# 打印数据集的形状
print("输入形状： " , data_train_input.shape)
print("标签形状： " , data_train_label_raw.shape)

输入形状：  (1997, 100)
标签形状：  (1997, 2)


In [28]:
# 将男性标签替换为1，女性标签替换为0
a = np.char.replace(data_train_label_raw,'female','0')
b = np.char.replace(a,'male','1')
# 将数组从字符串类型转换为int类型
data_train_label = b.astype(int)

In [29]:
# 打印进行查看
print(data_train_label[1807:1809])
print(data_train_label.shape)

[[3033    1]
 [3034    0]]
(1997, 2)


In [30]:
data_only_lael = data_train_label[:,[1]]
train_data = np.concatenate((data_train_input,data_only_lael),axis=1)
train_data.shape

(1997, 101)

In [31]:
# 分离出男性和女性的数据集
train_data_male = np.zeros((0,101),dtype = float)
train_data_famale = np.zeros((0,101),dtype = float)
j = 0
for line in train_data:
    if line[-1] == 1:
        train_data_male = np.row_stack((train_data_male,train_data[j,0:101]))
    j+=1
j = 0
for line in train_data:
    if line[-1] == 0:
        train_data_famale = np.row_stack((train_data_famale,train_data[j,0:101]))
    j+=1

In [32]:
print(train_data_male.shape)
print(train_data_famale.shape)

(1150, 101)
(847, 101)


In [33]:
# 提取出只有特征的矩阵
train_pure_data_male = train_data_male[:,1:100]
train_pure_data_female = train_data_famale[:,1:100]

In [34]:
print("male :", train_pure_data_male.shape)
print("female : ", train_pure_data_female.shape)

male : (1150, 99)
female :  (847, 99)


In [35]:
# 求出分类协方差矩阵
male_covariance_matrix = np.cov(train_pure_data_male, rowvar = False)
female_covariance_matrix = np.cov(train_pure_data_female, rowvar = False)

In [36]:
print("male covariance matrix" , male_covariance_matrix.shape)
print("female covariance matrix" , female_covariance_matrix.shape)

male covariance matrix (99, 99)
female covariance matrix (99, 99)


In [37]:
# 求出分类均值
male_average = np.average(train_pure_data_male,axis=0)
female_average = np.average(train_pure_data_female,axis=0)

In [38]:
print("male average" , male_average[0:10])
print("female average" , female_average[0:10])

print("male average" , male_average.shape)
print("female average" , female_average.shape)

male average [-336.10948419   18.52917969   -3.58500327    7.97865288   24.60646817
  116.50888639 -105.94273379  -88.68269395 -101.33368304   -5.01006541]
female average [-142.00910638   92.89319962  164.41899622 -153.83048152  104.30999895
  -22.57592274  231.08783111  221.72767551 -116.7370422    84.02787477]
male average (99,)
female average (99,)


In [39]:
# 求出混合均值
# train_pure_data_1 = np.concatenate((train_pure_data_male,train_pure_data_female),axis=0)
# u_average_1 = np.average(train_pure_data_1,axis=0)
train_pure_data = train_data[:,1:100]
u_average = np.average(train_pure_data,axis=0)
u_average[0:8]

array([-253.78448669,   50.06965284,   67.67157538,  -60.65045921,
         58.41162119,   57.51798337,   37.00413075,   42.97358193])

In [40]:
# 计算类间离散度矩阵
diff_u1 = male_average - u_average
diff_u2 = female_average - u_average
diff_u1 = diff_u1.reshape(99,1)
diff_u2 = diff_u2.reshape(99,1)
u1_matrix = np.matmul(diff_u1,diff_u1.T)
u2_matrix = np.matmul(diff_u2,diff_u2.T)
S_b = u1_matrix + u2_matrix
S_b.shape

(99, 99)

In [41]:
print(diff_u1[1] , male_average[1] , u_average[1])
print(diff_u1[1] , female_average[1] , u_average[1])

[-31.54047315] 18.529179689565126 50.06965283875813
[-31.54047315] 92.89319961747346 50.06965283875813


In [42]:
# 计算类内离散度矩阵
S_w = 0.5 * (male_covariance_matrix + female_covariance_matrix)
S_w_inv = np.linalg.inv(S_w)
S_w_inv.shape

(99, 99)

In [43]:
S_b_S_w_inv = np.matmul(S_w_inv,S_b)
print(S_b_S_w_inv)

[[ 9.72904913e-03  3.72740750e-03  8.42099940e-03 ...  4.91414202e-04
   5.66444930e-05  3.54951535e-04]
 [ 2.02943506e-02  7.77520122e-03  1.75658188e-02 ...  1.02506751e-03
   1.18157817e-04  7.40412635e-04]
 [ 1.76988957e-02  6.78082673e-03  1.53193172e-02 ...  8.93971094e-04
   1.03046552e-04  6.45720882e-04]
 ...
 [ 2.08068342e-01  7.97154468e-02  1.80094001e-01 ...  1.05095305e-02
   1.21141600e-03  7.59109924e-03]
 [-8.51912871e-02 -3.26386102e-02 -7.37375018e-02 ... -4.30301130e-03
  -4.96000915e-04 -3.10809183e-03]
 [-7.08862002e-02 -2.71580244e-02 -6.13557030e-02 ... -3.58046146e-03
  -4.12713803e-04 -2.58618959e-03]]


In [44]:
rank = np.linalg.matrix_rank(S_b_S_w_inv)
rank

1

In [45]:
# 计算特征向量和特征值
eigenvalue, featurevector = np.linalg.eig(S_b_S_w_inv)
print(eigenvalue[1])

(1.5812748593283055+0j)


In [46]:
W = featurevector[:,1]
W_Mat = W.reshape(99,1)

In [47]:
# 获取特征矩阵
train_feature_data = np.matmul(train_pure_data,W_Mat)
train_feature_data.shape
print(train_feature_data.shape)
print(train_feature_data)

(1997, 1)
[[-146.76836086+0.j]
 [ -90.97183676+0.j]
 [   9.18925873+0.j]
 ...
 [  87.30373906+0.j]
 [-311.67354422+0.j]
 [-357.58405128+0.j]]


In [48]:
# 获得包含特征的完全数据
dataset_feature = np.concatenate((data_train_label,train_feature_data),axis=1)
print(dataset_feature[30:50])

[[ 1.25500000e+03+0.j  1.00000000e+00+0.j  1.17655862e+01+0.j]
 [ 1.25600000e+03+0.j  0.00000000e+00+0.j -5.81542067e+01+0.j]
 [ 1.25700000e+03+0.j  0.00000000e+00+0.j -2.65783970e+02+0.j]
 [ 1.25800000e+03+0.j  1.00000000e+00+0.j -9.73387468e+01+0.j]
 [ 1.25900000e+03+0.j  1.00000000e+00+0.j -2.83858212e+02+0.j]
 [ 1.26000000e+03+0.j  1.00000000e+00+0.j -3.66321993e+02+0.j]
 [ 1.26100000e+03+0.j  1.00000000e+00+0.j -4.73877673e+01+0.j]
 [ 1.26200000e+03+0.j  0.00000000e+00+0.j -2.96641747e+02+0.j]
 [ 1.26300000e+03+0.j  1.00000000e+00+0.j  2.42587236e+01+0.j]
 [ 1.26400000e+03+0.j  1.00000000e+00+0.j -7.59087415e+01+0.j]
 [ 1.26500000e+03+0.j  1.00000000e+00+0.j  3.50153502e+01+0.j]
 [ 1.26600000e+03+0.j  1.00000000e+00+0.j  1.23100737e+02+0.j]
 [ 1.26700000e+03+0.j  1.00000000e+00+0.j -2.04186295e+02+0.j]
 [ 1.26800000e+03+0.j  1.00000000e+00+0.j -8.89846080e+01+0.j]
 [ 1.26900000e+03+0.j  1.00000000e+00+0.j  1.55351055e+02+0.j]
 [ 1.27000000e+03+0.j  1.00000000e+00+0.j  2.40189454e+

In [49]:
# 将数据保存为npy文件，方便读取
np.save('./FaceDataset/dataset_feature', dataset_feature)
np.save('./FaceDataset/W_Mat', W_Mat)

In [50]:
# 测试数据加载

dataset_feature = np.load('./FaceDataset/dataset_feature.npy')
print('type :', type(dataset_feature))
print('shape :', dataset_feature.shape)
print('dataset_feature :')
print(dataset_feature)

type : <class 'numpy.ndarray'>
shape : (1997, 3)
dataset_feature :
[[ 1.22300000e+03+0.j  1.00000000e+00+0.j -1.46768361e+02+0.j]
 [ 1.22400000e+03+0.j  1.00000000e+00+0.j -9.09718368e+01+0.j]
 [ 1.22500000e+03+0.j  1.00000000e+00+0.j  9.18925873e+00+0.j]
 ...
 [ 3.22000000e+03+0.j  0.00000000e+00+0.j  8.73037391e+01+0.j]
 [ 3.22100000e+03+0.j  0.00000000e+00+0.j -3.11673544e+02+0.j]
 [ 3.22200000e+03+0.j  0.00000000e+00+0.j -3.57584051e+02+0.j]]


In [52]:
# 测试数据加载

W_Mat = np.load('./FaceDataset/W_Mat.npy')
print('type :', type(W_Mat))
print('shape :', W_Mat.shape)
print('dataset_feature :')
print(W_Mat)

type : <class 'numpy.ndarray'>
shape : (99, 1)
dataset_feature :
[[-0.00801748+0.j]
 [-0.0167241 +0.j]
 [-0.01458524+0.j]
 [ 0.07449896+0.j]
 [-0.02827698+0.j]
 [ 0.0397287 +0.j]
 [-0.12408401+0.j]
 [-0.13569042+0.j]
 [ 0.00625193+0.j]
 [-0.06384975+0.j]
 [ 0.03679197+0.j]
 [-0.08798134+0.j]
 [-0.08937527+0.j]
 [-0.06772857+0.j]
 [-0.00736714+0.j]
 [ 0.02579685+0.j]
 [-0.05971839+0.j]
 [-0.05401217+0.j]
 [-0.07067269+0.j]
 [ 0.03981524+0.j]
 [-0.0765536 +0.j]
 [-0.0609326 +0.j]
 [-0.01096937+0.j]
 [-0.01296459+0.j]
 [ 0.03372909+0.j]
 [ 0.00547426+0.j]
 [ 0.00367972+0.j]
 [ 0.09469401+0.j]
 [-0.04769768+0.j]
 [-0.14325161+0.j]
 [-0.00208518+0.j]
 [ 0.02685563+0.j]
 [-0.09745485+0.j]
 [-0.04444397+0.j]
 [ 0.02191295+0.j]
 [ 0.07079508+0.j]
 [ 0.12959865+0.j]
 [ 0.03812716+0.j]
 [ 0.06605919+0.j]
 [ 0.08101063+0.j]
 [ 0.07174385+0.j]
 [-0.17091572+0.j]
 [-0.04548555+0.j]
 [ 0.05539888+0.j]
 [ 0.0233513 +0.j]
 [ 0.03372269+0.j]
 [-0.03562281+0.j]
 [-0.08657293+0.j]
 [ 0.08436631+0.j]
 [ 0