In [1]:
import numpy as np

In [33]:
# Toy sample matrix: four rows (samples) by three columns (features).
_rows = (
    (1, 2, 3),
    (-2, -3.5, 1),
    (3, 5, 1),
    (-4, -7, 2),
)
data = np.array([list(row) for row in _rows])
del _rows  # keep the notebook namespace unchanged apart from `data`

In [34]:
# Per-column mean of the samples (axis=0 averages over the rows).
mean_vec = data.mean(axis=0)
print(mean_vec, mean_vec.shape, sep="\n")

[-0.5   -0.875  1.75 ]
(3,)


In [36]:
# Center the data: the column means are broadcast over every row and subtracted.
res = np.subtract(data, mean_vec)
print(res, res.shape, sep="\n")

[[ 1.5    2.875  1.25 ]
 [-1.5   -2.625 -0.75 ]
 [ 3.5    5.875 -0.75 ]
 [-3.5   -6.125  0.25 ]]
(4, 3)


In [37]:
# Covariance between the columns of `res` (rowvar=False: every column is one
# variable, every row one observation), followed by its eigen-decomposition.
# Column i of fVector is the eigenvector that belongs to fValue[i];
# np.linalg.eig does not return the eigenvalues in sorted order.
covMat = np.cov(res, rowvar=False)
fValue, fVector = np.linalg.eig(covMat)
print(fValue, fVector, sep="\n")

[3.87204521e+01 2.33159578e-03 9.23049676e-01]
[[ 0.49947386  0.86308497 -0.0749013 ]
 [ 0.86631414 -0.49708942  0.04900931]
 [-0.00506655  0.08936693  0.99598588]]


In [40]:
# argsort yields the eigenvalue indices in ascending order of eigenvalue.
fValueSort = np.argsort(fValue)
# Reverse that order (largest first) and keep the first two indices, i.e. the
# positions of the two largest eigenvalues.
fValueTopN = fValueSort[::-1][:2]
print(fValueSort, fValueTopN, sep="\n")
# Eigenvector columns that belong to those two eigenvalues.
fVector[:, fValueTopN]

[1 2 0]
[0 2]


array([[ 0.49947386, -0.0749013 ],
       [ 0.86631414,  0.04900931],
       [-0.00506655,  0.99598588]])

In [39]:
# Slicing demo: with step -1 the slice starts at the last element, and a stop
# of -1 names that same element (exclusive), so nothing is selected.
x = [2, 1, 3, 4, 6, 5]
x[:-1:-1]

[]

In [41]:
# -*-coding:utf-8-*-

"""
    Author: Thinkgamer
    Desc:
        代码4-3  PCA降维
"""
import numpy as np
from sklearn import datasets

class PCATest:
    """Step-by-step PCA (principal component analysis) helper.

    The methods are meant to be chained: center the data, build the covariance
    matrix, eigen-decompose it, keep the eigenvectors of the ``k`` largest
    eigenvalues and project the centered data onto them.
    """

    def __init__(self):
        pass

    def loadIris(self):
        """Return the feature matrix of scikit-learn's iris data set.

        The data is returned exactly as loaded; no centering or scaling is
        applied here (use ``Standard`` for centering).
        """
        return datasets.load_iris()["data"]

    def Standard(self, data):
        """Center ``data`` column-wise.

        Despite the method name, only the mean is removed; the columns are NOT
        scaled to unit variance.

        Args:
            data: 2-D array with one sample per row and one feature per column.

        Returns:
            tuple: ``(mean_vector, centered)`` where ``mean_vector`` holds the
            per-column means and ``centered`` is ``data - mean_vector``.
        """
        # axis=0 averages over the rows, giving one mean per column.
        mean_vector = np.mean(data, axis=0)
        return mean_vector, data - mean_vector

    def getCovMatrix(self, newData):
        """Return the covariance matrix of the columns of ``newData``.

        Args:
            newData: 2-D array, one observation per row, one feature per column.
        """
        # rowvar=False: every column is a variable (feature), every row an observation.
        return np.cov(newData, rowvar=False)

    def getFValueAndFVector(self, covMatrix):
        """Eigen-decompose a symmetric (covariance) matrix.

        Args:
            covMatrix: real symmetric square matrix, e.g. from ``getCovMatrix``.

        Returns:
            tuple: ``(fValue, fVector)``. ``fValue`` holds the eigenvalues
            sorted from largest to smallest; column ``i`` of ``fVector`` is the
            unit-length eigenvector that belongs to ``fValue[i]``.
        """
        # eigh is the solver for symmetric matrices: unlike the general eig it
        # always returns real eigenvalues and orthonormal eigenvectors. It
        # reports them in ascending order, so both outputs are reversed to put
        # the dominant component first.
        fValue, fVector = np.linalg.eigh(covMatrix)
        return fValue[::-1], fVector[:, ::-1]

    def getVectorMatrix(self, fValue, fVector, k):
        """Select the eigenvectors of the ``k`` largest eigenvalues.

        Args:
            fValue: 1-D array of eigenvalues (any order).
            fVector: 2-D array whose column ``i`` is the eigenvector of ``fValue[i]``.
            k: number of components to keep. Values larger than the number of
                eigenvalues keep all of them.

        Returns:
            2-D array with the selected eigenvectors as columns, ordered by
            decreasing eigenvalue.

        Raises:
            ValueError: if ``k`` is negative.
        """
        if k < 0:
            # A negative k would silently select the wrong number of columns.
            raise ValueError("k must be non-negative, got {}".format(k))
        # Indices of the eigenvalues from largest to smallest, cut to the first k.
        order = np.argsort(fValue)[::-1]
        return fVector[:, order[:k]]

    def getResult(self, data, vectorMatrix):
        """Project ``data`` onto the columns of ``vectorMatrix``.

        Args:
            data: 2-D array of (centered) samples, one per row.
            vectorMatrix: 2-D array of eigenvectors as columns, e.g. from
                ``getVectorMatrix``.

        Returns:
            The matrix product ``data . vectorMatrix``.
        """
        return np.dot(data, vectorMatrix)

if __name__ == "__main__":
    # Create the PCA helper object.
    pcatest = PCATest()
    # Load the iris feature matrix.
    data = pcatest.loadIris()
    # Center the data: Standard subtracts the per-column mean and returns that
    # mean as well (no scaling to unit variance is performed).
    mean_vector, newData = pcatest.Standard(data)
    # Covariance matrix of the centered features.
    covMatrix = pcatest.getCovMatrix(newData)
    print("协方差矩阵为:\n {}".format(covMatrix))
    # Eigenvalues and eigenvectors of the covariance matrix.
    fValue, fVector = pcatest.getFValueAndFVector(covMatrix)
    print("特征值为:{}".format(fValue))
    print("特征向量为:\n{}".format(fVector))
    # Eigenvector matrix used to reduce the data to k dimensions.
    vectorMatrix = pcatest.getVectorMatrix(fValue, fVector, k=2)
    print("K维特征向量矩阵为:\n{}".format(vectorMatrix))
    # Project the centered data onto the selected eigenvectors.
    result = pcatest.getResult(newData, vectorMatrix)
    print("最终降维结果为:\n{}".format(result))
    # Reconstruct the data from the projection: map back with the transposed
    # eigenvector matrix and add the mean again. Plain ndarray products are
    # used because the np.mat alias no longer exists in NumPy 2.0.
    reconstructed = np.dot(result, vectorMatrix.T) + mean_vector
    print("最终重构结果为:\n{}".format(reconstructed))

协方差矩阵为:
 [[ 0.68569351 -0.042434    1.27431544  0.51627069]
 [-0.042434    0.18997942 -0.32965638 -0.12163937]
 [ 1.27431544 -0.32965638  3.11627785  1.2956094 ]
 [ 0.51627069 -0.12163937  1.2956094   0.58100626]]
特征值为:[4.22824171 0.24267075 0.0782095  0.02383509]
特征向量为:
[[ 0.36138659 -0.65658877 -0.58202985  0.31548719]
 [-0.08452251 -0.73016143  0.59791083 -0.3197231 ]
 [ 0.85667061  0.17337266  0.07623608 -0.47983899]
 [ 0.3582892   0.07548102  0.54583143  0.75365743]]
K维特征向量矩阵为:
[[ 0.36138659 -0.65658877]
 [-0.08452251 -0.73016143]
 [ 0.85667061  0.17337266]
 [ 0.3582892   0.07548102]]
最终降维结果为:
[[-2.68412563 -0.31939725]
 [-2.71414169  0.17700123]
 [-2.88899057  0.14494943]
 [-2.74534286  0.31829898]
 [-2.72871654 -0.32675451]
 [-2.28085963 -0.74133045]
 [-2.82053775  0.08946138]
 [-2.62614497 -0.16338496]
 [-2.88638273  0.57831175]
 [-2.6727558   0.11377425]
 [-2.50694709 -0.6450689 ]
 [-2.61275523 -0.01472994]
 [-2.78610927  0.235112  ]
 [-3.22380374  0.51139459]
 [-2.64475039 -1