## 라이브러리 import

In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

## 데이터 불러오기

In [None]:
# Load the feature table (comma-separated, no header row) and split it by
# column position into a normal part and an abnormal (faulty) part.
FeatureData = pd.read_csv('./SavedData/FeatureData', header=None, sep=',')

# Columns 0..99 -> normal samples, columns 100..end -> abnormal samples.
Normal_Feature   = FeatureData.iloc[:, 0:100]
Abnormal_Feature = FeatureData.iloc[:, 100:FeatureData.shape[1]]

In [None]:
# Load the p-value ranking table (comma-separated, no header row).
path = './SavedData/P_value_Rank'
P_value_Rank = pd.read_csv(
    path,
    header=None,
    sep=',',
)

## P value 기준 Feature 선정

In [None]:
# Select `Number` features by p-value rank, starting at rank `StartRank`.
# Ranks are 1-based: rank r is stored in row r-1 of P_value_Rank.
StartRank = 361
Number = 30

# Row positions in P_value_Rank for ranks StartRank .. StartRank+Number-1.
# An explicit position array (not a slice) is used on purpose: it still raises
# IndexError when the requested ranks run past the end of the ranking table,
# instead of silently returning fewer features.
rank_rows = np.arange(StartRank - 1, StartRank - 1 + Number)

# Column 0 of P_value_Rank is used as the positional row index into the
# feature tables.
# NOTE(review): the values are used directly as 0-based positions — confirm
# the rank file was not written with 1-based indices.
feature_rows = P_value_Rank.iloc[rank_rows, 0].astype(int).to_numpy()

# Pull all selected feature rows at once; each result has shape
# (Number, n_samples) with n_samples taken from the data rather than a
# hard-coded 100.
NormalSelected   = Normal_Feature.iloc[feature_rows, :].to_numpy(dtype=float)
AbnormalSelected = Abnormal_Feature.iloc[feature_rows, :].to_numpy(dtype=float)

# Join normal and abnormal samples side by side, then transpose so that each
# row is one sample and each column is one selected feature.
FeatureSelected = pd.DataFrame(np.concatenate([NormalSelected, AbnormalSelected], axis=1)).T
FeatureSelected.shape

## 데이터 정규화

In [None]:
# Standardize the selected feature matrix with StandardScaler
# (fit on the data, then transform the same data).
FeatureSelected_std = StandardScaler().fit(FeatureSelected).transform(FeatureSelected)

# Wrap the scaled array in a DataFrame so the notebook renders it as a table.
show = pd.DataFrame(data=FeatureSelected_std)
show

## 선정된 Feature PCA

In [None]:
# Extract 10 principal components (PCs) from the standardized features.
pca = PCA(n_components = 10)
PC = pca.fit_transform(FeatureSelected_std)

# Explained-variance ratio of each component, rounded to 3 decimals.
# np.round is used instead of np.round_: the np.round_ alias was deprecated
# in NumPy 1.25 and removed in NumPy 2.0.
VarRatio = pca.explained_variance_ratio_
VarRatio = pd.DataFrame(np.round(VarRatio, 3))

# Cumulative explained-variance ratio. CumVarRatio stays an unrounded array
# (it is plotted later); only the DataFrame copy is rounded.
CumVarRatio = np.cumsum(pca.explained_variance_ratio_)
CumVarRatio_df = pd.DataFrame(np.round(CumVarRatio, 3))

# Summary table: first column is the per-component ratio, second column is the
# cumulative ratio (both columns keep the default label 0).
Result = pd.concat([VarRatio, CumVarRatio_df], axis=1)
Result


In [None]:
# Plot the cumulative explained variance (in percent) against the PC number.
# The x positions and x-axis limit are derived from the length of CumVarRatio
# so the plot keeps working if the number of PCA components is changed;
# a hard-coded range(1, 11) would raise a shape mismatch in that case.
n_pc = len(CumVarRatio)
plt.plot(range(1, n_pc + 1), CumVarRatio*100, color='green', linestyle='-', marker='o')
plt.ylim(0,100)
plt.xlim(0, n_pc)
plt.xlabel('PC Number')
plt.ylabel('Percentage(%)')
plt.show()

## Low Rank Feature PCA 시각화

In [None]:
# Scatter the samples in the PC1-PC2 plane: the first 100 rows of PC are drawn
# in blue and labelled 'Normal', the remaining rows in red and labelled
# 'Abnormal'.
normal_pc = PC[:100]
abnormal_pc = PC[100:]

plt.plot(normal_pc[:, 0], normal_pc[:, 1],
         color='b', linestyle='', marker='o', label='Normal')
plt.plot(abnormal_pc[:, 0], abnormal_pc[:, 1],
         color='r', linestyle='', marker='o', label='Abnormal')
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend()
plt.show()