## 라이브러리 import

In [1]:
import pandas as pd
import numpy as np
import scipy.stats as sp
import pywt

## 데이터 크기 정의하기

In [2]:
NoOfData    = 180  # 정상/고장 데이터 각 180개씩 
NoOfSensor  = 3    # 가속도(Acceleration), 전압(Voltage), 전류(Current)
NoOfFeature = 10   # 특징 개수:10개 (순서: Max, Min, Mean, RMS, Variance, Skewness, Kurtosis, Crest factor, Impulse factor, Shape factor)

NoOfData, NoOfSensor, NoOfFeature

(180, 3, 10)

## Time Domain 특징값 추출 (10 features * 3 sensors = 30개씩)

In [3]:
def rms(x): # RMS 함수 정의
    return np.sqrt(np.mean(x**2))

In [4]:
# 특징데이터 크기 지정
TimeFeature_Normal   = np.zeros((NoOfSensor*NoOfFeature , NoOfData))
TimeFeature_Abnormal = np.zeros((NoOfSensor*NoOfFeature , NoOfData))

for i in range(NoOfData):
    
    # 데이터 불러오기
    temp_path1 = './SpotWeldingData/Normal_%d'%(i+1)   # Normal 데이터 파일 경로
    temp_path2 = './SpotWeldingData/Abnormal_%d'%(i+1) # Abnormal 데이터 파일 경로
    temp_data1 = pd.read_csv(temp_path1 , sep=',' , header=None) # 임시 Normal 데이터
    temp_data2 = pd.read_csv(temp_path2 , sep=',' , header=None) # 임시 Abnormal 데이터
    
    # Time Domain 특징값 추출
    for j in range(NoOfSensor):
        
        # Normal Time Domain Feature
        TimeFeature_Normal[10*j+0, i] = np.max(temp_data1.iloc[:,j+1])
        TimeFeature_Normal[10*j+1, i] = np.min(temp_data1.iloc[:,j+1])
        TimeFeature_Normal[10*j+2, i] = np.mean(temp_data1.iloc[:,j+1])
        TimeFeature_Normal[10*j+3, i] = rms(temp_data1.iloc[:,j+1])
        TimeFeature_Normal[10*j+4, i] = np.var(temp_data1.iloc[:,j+1])
        TimeFeature_Normal[10*j+5, i] = sp.skew(temp_data1.iloc[:,j+1])
        TimeFeature_Normal[10*j+6, i] = sp.kurtosis(temp_data1.iloc[:,j+1])
        TimeFeature_Normal[10*j+7, i] = np.max(temp_data1.iloc[:,j+1])/rms(temp_data1.iloc[:,j+1])
        TimeFeature_Normal[10*j+8, i] = rms(temp_data1.iloc[:,j+1])/np.mean(temp_data1.iloc[:,j+1])
        TimeFeature_Normal[10*j+9, i] = np.max(temp_data1.iloc[:,j+1])/np.mean(temp_data1.iloc[:,j+1])
        
        # Abnormal Time Domain Feature
        TimeFeature_Abnormal[10*j+0, i] = np.max(temp_data2.iloc[:,j+1])
        TimeFeature_Abnormal[10*j+1, i] = np.min(temp_data2.iloc[:,j+1])
        TimeFeature_Abnormal[10*j+2, i] = np.mean(temp_data2.iloc[:,j+1])
        TimeFeature_Abnormal[10*j+3, i] = rms(temp_data2.iloc[:,j+1])
        TimeFeature_Abnormal[10*j+4, i] = np.var(temp_data2.iloc[:,j+1])
        TimeFeature_Abnormal[10*j+5, i] = sp.skew(temp_data2.iloc[:,j+1])
        TimeFeature_Abnormal[10*j+6, i] = sp.kurtosis(temp_data2.iloc[:,j+1])
        TimeFeature_Abnormal[10*j+7, i] = np.max(temp_data2.iloc[:,j+1])/rms(temp_data2.iloc[:,j+1])
        TimeFeature_Abnormal[10*j+8, i] = rms(temp_data2.iloc[:,j+1])/np.mean(temp_data2.iloc[:,j+1])
        TimeFeature_Abnormal[10*j+9, i] = np.max(temp_data2.iloc[:,j+1])/np.mean(temp_data2.iloc[:,j+1])
        
print(TimeFeature_Normal.shape)
print(TimeFeature_Abnormal.shape)

(30, 180)
(30, 180)


Normal, Abnormal 데이터 합치기  

In [5]:
TimeFeature = np.concatenate([TimeFeature_Normal, TimeFeature_Abnormal] , axis=1)
TimeFeature.shape

(30, 360)

## Frequncy Domain 특징값 추출 (10 features \* 8 wavelet levels * 3 sensors = 240개씩)

In [6]:
# Wavelet options

MotherWavelet = pywt.Wavelet('db4')   # Mother wavelet (모함수) 지정
Level   = 8                    # Wavelet 분해 레벨 지정
select  = 8                    # 특징추출 영역 고주파 영역부터 개수 지정 (d1~)

In [7]:
#Frequency Domain 특징값 추출 (Wavelet Transform 기반)
FreqFeature_Normal   = np.zeros(shape=(NoOfSensor*NoOfFeature*select , NoOfData))
FreqFeature_Abnormal = np.zeros(shape=(NoOfSensor*NoOfFeature*select , NoOfData))

for i in range(NoOfData):
    
    # 데이터 불러오기
    temp_path1 = './SpotWeldingData/Normal_%d'%(i+1)   # Normal 데이터 파일 경로
    temp_path2 = './SpotWeldingData/Abnormal_%d'%(i+1) # Abnormal 데이터 파일 경로
    temp_data1 = np.array(pd.read_csv(temp_path1 , sep=',', header=None).iloc[:,1:]) # 임시 Normal 데이터
    temp_data2 = np.array(pd.read_csv(temp_path2 , sep=',', header=None).iloc[:,1:]) # 임시 Abnormal 데이터
    Coef1      = pywt.wavedec(temp_data1, MotherWavelet, level=Level, axis=0)
    Coef2      = pywt.wavedec(temp_data2, MotherWavelet, level=Level, axis=0)
    
    # Frequency Domain 특징값 추출
    for j in range(NoOfSensor):
        
        for k in np.arange(select):
            coef1 = Coef1[Level-k]
            coef2 = Coef2[Level-k]
            
            
            # Normal Frequency Domain Feature
            FreqFeature_Normal[NoOfFeature*j*select+k*NoOfFeature+0 , (i-1)] = np.max(coef1[:,j])
            FreqFeature_Normal[NoOfFeature*j*select+k*NoOfFeature+1 , (i-1)] = np.min(coef1[:,j])
            FreqFeature_Normal[NoOfFeature*j*select+k*NoOfFeature+2 , (i-1)] = np.mean(coef1[:,j])
            FreqFeature_Normal[NoOfFeature*j*select+k*NoOfFeature+3 , (i-1)] = np.var(coef1[:,j])
            FreqFeature_Normal[NoOfFeature*j*select+k*NoOfFeature+4 , (i-1)] = rms(coef1[:,j])
            FreqFeature_Normal[NoOfFeature*j*select+k*NoOfFeature+5 , (i-1)] = sp.skew(coef1[:,j])
            FreqFeature_Normal[NoOfFeature*j*select+k*NoOfFeature+6 , (i-1)] = sp.kurtosis(coef1[:,j])
            FreqFeature_Normal[NoOfFeature*j*select+k*NoOfFeature+7 , (i-1)] = np.max(coef1[:,j])/rms(coef1[:,j])
            FreqFeature_Normal[NoOfFeature*j*select+k*NoOfFeature+8 , (i-1)] = rms(coef1[:,j])/np.mean(coef1[:,j])
            FreqFeature_Normal[NoOfFeature*j*select+k*NoOfFeature+9 , (i-1)] = np.max(coef1[:,j])/np.mean(coef1[:,j])
            
            # Abnormal Frequency Domain Feature
            FreqFeature_Abnormal[NoOfFeature*j*select+k*NoOfFeature+0 , (i-1)] = np.max(coef2[:,j])
            FreqFeature_Abnormal[NoOfFeature*j*select+k*NoOfFeature+1 , (i-1)] = np.min(coef2[:,j])
            FreqFeature_Abnormal[NoOfFeature*j*select+k*NoOfFeature+2 , (i-1)] = np.mean(coef2[:,j])
            FreqFeature_Abnormal[NoOfFeature*j*select+k*NoOfFeature+3 , (i-1)] = np.var(coef2[:,j])
            FreqFeature_Abnormal[NoOfFeature*j*select+k*NoOfFeature+4 , (i-1)] = rms(coef2[:,j])
            FreqFeature_Abnormal[NoOfFeature*j*select+k*NoOfFeature+5 , (i-1)] = sp.skew(coef2[:,j])
            FreqFeature_Abnormal[NoOfFeature*j*select+k*NoOfFeature+6 , (i-1)] = sp.kurtosis(coef2[:,j])
            FreqFeature_Abnormal[NoOfFeature*j*select+k*NoOfFeature+7 , (i-1)] = np.max(coef2[:,j])/rms(coef2[:,j])
            FreqFeature_Abnormal[NoOfFeature*j*select+k*NoOfFeature+8 , (i-1)] = rms(coef2[:,j])/np.mean(coef2[:,j])
            FreqFeature_Abnormal[NoOfFeature*j*select+k*NoOfFeature+9 , (i-1)] = np.max(coef2[:,j])/np.mean(coef2[:,j])

print(FreqFeature_Normal.shape)
print(FreqFeature_Abnormal.shape)

(240, 180)
(240, 180)


Normal, Abnormal 특징값 합치기      

In [8]:
FreqFeature = np.concatenate([FreqFeature_Normal, FreqFeature_Abnormal] , axis=1)
FreqFeature.shape

(240, 360)

## Feature 통합 (Time domain 30개 + Freq. domain 240 = 270개)

In [9]:
Features = np.concatenate([TimeFeature,FreqFeature] , axis=0)

print("Feature Data Size :", Features.shape)
print("= 데이터 360개(정상/고장 각 180개씩)가 각각 270개의 특징값으로 구성됨")

Feature Data Size : (270, 360)
= 데이터 360개(정상/고장 각 180개씩)가 각각 270개의 특징값으로 구성됨


저장에 용이한 Data frame 형식으로 변환

In [10]:
FeatureData = pd.DataFrame(Features)
FeatureData

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,350,351,352,353,354,355,356,357,358,359
0,1.351000,31.661000,31.832000,1.418300,1.053400,30.628000,0.992040,0.992420,1.059700,1.170800,...,0.931090,0.732910,1.016000,0.717950,0.853310,0.747490,0.718320,0.845460,0.848450,0.758330
1,-1.372000,-22.786000,-23.613000,-1.085600,-1.057500,-19.468000,-1.319600,-1.056400,-2.041700,-1.343200,...,-1.564900,-0.730310,-1.387000,-0.796880,-0.860070,-0.782290,-0.513800,-0.656270,-0.747890,-0.914290
2,0.011083,0.023339,0.020506,0.027215,0.016574,0.018563,0.020904,0.024480,0.029605,0.028426,...,0.029894,0.027896,0.032512,0.036554,0.031676,0.037731,0.036942,0.035927,0.035836,0.036279
3,0.426105,2.312749,2.313820,0.396240,0.388252,2.088591,0.403801,0.404898,0.381526,0.412919,...,0.339540,0.317581,0.335417,0.328550,0.336647,0.338766,0.265980,0.321351,0.323284,0.302785
4,0.181443,5.348262,5.353342,0.156266,0.150465,4.361866,0.162618,0.163343,0.144686,0.169694,...,0.114394,0.100079,0.111448,0.106609,0.112328,0.113339,0.069381,0.101976,0.103229,0.090362
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,-0.185952,-0.187187,-0.171520,-0.189503,-0.201445,-0.186237,-0.199074,-0.170394,-0.190345,-0.170432,...,-0.190720,-0.176745,-0.190214,-0.194586,-0.187871,-0.193369,-0.188452,-0.190433,-0.186167,-0.203258
266,-0.897746,-0.845829,-0.844962,-0.857929,-0.914043,-0.868960,-0.916918,-0.821813,-0.860336,-0.853039,...,-0.938659,-0.843433,-0.926418,-0.891989,-0.934288,-0.930963,-0.864821,-0.835896,-0.846398,-0.797992
267,1.794657,1.830514,1.834013,1.826639,1.792487,1.817414,1.791401,1.853724,1.829700,1.840133,...,1.777513,1.847621,1.785046,1.795797,1.780019,1.778696,1.828450,1.843831,1.831464,1.833629
268,8.293784,8.196896,7.671136,7.465559,7.029386,7.119339,6.689375,8.137360,7.335499,8.038893,...,6.463547,7.512968,6.494972,7.103328,6.686745,6.681085,6.412330,7.825111,7.715043,7.490327


추출된 특징 데이터 저장 (.csv 파일)

In [12]:
path = './ProcessedData/FeatureData'   # path = '파일 경로/저장할 파일 이름'
FeatureData.to_csv(path, sep=',', header=None , index=None)

# [실습 과제 2]

## 1. 센서 데이터 2개 (전압, 전류) 및 WT level 6으로 설정하여 다른 형태의 특징데이터 추출 
#### >>>>>> 저장된 특징 데이터 파일 제출
#### >>>>>> 데이터 이름 : ST(수강생 번호)_HW2_1  (예시 : 'ST000_HW2_1'  //  'ST00_HW2_1'  //  'ST0_HW2_1')
#### >>>>>> 데이터 이름 중 'ST' , 'HW' 등 영어는 모두 대문자

### ****** 필독 !! 실습과제 주의사항 ******

- 각자의 "수강생 번호" 확인 (아이캠퍼스 공지) 
- 제출하는 실습과제 파일에 "수강생 번호"를 기준으로 작성 (이름, 학번 등 작성X)
- 각 실습과제에 대한 구체적인 파일 이름은 매번 개별 안내 
  (수강생 번호 123번 학생 과제파일 예시 : 'ST123_HW2_1.csv' )
- 과제 파일이름 양식 지키지 않을 시 감점!