In [None]:
import pandas as pd
import numpy as np
import librosa
import os
import pywt
import sklearn 
import scipy.stats as sp
import random
random.seed(777)
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import LocalOutlierFactor
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from tensorflow import keras
from sklearn.model_selection import KFold
import tensorflow as tf
import joblib

### 0. 데이터 불러오기

In [None]:
from google.colab import drive
# Google Drive is mounted at a fixed absolute location. The data root is derived
# from that same location instead of os.getcwd(), so re-running this cell after
# any os.chdir() still yields the correct path.
drive.mount('/content/drive')
# Root folder on the mounted drive that holds the competition files.
StdPath = os.path.join('/content/drive', 'MyDrive', 'Colab Notebooks', '데이콘', '기계')

Mounted at /content/drive


In [None]:
# Read the train/test metadata tables stored under StdPath.
Train_info, Test_info = (
  pd.read_csv(os.path.join(StdPath, csv_name), sep=',')
  for csv_name in ('data/train.csv', 'data/test.csv')
)

### 1. 데이터 특징 추출

1-1. 음성 길이 측정

In [None]:
# Measure the duration in seconds of every training clip, loaded at sr=16000.
Sec = []
audio_root = os.path.join(StdPath,'data')
for sample_path in Train_info['SAMPLE_PATH']:
  # The first character of the stored path is dropped and the rest is appended
  # to the data folder.
  data, SR = librosa.load(audio_root + sample_path[1:], sr=16000)
  Sec.append(data.shape[0] / float(SR))
Sec_info = pd.DataFrame(Sec)
Sec_info.describe()

Unnamed: 0,0
count,1279.0
mean,10.0
std,0.0
min,10.0
25%,10.0
50%,10.0
75%,10.0
max,10.0


1-2. 데이터 특징 함수 

Time Domain 통계량 = 15개

(Max,Min,Mean,Rms,Std,Skew,Kurt,CF,IF,SF,Median,Mode,Q1,Q3,Iqr)

Freq Domain = 10 wavelet levels * 15  =  150개

따라서 Feature 개수는 15+150 = 165개이다.

즉, Train Feature DataFrame의 shape은 (1279, 165)가 된다.

In [None]:
def rms(x):
  """Return the root-mean-square of x: the square root of the mean of its squares."""
  squared = np.square(x)
  return np.sqrt(np.mean(squared))

In [None]:
def Feature_Make(data):
  """Compute 15 summary statistics of a 1-D signal as a one-row DataFrame.

  Parameters
  ----------
  data : array-like
    Samples of the signal; converted with ``np.asarray``.

  Returns
  -------
  pandas.DataFrame
    A single row with the columns, in this order:
    Max, Min, Mean, Rms, Std, Skew, Kurt, Cf, If, Sf, Median, Mode, q1, q3, Iqr.

  Notes
  -----
  * Cf = Max / Rms, NaN when Rms is 0.
  * If = Rms / Mean, NaN when Mean is 0.
  * Sf = Max / Mean, 0 when Mean is 0.
  * NOTE(review): the ratios divide by the signed mean and the two zero-mean
    fallbacks differ (NaN vs 0). Both are kept unchanged so that previously
    extracted features stay comparable -- confirm this is intended.
  """
  data = np.asarray(data)
  Max = np.max(data)
  Min = np.min(data)
  Mean = np.mean(data)
  # Same formula as the notebook's rms() helper, inlined so this function is
  # self-contained.
  Rms = np.sqrt(np.mean(data**2))
  Std = np.std(data)
  Skew = sp.skew(data)
  Kurt = sp.kurtosis(data)
  # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
  Cf = Max/Rms if Rms != 0 else np.nan
  If = Rms/Mean if Mean != 0 else np.nan
  Sf = Max/Mean if Mean != 0 else 0
  Median = np.median(data)
  # scipy.stats.mode returns a length-1 array on older SciPy and a scalar on
  # newer releases; np.atleast_1d makes the indexing valid for both.
  Mode = np.atleast_1d(sp.mode(data)[0])[0]
  q1 = np.quantile(data,0.25)
  q3 = np.quantile(data,0.75)
  Iqr = q3-q1
  names = ['Max','Min','Mean','Rms','Std','Skew','Kurt','Cf','If','Sf','Median','Mode','q1','q3','Iqr']
  values = [Max,Min,Mean,Rms,Std,Skew,Kurt,Cf,If,Sf,Median,Mode,q1,q3,Iqr]
  # Build a single column indexed by statistic name, then flip it into one row.
  return pd.DataFrame(values,index=names).transpose()

In [None]:
def Frequency_Domain(data, wavelet='haar', level=10):
  """Extract Feature_Make statistics from each wavelet detail band of a signal.

  Parameters
  ----------
  data : array-like
    1-D signal to decompose.
  wavelet : str, optional
    Mother wavelet passed to ``pywt.wavedec`` (default ``'haar'``).
  level : int, optional
    Decomposition level passed to ``pywt.wavedec`` (default ``10``).

  Returns
  -------
  pandas.DataFrame
    One row: the Feature_Make columns of every retained coefficient array,
    placed side by side in the order ``pywt.wavedec`` returns them. Empty
    DataFrame when no coefficient array remains.
  """
  # The first array returned by wavedec is skipped; only the remaining
  # coefficient arrays are summarised (per PyWavelets docs the first entry is
  # the approximation band and the rest are detail bands).
  bands = pywt.wavedec(data, wavelet, level=level)[1:]
  frames = [Feature_Make(band) for band in bands]
  if not frames:
    return pd.DataFrame()
  # One concat over all bands instead of repeatedly concatenating onto an empty
  # DataFrame.
  return pd.concat(frames, axis=1)

In [None]:
def TimeAndFreq(Data):
  """Return one feature row: Feature_Make(Data) followed by Frequency_Domain(Data)."""
  # Raw-signal statistics first, wavelet-band statistics second, joined column-wise.
  parts = [Feature_Make(Data), Frequency_Domain(Data)]
  return pd.concat(parts, axis=1)

1-3. Train Feature 추출

In [None]:
# Extract one feature row per training clip. Rows are collected in a list and
# concatenated once at the end: concatenating onto a growing DataFrame inside
# the loop copies every previous row on each iteration (quadratic cost).
train_rows = []
for i in range(len(Train_info)):
  data, SR = librosa.load(os.path.join(StdPath,'data')+Train_info['SAMPLE_PATH'].iloc[i][1:], sr=16000)
  train_rows.append(TimeAndFreq(data))
# Stack the rows in metadata order with a fresh 0..n-1 index.
Feature = pd.concat(train_rows, axis=0, ignore_index=True) if train_rows else pd.DataFrame()
# Replace the repeated statistic names with positional column ids 0..n_features-1.
Feature.columns = np.arange(Feature.shape[1])
Train_Feature = Feature

Train_Feature Describe

|       |             0 |             1 |              2 |              3 |              4 |             5 |             6 |           7 |                 8 |                9 |             10 |             11 |             12 |             13 |             14 |            15 |            16 |             17 |            18 |            19 |           20 |          21 |          22 |          23 |          24 |             25 |            26 |            27 |            28 |            29 |            30 |           31 |             32 |            33 |            34 |           35 |           36 |          37 |            38 |          39 |             40 |           41 |            42 |            43 |            44 |           45 |           46 |             47 |            48 |            49 |           50 |           51 |          52 |         53 |          54 |             55 |           56 |            57 |            58 |            59 |           60 |           61 |             62 |            63 |            64 |           65 |           66 |          67 |          68 |          69 |             70 |           71 |            72 |            73 |            74 |            75 |            76 |             77 |            78 |            79 |           80 |           81 |          82 |          83 |          84 |             85 |           86 |            87 |            88 |            89 |            90 |            91 |             92 |            93 |            94 |           95 |           96 |          97 |          98 |          99 |            100 |          101 |           102 |           103 |           104 |           105 |           106 |            107 |           108 |           109 |           110 |          111 |         112 |         113 |         114 |            115 |            116 |           117 |           118 |           119 |           120 |           121 |            122 |           123 |           124 |           125 |          
126 |         127 |         128 |              129 |            130 |           131 |           132 |           133 |           134 |           135 |           136 |            137 |            138 |            139 |           140 |          141 |         142 |              143 |             144 |            145 |            146 |            147 |            148 |           149 |           150 |           151 |            152 |            153 |            154 |           155 |           156 |         157 |               158 |               159 |            160 |            161 |            162 |            163 |            164 |
|:------|--------------:|--------------:|---------------:|---------------:|---------------:|--------------:|--------------:|------------:|------------------:|-----------------:|---------------:|---------------:|---------------:|---------------:|---------------:|--------------:|--------------:|---------------:|--------------:|--------------:|-------------:|------------:|------------:|------------:|------------:|---------------:|--------------:|--------------:|--------------:|--------------:|--------------:|-------------:|---------------:|--------------:|--------------:|-------------:|-------------:|------------:|--------------:|------------:|---------------:|-------------:|--------------:|--------------:|--------------:|-------------:|-------------:|---------------:|--------------:|--------------:|-------------:|-------------:|------------:|-----------:|------------:|---------------:|-------------:|--------------:|--------------:|--------------:|-------------:|-------------:|---------------:|--------------:|--------------:|-------------:|-------------:|------------:|------------:|------------:|---------------:|-------------:|--------------:|--------------:|--------------:|--------------:|--------------:|---------------:|--------------:|--------------:|-------------:|-------------:|------------:|------------:|------------:|---------------:|-------------:|--------------:|--------------:|--------------:|--------------:|--------------:|---------------:|--------------:|--------------:|-------------:|-------------:|------------:|------------:|------------:|---------------:|-------------:|--------------:|--------------:|--------------:|--------------:|--------------:|---------------:|--------------:|--------------:|--------------:|-------------:|------------:|------------:|------------:|---------------:|---------------:|--------------:|--------------:|--------------:|--------------:|--------------:|---------------:|--------------:|--------------:|--------------:|-----------
--:|------------:|------------:|-----------------:|---------------:|--------------:|--------------:|--------------:|--------------:|--------------:|--------------:|---------------:|---------------:|---------------:|--------------:|-------------:|------------:|-----------------:|----------------:|---------------:|---------------:|---------------:|---------------:|--------------:|--------------:|--------------:|---------------:|---------------:|---------------:|--------------:|--------------:|------------:|------------------:|------------------:|---------------:|---------------:|---------------:|---------------:|---------------:|
| count | 1279          | 1279          | 1279           | 1279           | 1279           | 1279          | 1279          | 1279        |    1279           |   1279           | 1279           | 1279           | 1279           | 1279           | 1279           | 1279          | 1279          | 1279           | 1279          | 1279          | 1279         | 1279        | 1279        |   1279      |   1279      | 1279           | 1279          | 1279          | 1279          | 1279          | 1279          | 1279         | 1279           | 1279          | 1279          | 1279         | 1279         | 1279        |   1279        |   1279      | 1279           | 1279         | 1279          | 1279          | 1279          | 1279         | 1279         | 1279           | 1279          | 1279          | 1279         | 1279         | 1279        |  1279      |   1279      | 1279           | 1279         | 1279          | 1279          | 1279          | 1279         | 1279         | 1279           | 1279          | 1279          | 1279         | 1279         | 1279        |   1279      |   1279      | 1279           | 1279         | 1279          | 1279          | 1279          | 1279          | 1279          | 1279           | 1279          | 1279          | 1279         | 1279         | 1279        |   1279      |   1279      | 1279           | 1279         | 1279          | 1279          | 1279          | 1279          | 1279          | 1279           | 1279          | 1279          | 1279         | 1279         | 1279        |   1279      |    1279     | 1279           | 1279         | 1279          | 1279          | 1279          | 1279          | 1279          | 1279           | 1279          | 1279          | 1279          | 1279         | 1279        |   1279      |    1279     | 1279           | 1279           | 1279          | 1279          | 1279          | 1279          | 1279          | 1279           | 1279          | 1279          | 1279          | 1279      
   | 1279        |    1279     |    1279          | 1279           | 1279          | 1279          | 1279          | 1279          | 1279          | 1279          | 1279           | 1279           | 1279           | 1279          | 1279         | 1279        |   1279           |  1279           | 1279           | 1279           | 1279           | 1279           | 1279          | 1279          | 1279          | 1279           | 1279           | 1279           | 1279          | 1279          | 1279        |    1279           |    1279           | 1279           | 1279           | 1279           | 1279           | 1279           |
| mean  |    0.0202105  |   -0.0216536  |   -9.77762e-08 |    0.00485594  |    0.00485594  |   -0.0239938  |   -0.0541867  |    4.15103  |   15467.9         |  63275           |    6.60321e-06 |   -0.000645619 |   -0.00329725  |    0.00331998  |    0.00661723  |    0.0232411  |   -0.0217998  |   -1.30187e-05 |    0.00900525 |    0.0089891  |    0.0977517 |   -0.242415 |    2.59623  |     14.1506 |     62.8449 |   -0.000340225 |   -0.0217998  |   -0.00640924 |    0.00619102 |    0.0126003  |    0.035198   |   -0.0342787 |   -7.57311e-06 |    0.0139002  |    0.0138895  |    0.042779  |   -0.425946  |    2.63577  |     -0.499569 |     16.8986 |   -0.000410667 |   -0.0342142 |   -0.0106782  |    0.0106787  |    0.021357   |    0.0653353 |   -0.0634427 |    1.20297e-05 |    0.0223211  |    0.0223066  |    0.0243119 |   -0.183003  |    2.98441  |   -17.6999 |    -49.2522 |   -0.000184257 |   -0.0632936 |   -0.0157411  |    0.015586   |    0.0313272  |    0.0606746 |   -0.0578931 |   -2.73334e-06 |    0.0194845  |    0.0194818  |    0.0622388 |   -0.188667  |    3.16163  |    225.945  |    714.913  |   -0.000644798 |   -0.0567943 |   -0.0138739  |    0.0136273  |    0.0275012  |    0.0496377  |   -0.0484269  |   -6.79331e-06 |    0.0143808  |    0.0143794  |    0.0499315 |   -0.0617335 |    3.45779  |     10.9898 |     30.0826 |   -0.000234787 |   -0.0458204 |   -0.00987876 |    0.00972878 |    0.0196075  |    0.0450367  |   -0.0463355  |   -4.97742e-07 |    0.0119039  |    0.0119031  |   -0.0165776 |    0.094276  |    3.80226  |    317.174  |    1181.8   |    2.3893e-05  |   -0.0371431 |   -0.00794101 |    0.00798703 |    0.015928   |    0.0324718  |   -0.0338081  |   -3.71689e-06 |    0.00811459 |    0.00811438 |   -0.0275276  |    0.158401  |    4.06354  |     71.5052 |     358.513 |    3.20188e-05 |   -0.0135028   |   -0.00538832 |    0.00542324 |    0.0108116  |    0.0189985  |   -0.0192129  |   -2.19022e-07 |    0.00444422 |    0.00444418 |   -0.00624578 |    
0.166671  |    4.37448  |     182.251 |     939.137      |    6.60851e-06 |   -0.00488106 |   -0.00296031 |    0.00296716 |    0.00592747 |    0.0101968  |   -0.0103225  |   -6.18145e-08 |    0.00227046  |    0.00227045  |   -0.0062757  |    0.178038  |    4.63574  |   6555.85        | 34521.8         |    4.23584e-06 |   -0.00207468  |   -0.00151222  |    0.00151748  |    0.0030297  |    0.00489361 |   -0.00496595 |    1.21446e-10 |    0.00104121  |    0.00104121  |   -0.00742917 |    0.214792   |    4.88637  |  130180           |  631079           |    2.09115e-06 |   -0.000621271 |   -0.000692579 |    0.000695287 |    0.00138787  |
| std   |    0.00284871 |    0.00244649 |    1.38454e-06 |    0.000249305 |    0.000249305 |    0.0249357  |    0.117125   |    0.451982 |  298607           |      1.23881e+06 |    2.82236e-05 |    0.00154378  |    0.000146303 |    0.000144659 |    0.000289666 |    0.00850149 |    0.00745634 |    0.000536796 |    0.00313    |    0.00313039 |    0.198962  |    0.448856 |    0.368926 |   1279.23   |   3600.57   |    0.00077485  |    0.00745634 |    0.00251636 |    0.00237848 |    0.00471795 |    0.0129856  |    0.0121544 |    0.000531124 |    0.00573703 |    0.00573845 |    0.123523  |    0.561559  |    0.446995 |   1003.62     |   2493.2    |    0.000922183 |    0.0122325 |    0.00520872 |    0.00519321 |    0.0103142  |    0.0289733 |    0.0271326 |    0.000869566 |    0.0102943  |    0.0102891  |    0.0986343 |    0.289794  |    0.360534 |   435.66   |   1254.35   |    0.00121032  |    0.0272872 |    0.00774411 |    0.00755803 |    0.0151698  |    0.0233478 |    0.0198822 |    0.000322382 |    0.00784404 |    0.00784403 |    0.120402  |    0.294073  |    0.354648 |   6094.11   |  19212.8    |    0.000984577 |    0.0214899 |    0.00609919 |    0.00580992 |    0.0118771  |    0.00784177 |    0.00677718 |    0.000197612 |    0.00202738 |    0.00202779 |    0.0890343 |    0.148382  |    0.321959 |   1280.89   |   4292.7    |    0.00041465  |    0.013295  |    0.00161014 |    0.00143979 |    0.00301018 |    0.00816695 |    0.00875585 |    0.00014095  |    0.00211182 |    0.00211176 |    0.0447166 |    0.115649  |    0.402993 |   7306.17   |   28903.2   |    0.000202005 |    0.0207568 |    0.00142426 |    0.00148087 |    0.00288713 |    0.00850332 |    0.00917675 |    6.24519e-05 |    0.00235616 |    0.00235606 |    0.0320714  |    0.19838   |    0.469747 |   7307.83   |   31097.5   |    8.84724e-05 |    0.01788     |    0.00159874 |    0.0016225  |    0.00321786 |    0.00584237 |    0.00632182 |    2.13239e-05 |    0.00153372 |    0.00153369 |    0.0231044  |    
0.376118  |    0.610982 |    8907.28  |   40519.8        |    3.12493e-05 |    0.00284446 |    0.00103689 |    0.00104589 |    0.00208212 |    0.00379362 |    0.00415169 |    5.47688e-06 |    0.000940057 |    0.000940054 |    0.0219092  |    0.557757  |    0.754049 | 317364           |     1.57812e+06 |    1.11911e-05 |    0.00270995  |    0.000631599 |    0.000638569 |    0.00127006 |    0.00199371 |    0.002198   |    1.05181e-08 |    0.000470668 |    0.000470668 |    0.0199951  |    1.01501    |    0.948238 |       4.1694e+06  |       1.90783e+07 |    3.82577e-06 |    0.000823817 |    0.00031586  |    0.000319129 |    0.000634977 |
| min   |    0.0149942  |   -0.033553   |   -5.26688e-06 |    0.00411773  |    0.00411773  |   -0.150815   |   -0.363775   |    3.21597  | -844135           |     -3.65705e+06 |   -0.000113729 |   -0.011099    |   -0.00369751  |    0.00277325  |    0.00553463  |    0.00702618 |   -0.0692791  |   -0.00268243  |    0.00329803 |    0.00329797 |   -1.15067   |   -1.08446  |    1.79535  | -27524.5    | -62315.8    |   -0.00296206  |   -0.0692791  |   -0.0221756  |    0.00133562 |    0.00395222 |    0.00968736 |   -0.0787524 |   -0.00385445  |    0.00444772 |    0.00444711 |   -0.735493  |   -1.29384   |    1.86674  | -19421.9      | -37921.3    |   -0.00458448  |   -0.0787524 |   -0.0208258  |    0.00217282 |    0.00521426 |    0.0165951 |   -0.129823  |   -0.00306517  |    0.00683714 |    0.00683554 |   -0.354337  |   -1.09923   |    2.01111  | -9241.82   | -23769.3    |   -0.00514956  |   -0.129823  |   -0.0314298  |    0.00407965 |    0.00879688 |    0.021154  |   -0.126654  |   -0.00118647  |    0.00707324 |    0.00707281 |   -0.601158  |   -0.84124   |    2.2575   | -20361.5    | -68715.2    |   -0.00418041  |   -0.126654  |   -0.0260348  |    0.00429888 |    0.00887547 |    0.0301957  |   -0.0777434  |   -0.00159767  |    0.0099426  |    0.00994253 |   -0.209945  |   -0.400456  |    2.72665  | -21166      | -70720.1    |   -0.00248418  |   -0.0777434 |   -0.0132697  |    0.00638819 |    0.013195   |    0.0294472  |   -0.0718609  |   -0.000549124 |    0.00811258 |    0.00811196 |   -0.134002  |   -0.186623  |    2.98143  | -39987.9    | -190870     |   -0.000723983 |   -0.0718609 |   -0.0103788  |    0.00523605 |    0.0106238  |    0.0178438  |   -0.0629862  |   -0.000298684 |    0.00456441 |    0.00456441 |   -0.121744   |   -0.0822206 |    3.16329  | -87567      | -355624     |   -0.000301841 |   -0.0593505   |   -0.00819422 |    0.00293617 |    0.00589209 |    0.00933982 |   -0.0372634  |   -9.32305e-05 |    0.00223897 |    0.00223892 |   -0.0817118  |   
-0.0620611 |    3.43967  | -170577     | -649620          |   -0.000121649 |   -0.0170695  |   -0.00442037 |    0.00143806 |    0.00291821 |    0.00473279 |   -0.0220797  |   -2.30956e-05 |    0.000986425 |    0.000986422 |   -0.0871029  |   -0.0320298 |    3.71639  |     -2.67384e+06 |    -1.10339e+07 |   -3.00113e-05 |   -0.0101263   |   -0.00238844  |    0.000646334 |    0.00129269 |    0.0019287  |   -0.012536   |   -3.38383e-08 |    0.000412539 |    0.000412539 |   -0.269216   |   -0.00771715 |    3.81336  |      -2.82602e+07 |      -1.25072e+08 |   -5.11033e-06 |   -0.00425122  |   -0.00116438  |    0.000273623 |    0.000545156 |
| 25%   |    0.0176938  |   -0.0231386  |   -8.57196e-07 |    0.00466177  |    0.00466177  |   -0.0362214  |   -0.158953   |    3.79186  |   -6930.3         | -29984           |   -5.34579e-06 |   -4.84288e-08 |   -0.00340709  |    0.00322728  |    0.0064263   |    0.0164128  |   -0.0264182  |   -0.000323932 |    0.00620717 |    0.00619312 |   -0.0280291 |   -0.533162 |    2.33614  |    -28.1388 |    -70.6572 |   -0.000816826 |   -0.0264182  |   -0.00841454 |    0.00415342 |    0.00810539 |    0.0247671  |   -0.0431111 |   -0.000309986 |    0.00897797 |    0.00895024 |   -0.0285959 |   -0.898154  |    2.29477  |    -43.7962   |   -112.543  |   -0.000963604 |   -0.0431065 |   -0.0156093  |    0.00573453 |    0.0118184  |    0.0388314 |   -0.0869125 |   -0.000462445 |    0.0132718  |    0.0132667  |   -0.0389968 |   -0.38642   |    2.73212  |   -39.8899 |   -118.138  |   -0.000763334 |   -0.0868365 |   -0.0228914  |    0.00884361 |    0.0177574  |    0.0387109 |   -0.0753069 |   -0.000193582 |    0.0124952  |    0.0124892  |   -0.026872  |   -0.399283  |    2.91409  |    -90.7774 |   -283.013  |   -0.00138097  |   -0.0751013 |   -0.0196681  |    0.00829201 |    0.0166918  |    0.0437499  |   -0.0526739  |   -0.000128715 |    0.0129239  |    0.0129235  |   -0.0157178 |   -0.169976  |    3.23302  |   -118.953  |   -408.95   |   -0.000500092 |   -0.0524001 |   -0.0113562  |    0.00857501 |    0.0172433  |    0.037923   |   -0.0529994  |   -8.59934e-05 |    0.00986497 |    0.00986482 |   -0.0486982 |    0.0271358 |    3.51917  |   -139.365  |    -530.965 |   -0.000102686 |   -0.0517513 |   -0.00931583 |    0.00654854 |    0.0131376  |    0.0241453  |   -0.0417446  |   -3.31039e-05 |    0.00571452 |    0.00571444 |   -0.0486307  |    0.0516956 |    3.73972  |   -256.492  |   -1076.11  |   -2.0656e-05  |   -0.0266792   |   -0.00694312 |    0.00376615 |    0.00752364 |    0.0132112  |   -0.0246668  |   -8.50271e-06 |    0.00283301 |    0.002833   |   -0.0217223  |    
0.0530201 |    3.99807  |    -419.471 |   -1774.61       |   -9.39428e-06 |   -0.00635716 |   -0.0039633  |    0.0018652  |    0.00373955 |    0.00628052 |   -0.0140117  |   -2.71714e-06 |    0.00129679  |    0.00129679  |   -0.022707   |    0.0591161 |    4.1712   |   -718.415       | -3393.29        |   -2.36562e-06 |   -0.00486658  |   -0.00212936  |    0.000860147 |    0.00172111 |    0.00282168 |   -0.00691034 |   -7.10662e-09 |    0.000550834 |    0.000550834 |   -0.0225651  |    0.0626662  |    4.36219  | -130157           | -615021           |   -4.88643e-07 |   -0.00127015  |   -0.000996112 |    0.000365705 |    0.000731558 |
| 50%   |    0.0203467  |   -0.0216918  |   -8.61079e-08 |    0.00490776  |    0.00490776  |   -0.0231937  |   -0.00646837 |    4.14487  |   -1693.79        |  -6589.22        |    8.2776e-06  |    0           |   -0.00332583  |    0.00334699  |    0.00667454  |    0.0235841  |   -0.0219421  |   -1.10291e-05 |    0.00988045 |    0.00986075 |    0.107691  |   -0.305497 |    2.54156  |     -6.3999 |    -14.7742 |   -0.000240982 |   -0.0219421  |   -0.00695254 |    0.00655084 |    0.013804   |    0.0361179  |   -0.0348281 |   -1.44211e-05 |    0.0165019  |    0.016492   |    0.0657931 |   -0.438343  |    2.57701  |     -9.16781  |    -23.6032 |   -0.000273888 |   -0.0347775 |   -0.0125157  |    0.0119988  |    0.0238651  |    0.0662501 |   -0.0678167 |    1.28373e-05 |    0.0233167  |    0.0233132  |    0.0300085 |   -0.210598  |    2.93228  |    11.0833 |     33.0318 |   -6.40184e-05 |   -0.0678112 |   -0.0163486  |    0.0151229  |    0.0338126  |    0.062992  |   -0.0611608 |   -7.06536e-06 |    0.0176863  |    0.0176862  |    0.0677813 |   -0.201157  |    3.1151   |    -25.1357 |    -76.3094 |   -0.000340467 |   -0.0577015 |   -0.0133812  |    0.0137561  |    0.0271373  |    0.0494245  |   -0.04804    |   -8.94374e-06 |    0.014213   |    0.0142124  |    0.0394383 |   -0.0702993 |    3.41796  |    -31.5327 |   -105.065  |   -0.000195549 |   -0.0474876 |   -0.00972064 |    0.00965978 |    0.019339   |    0.0454527  |   -0.0466786  |   -2.8923e-06  |    0.0127771  |    0.0127771  |   -0.018906  |    0.0850798 |    3.73327  |    -31.1944 |    -118.605 |    8.72067e-06 |   -0.041541  |   -0.00823757 |    0.0083135  |    0.0170306  |    0.0339607  |   -0.0353333  |   -4.07517e-06 |    0.00953265 |    0.00953254 |   -0.028031   |    0.124178  |    3.99859  |    -79.912  |    -307.997 |    1.78578e-05 |   -0.00637877  |   -0.00630041 |    0.00631409 |    0.012768   |    0.0210291  |   -0.0214374  |    5.27736e-07 |    0.00547121 |    0.0054711  |   -0.00636997 |    
0.0995885 |    4.25125  |     105.835 |     426.059      |    2.24879e-06 |   -0.00448973 |   -0.00366802 |    0.00366548 |    0.0073503  |    0.0120009  |   -0.0123742  |   -4.72356e-08 |    0.00295756  |    0.00295755  |   -0.00707342 |    0.0929045 |    4.4743   |   -172.76        |  -704.357       |    7.89645e-07 |   -0.000455942 |   -0.00196713  |    0.00198956  |    0.00396355 |    0.005906   |   -0.00599126 |    2.50607e-10 |    0.00139956  |    0.00139956  |   -0.00886717 |    0.0855767  |    4.65031  |   26915.3         |  141058           |    8.19331e-07 |   -0.000263544 |   -0.000933575 |    0.000938833 |    0.00187241  |
| 75%   |    0.0222712  |   -0.0197126  |    6.03966e-07 |    0.00506914  |    0.00506914  |   -0.00956164 |    0.045507   |    4.43828  |    6039.23        |  24494.3         |    2.32477e-05 |    0           |   -0.00319881  |    0.00342706  |    0.00683303  |    0.029202   |   -0.0161277  |    0.000290092 |    0.0114171  |    0.0114037  |    0.231963  |   -0.049242 |    2.78672  |     26.4546 |     68.6264 |    0.000144113 |   -0.0161277  |   -0.00408325 |    0.00808505 |    0.016349   |    0.0447743  |   -0.0247209 |    0.000301472 |    0.0190781  |    0.0190727  |    0.123199  |   -0.0117903 |    2.89104  |     39.1701   |    101.206  |    0.000141304 |   -0.0246924 |   -0.00561991 |    0.0155813  |    0.0310966  |    0.091494  |   -0.0387316 |    0.000472243 |    0.0320765  |    0.032046   |    0.086998  |   -0.0250376 |    3.19523  |    38.859  |    119.849  |    0.000512066 |   -0.0387112 |   -0.00875172 |    0.0226513  |    0.0455871  |    0.0817356 |   -0.040129  |    0.000189783 |    0.0270251  |    0.0270246  |    0.156086  |   -0.0228417 |    3.35367  |     89.4037 |    287.654  |    0.000120718 |   -0.0395297 |   -0.0083032  |    0.0190415  |    0.0387302  |    0.0549004  |   -0.0435171  |    0.000122563 |    0.016239   |    0.0162387  |    0.118562  |    0.0264876 |    3.62865  |    111.09   |    376.522  |    5.25623e-05 |   -0.0428046 |   -0.00859081 |    0.0110354  |    0.0223289  |    0.0509582  |   -0.03846    |    8.37889e-05 |    0.0139565  |    0.0139553  |    0.0128003 |    0.15002   |    4.02364  |    128.985  |     497.872 |    0.000145962 |   -0.0346736 |   -0.00658783 |    0.00942602 |    0.0187376  |    0.0396925  |   -0.0250139  |    2.47164e-05 |    0.0104009  |    0.0104008  |   -0.00643115 |    0.232061  |    4.2955   |    259.062  |    1049.42  |    7.64516e-05 |   -3.15718e-05 |   -0.00376861 |    0.00700318 |    0.0139424  |    0.02399    |   -0.0129377  |    8.4428e-06  |    0.00592167 |    0.0059216  |    0.00846038 |    
0.193386  |    4.5901   |     454.93  |    2083.54       |    2.08589e-05 |   -0.00298085 |   -0.00187363 |    0.00398095 |    0.00793378 |    0.0135308  |   -0.00607496 |    2.47162e-06 |    0.00318596  |    0.00318595  |    0.0100037  |    0.161186  |    4.85423  |    726.612       |  3363.36        |    9.29076e-06 |    0           |   -0.00086345  |    0.00214275  |    0.00427329 |    0.00662142 |   -0.00266388 |    7.66856e-09 |    0.00149241  |    0.00149241  |    0.00792992 |    0.133297   |    5.11178  |  142416           |  657891           |    4.50046e-06 |    0           |   -0.000366182 |    0.00100209  |    0.00199915  |
| max   |    0.0476033  |   -0.0167592  |    5.33613e-06 |    0.00531749  |    0.00531749  |    0.0897919  |    0.411428   |    9.47823  |       6.97769e+06 |      2.80602e+07 |    0.000113722 |    0.00380068  |   -0.00276139  |    0.00372725  |    0.00742476  |    0.068661   |   -0.00670216 |    0.0027811   |    0.0276159  |    0.0276159  |    1.23795   |    5.41843  |    5.41411  |  30358      | 100605      |    0.00243286  |   -0.00670216 |   -0.00170693 |    0.0218292  |    0.0440048  |    0.0839493  |   -0.0115541 |    0.00282141  |    0.0286408  |    0.0286386  |    0.374661  |    3.99584   |    5.03143  |  15744        |  49265.8    |    0.00328814  |    0.0154245 |   -0.00261323 |    0.0199453  |    0.0407711  |    0.140662  |   -0.0193469 |    0.00368406  |    0.0415964  |    0.0415939  |    0.373975  |    1.70375   |    4.89908  |  2658.51   |   7756.11   |    0.00527795  |    0.0229634 |   -0.00374293 |    0.0302023  |    0.0599786  |    0.12695   |   -0.0194749 |    0.00177468  |    0.0337116  |    0.0337076  |    0.361119  |    3.592     |    7.21541  | 162355      | 532753      |    0.00289185  |    0.0453567 |   -0.00452435 |    0.025384   |    0.0514188  |    0.0869454  |   -0.0293321  |    0.000646425 |    0.0187473  |    0.0187473  |    0.331116  |    1.02917   |    5.13657  |  15658.5    |  52833.9    |    0.000846935 |    0.0318694 |   -0.00630343 |    0.0129859  |    0.0260704  |    0.0807042  |   -0.0297554  |    0.000818275 |    0.0152657  |    0.0152654  |    0.113641  |    2.21592   |    7.44601  | 177623      |  754205     |    0.000842268 |    0.0312644 |   -0.00524453 |    0.0103062  |    0.0202701  |    0.0557098  |   -0.019173   |    0.000348872 |    0.0118169  |    0.011816   |    0.194462   |    4.36531   |    9.02682  | 231221      |  997149     |    0.000519839 |    0.0241604   |   -0.00295481 |    0.00792331 |    0.0160383  |    0.0419335  |   -0.00960271 |    8.90532e-05 |    0.00650875 |    0.00650863 |    0.0861291  |    
7.13635   |   10.3536   |  208154     |       1.0902e+06 |    0.000135325 |    0.00654667 |   -0.00146727 |    0.00444341 |    0.00877024 |    0.0286666  |   -0.00449489 |    2.09703e-05 |    0.00355975  |    0.00355973  |    0.114445   |   11.8421    |   10.9904   |      1.10249e+07 |     5.53264e+07 |    5.67007e-05 |    0.00545193  |   -0.000646355 |    0.00238304  |    0.00477064 |    0.0118337  |   -0.00194274 |    3.76208e-08 |    0.00173237  |    0.00173237  |    0.0406698  |   26.4941     |   13.9351   |       1.23345e+08 |       5.4276e+08  |    1.65637e-05 |    0.00173527  |   -0.000271534 |    0.00117101  |    0.00233539  |

Train_Feature 원본

In [None]:
# Save the raw training features as CSV without index or header, then show the shape.
train_feature_csv = os.path.join(StdPath,'Feature/train.csv')
Train_Feature.to_csv(train_feature_csv, sep=',', index=False, header=False)
Train_Feature.shape

(1279, 165)

1-4. Test Feature 추출

In [None]:
# Extract one feature row per test clip. Rows are collected in a list and
# concatenated once at the end: concatenating onto a growing DataFrame inside
# the loop copies every previous row on each iteration (quadratic cost).
test_rows = []
for i in range(len(Test_info)):
  data, SR = librosa.load(os.path.join(StdPath,'data')+Test_info['SAMPLE_PATH'].iloc[i][1:], sr=16000)
  test_rows.append(TimeAndFreq(data))
# Stack the rows in metadata order with a fresh 0..n-1 index.
Feature = pd.concat(test_rows, axis=0, ignore_index=True) if test_rows else pd.DataFrame()
# Replace the repeated statistic names with positional column ids 0..n_features-1.
Feature.columns = np.arange(Feature.shape[1])
Test_Feature = Feature

Describe

|       |             0 |            1 |              2 |             3 |             4 |             5 |            6 |           7 |                8 |                9 |             10 |             11 |            12 |            13 |            14 |            15 |            16 |             17 |            18 |            19 |           20 |          21 |          22 |          23 |           24 |             25 |            26 |            27 |            28 |            29 |           30 |           31 |             32 |            33 |            34 |           35 |            36 |          37 |           38 |          39 |             40 |            41 |            42 |            43 |            44 |           45 |           46 |             47 |            48 |            49 |            50 |           51 |          52 |          53 |          54 |             55 |           56 |            57 |            58 |            59 |           60 |           61 |             62 |           63 |           64 |           65 |           66 |         67 |           68 |              69 |             70 |           71 |            72 |            73 |            74 |           75 |           76 |             77 |            78 |            79 |           80 |           81 |          82 |           83 |                84 |             85 |           86 |            87 |            88 |            89 |           90 |           91 |             92 |            93 |            94 |            95 |            96 |          97 |          98 |          99 |            100 |          101 |           102 |           103 |           104 |           105 |          106 |            107 |           108 |           109 |           110 |          111 |         112 |          113 |              114 |            115 |            116 |           117 |           118 |           119 |           120 |           121 |            122 |           123 |           124 |           125 |    
      126 |         127 |          128 |             129 |            130 |           131 |            132 |            133 |           134 |           135 |           136 |            137 |            138 |            139 |           140 |          141 |         142 |              143 |              144 |            145 |            146 |            147 |            148 |           149 |           150 |           151 |            152 |            153 |            154 |           155 |          156 |         157 |               158 |               159 |            160 |            161 |            162 |            163 |            164 |
|:------|--------------:|-------------:|---------------:|--------------:|--------------:|--------------:|-------------:|------------:|-----------------:|-----------------:|---------------:|---------------:|--------------:|--------------:|--------------:|--------------:|--------------:|---------------:|--------------:|--------------:|-------------:|------------:|------------:|------------:|-------------:|---------------:|--------------:|--------------:|--------------:|--------------:|-------------:|-------------:|---------------:|--------------:|--------------:|-------------:|--------------:|------------:|-------------:|------------:|---------------:|--------------:|--------------:|--------------:|--------------:|-------------:|-------------:|---------------:|--------------:|--------------:|--------------:|-------------:|------------:|------------:|------------:|---------------:|-------------:|--------------:|--------------:|--------------:|-------------:|-------------:|---------------:|-------------:|-------------:|-------------:|-------------:|-----------:|-------------:|----------------:|---------------:|-------------:|--------------:|--------------:|--------------:|-------------:|-------------:|---------------:|--------------:|--------------:|-------------:|-------------:|------------:|-------------:|------------------:|---------------:|-------------:|--------------:|--------------:|--------------:|-------------:|-------------:|---------------:|--------------:|--------------:|--------------:|--------------:|------------:|------------:|------------:|---------------:|-------------:|--------------:|--------------:|--------------:|--------------:|-------------:|---------------:|--------------:|--------------:|--------------:|-------------:|------------:|-------------:|-----------------:|---------------:|---------------:|--------------:|--------------:|--------------:|--------------:|--------------:|---------------:|--------------:|--------------:|--------------:|----
---------:|------------:|-------------:|----------------:|---------------:|--------------:|---------------:|---------------:|--------------:|--------------:|--------------:|---------------:|---------------:|---------------:|--------------:|-------------:|------------:|-----------------:|-----------------:|---------------:|---------------:|---------------:|---------------:|--------------:|--------------:|--------------:|---------------:|---------------:|---------------:|--------------:|-------------:|------------:|------------------:|------------------:|---------------:|---------------:|---------------:|---------------:|---------------:|
| count | 1514          | 1514         | 1514           | 1514          | 1514          | 1514          | 1514         | 1514        |   1514           |   1514           | 1514           | 1514           | 1514          | 1514          | 1514          | 1514          | 1514          | 1514           | 1514          | 1514          | 1514         | 1514        | 1514        |   1514      |    1514      | 1514           | 1514          | 1514          | 1514          | 1514          | 1514         | 1514         | 1514           | 1514          | 1514          | 1514         | 1514          | 1514        |   1514       |   1514      | 1514           | 1514          | 1514          | 1514          | 1514          | 1514         | 1514         | 1514           | 1514          | 1514          | 1514          | 1514         | 1514        |   1514      |    1514     | 1514           | 1514         | 1514          | 1514          | 1514          | 1514         | 1514         | 1514           | 1514         | 1514         | 1514         | 1514         | 1514       |    1514      |  1514           | 1514           | 1514         | 1514          | 1514          | 1514          | 1514         | 1514         | 1514           | 1514          | 1514          | 1514         | 1514         | 1514        |    1514      |    1514           | 1514           | 1514         | 1514          | 1514          | 1514          | 1514         | 1514         | 1514           | 1514          | 1514          | 1514          | 1514          | 1514        |   1514      |    1514     | 1514           | 1514         | 1514          | 1514          | 1514          | 1514          | 1514         | 1514           | 1514          | 1514          | 1514          | 1514         | 1514        |    1514      |   1514           | 1514           | 1514           | 1514          | 1514          | 1514          | 1514          | 1514          | 1514           | 1514          | 1514          | 1514          | 
1514         | 1514        |    1514      |  1514           | 1514           | 1514          | 1514           | 1514           | 1514          | 1514          | 1514          | 1514           | 1514           | 1514           | 1514          | 1514         | 1514        |   1514           |   1514           | 1514           | 1514           | 1514           | 1514           | 1514          | 1514          | 1514          | 1514           | 1514           | 1514           | 1514          | 1514         | 1514        |    1514           |    1514           | 1514           | 1514           | 1514           | 1514           | 1514           |
| mean  |    0.0207855  |   -0.0224302 |   -8.42186e-08 |    0.00522529 |    0.00522529 |   -0.0423806  |   -0.0893371 |    4.07732  | -10242.2         | -47399.8         |    9.9665e-05  |   -0.000584906 |   -0.003645   |    0.00366062 |    0.00730562 |    0.0246802  |   -0.0242949  |    4.8602e-06  |    0.010097   |    0.0100822  |    0.0261115 |   -0.343477 |    2.50307  |    -50.5454 |    -125.144  |   -3.08906e-05 |   -0.0242931  |   -0.00732109 |    0.00729141 |    0.0146125  |    0.0386755 |   -0.0384493 |   -8.81226e-06 |    0.0161117  |    0.0161021  |    0.0167368 |   -0.476331   |    2.57176  |     10.8972  |     30.7932 |   -0.000160745 |   -0.0384132  |   -0.0128288  |    0.0128468  |    0.0256757  |    0.0737304 |   -0.0738017 |    1.85547e-05 |    0.0276747  |    0.027659   |    0.00427087 |   -0.281917  |    2.88134  |    -70.1841 |    -218.522 |   -2.23846e-05 |   -0.0734659 |   -0.0209831  |    0.0210546  |    0.0420378  |    0.0680054 |   -0.0650737 |    2.8043e-06  |    0.0233502 |    0.0233477 |    0.0564655 |   -0.270786  |    3.08468 |    -424.919  | -1048.24        |   -0.001004    |   -0.0643954 |   -0.0175272  |    0.0174562  |    0.0349834  |    0.0539938 |   -0.0517975 |    4.63415e-06 |    0.0158795  |    0.0158784  |    0.0590117 |   -0.0938483 |    3.46416  |     389.887  |    1297.81        |   -0.000407669 |   -0.0491429 |   -0.0111248  |    0.0110297  |    0.0221546  |    0.045462  |   -0.0458708 |   -3.76293e-06 |    0.0119992  |    0.0119986  |   -0.00201764 |    0.0871786  |    3.82118  |     58.528  |     114.848 |   -1.34732e-05 |   -0.0370514 |   -0.00806117 |    0.00807053 |    0.0161317  |    0.0318917  |   -0.0326521 |    1.43269e-06 |    0.00785812 |    0.00785794 |   -0.0156125  |    0.170689  |    4.11925  |     -68.9345 |   -438.267       |    2.61324e-05 |   -0.0131894   |   -0.0052208  |    0.00525286 |    0.0104737  |    0.0187089  |   -0.0186926  |    9.05805e-07 |    0.00428807 |    0.00428803 |   -0.00318303 |    
0.17328   |    4.45962  |    -863.095  | -3343.74        |    6.11029e-06 |   -0.00475866 |   -0.00285494  |    0.00286094  |    0.00571588 |    0.00978269 |   -0.00986253 |    1.28708e-07 |    0.00216551  |    0.0021655   |   -0.00504045 |    0.171764  |    4.63042  |  -3020.24        | -12773.4         |    3.69447e-06 |   -0.00192707  |   -0.00144126  |    0.001446    |    0.00288727 |    0.00468333 |   -0.00475368 |    5.34409e-11 |    0.000987312 |    0.000987312 |   -0.00575001 |    0.193919  |    4.91994  |  559004           |       2.4175e+06  |    1.85178e-06 |   -0.000605159 |   -0.00065669  |    0.000658839 |    0.00131553  |
| std   |    0.00325322 |    0.0034179 |    1.61238e-06 |    0.0013714  |    0.0013714  |    0.0682183  |    0.271573  |    0.588881 | 417654           |      1.87643e+06 |    0.000294951 |    0.0019898   |    0.00128541 |    0.00127416 |    0.00255932 |    0.0094311  |    0.00952146 |    0.00052673  |    0.00415295 |    0.00415544 |    0.192107  |    0.438112 |    0.371438 |   1903.88   |    4569.68   |    0.000909157 |    0.00952203 |    0.00343891 |    0.0035191  |    0.00682027 |    0.0159472 |    0.016188  |    0.000517738 |    0.00836497 |    0.00836748 |    0.126013  |    0.619662   |    0.494585 |   1027.07    |   2375.95   |    0.00107255  |    0.0162438  |    0.00801629 |    0.00808504 |    0.0160441  |    0.0413717 |    0.0417389 |    0.0010006   |    0.0202641  |    0.0202609  |    0.0980183  |    0.426282  |    0.452969 |   1720.37   |    5575.72  |    0.00154959  |    0.042031  |    0.0188255  |    0.0189525  |    0.0377061  |    0.0341004 |    0.0299834 |    0.000320293 |    0.01512   |    0.0151206 |    0.106645  |    0.368854  |    0.41343 |   14293.4    | 32689.2         |    0.0022066   |    0.0305424 |    0.0136696  |    0.0139538  |    0.0276036  |    0.0137826 |    0.0118943 |    0.000178356 |    0.00518272 |    0.00518295 |    0.0993292 |    0.270193  |    0.430154 |   19270.7    |   70497.5         |    0.000983129 |    0.0168024 |    0.00439777 |    0.0044715  |    0.00885257 |    0.0078295 |    0.0082465 |    0.000122231 |    0.00206053 |    0.00206045 |    0.0597206  |    0.211859   |    0.505353 |   4485.6    |   15660.5   |    0.00022233  |    0.0200772 |    0.00148242 |    0.00151475 |    0.00298171 |    0.00787814 |    0.0084756 |    5.67495e-05 |    0.00209771 |    0.00209762 |    0.0412279  |    0.250014  |    0.601223 |   11679.1    |  53407.9         |    8.83041e-05 |    0.016938    |    0.00142134 |    0.0014519  |    0.00286982 |    0.00581424 |    0.00610004 |    1.96993e-05 |    0.00146463 |    0.0014646  |    0.0312617  |    
0.271585  |    0.738781 |   22168.1    | 92167.4         |    3.02478e-05 |    0.00269769 |    0.000987864 |    0.000995003 |    0.00198219 |    0.00396569 |    0.00430616 |    5.39932e-06 |    0.000952061 |    0.000952058 |    0.0263769  |    0.358392  |    0.738475 | 121921           | 526009           |    1.05939e-05 |    0.00260997  |    0.000638025 |    0.000644094 |    0.00128201 |    0.00211965 |    0.00230184 |    1.16188e-08 |    0.000490917 |    0.000490917 |    0.0217107  |    0.528389  |    0.982065 |       2.17316e+07 |       9.17292e+07 |    3.68626e-06 |    0.00083679  |    0.000328777 |    0.000331638 |    0.000660401 |
| min   |    0.0141005  |   -0.0345186 |   -7.59789e-06 |    0.00369327 |    0.00369327 |   -0.3216     |   -1.20251   |    2.38223  |     -1.56227e+07 |     -7.04683e+07 |   -0.000365667 |   -0.0182586   |   -0.0109531  |    0.00253807 |    0.00505459 |    0.00776321 |   -0.0592175  |   -0.00190608  |    0.0032274  |    0.00321608 |   -0.612122  |   -1.39735  |    1.55681  | -69993.8    | -164243      |   -0.00330653  |   -0.0592175  |   -0.0175536  |    0.00159099 |    0.00422375 |    0.0105085 |   -0.107213  |   -0.00221422  |    0.00428297 |    0.0042817  |   -0.992855  |   -1.49036    |    1.55622  | -21853.3     | -47048.9    |   -0.00425079  |   -0.107213   |   -0.0375287  |    0.00242942 |    0.00575283 |    0.0179382 |   -0.217958  |   -0.00470823  |    0.00658033 |    0.00658031 |   -0.448091   |   -1.36931   |    1.75901  | -58226.1    | -196342     |   -0.0106906   |   -0.217958  |   -0.109209   |    0.00433987 |    0.00896447 |    0.022308  |   -0.165023  |   -0.00116297  |    0.006991  |    0.0069881 |   -0.278577  |   -1.28366   |    2.0392  | -541692      |    -1.20064e+06 |   -0.0188057   |   -0.165023  |   -0.0770888  |    0.00446108 |    0.00939059 |    0.025696  |   -0.104892  |   -0.000693278 |    0.00775412 |    0.00775399 |   -0.154297  |   -0.994391  |    2.38117  | -220543      | -914424           |   -0.00855454  |   -0.104892  |   -0.031403   |    0.00501141 |    0.0101462  |    0.023021  |   -0.0851916 |   -0.000494742 |    0.0072014  |    0.00720138 |   -0.143602   |   -0.558447   |    2.74784  | -72756.9    | -304914     |   -0.00146198  |   -0.0851916 |   -0.0129107  |    0.00477301 |    0.00954269 |    0.0165853  |   -0.0920765 |   -0.000225216 |    0.00400264 |    0.00400261 |   -0.12966    |   -0.417174  |    3.14781  | -401348      |     -1.88778e+06 |   -0.000285096 |   -0.053677    |   -0.00783576 |    0.0026597  |    0.00526828 |    0.00817932 |   -0.0449035  |   -7.89117e-05 |    0.00191642 |    0.0019164  |   -0.118583   |   
-0.24435   |    3.40499  | -417923      |    -1.57075e+06 |   -0.000111948 |   -0.0146828  |   -0.00459081  |    0.00125595  |    0.00250237 |    0.00383071 |   -0.0211643  |   -3.42739e-05 |    0.000874905 |    0.000874899 |   -0.095332   |   -0.15216   |    3.58394  |     -4.57274e+06 |     -1.95047e+07 |   -4.01831e-05 |   -0.0106038   |   -0.00256845  |    0.000576008 |    0.00115645 |    0.00152503 |   -0.010629   |   -4.42983e-08 |    0.000373191 |    0.000373191 |   -0.0676855  |   -0.104168  |    3.77621  |      -4.92259e+07 |      -2.20982e+08 |   -7.94474e-06 |   -0.00383944  |   -0.00124407  |    0.000249384 |    0.000498643 |
| 25%   |    0.0181705  |   -0.0245927 |   -8.15745e-07 |    0.00454408 |    0.00454408 |   -0.0446518  |   -0.142356  |    3.8097   |  -6999.01        | -28344           |   -4.81866e-06 |   -1.49012e-08 |   -0.00345419 |    0.0030997  |    0.00616833 |    0.0169518  |   -0.0312159  |   -0.000309694 |    0.00657028 |    0.00654088 |   -0.10883   |   -0.614967 |    2.26691  |    -28.4093 |     -69.0108 |   -0.000564873 |   -0.0312159  |   -0.00938444 |    0.00432377 |    0.00888392 |    0.0257357 |   -0.0493747 |   -0.000325231 |    0.0091079  |    0.00910308 |   -0.061695  |   -1.00761    |    2.20923  |    -45.9308  |   -111.339  |   -0.000752736 |   -0.0493747  |   -0.016994   |    0.0060356  |    0.0120774  |    0.0402008 |   -0.093845  |   -0.000464219 |    0.0136186  |    0.0136124  |   -0.0580272  |   -0.425521  |    2.63221  |    -42.0375 |    -116.838 |   -0.000703508 |   -0.0937593 |   -0.0240109  |    0.00907339 |    0.0182789  |    0.0408038 |   -0.0783772 |   -0.000191865 |    0.0129806 |    0.0129795 |   -0.018788  |   -0.422039  |    2.85839 |    -101.236  |  -318.578       |   -0.0013348   |   -0.0782222 |   -0.0199051  |    0.00872429 |    0.01743    |    0.0444904 |   -0.055695  |   -0.000107584 |    0.012951   |    0.012947   |   -0.0137892 |   -0.165452  |    3.20281  |    -135.415  |    -459.202       |   -0.000588446 |   -0.0551686 |   -0.0114524  |    0.00873915 |    0.0174717  |    0.039565  |   -0.0513711 |   -7.25242e-05 |    0.0101198  |    0.0101195  |   -0.0421098  |   -0.00590588 |    3.47478  |   -162.806  |    -632.908 |   -0.000124176 |   -0.0502928 |   -0.00929458 |    0.00670547 |    0.0134217  |    0.0246199  |   -0.0400575 |   -2.64997e-05 |    0.00601806 |    0.00601793 |   -0.0416316  |    0.0210514 |    3.70085  |    -276.018  |  -1115.48        |   -2.55688e-05 |   -0.0281685   |   -0.00680697 |    0.00392305 |    0.00785121 |    0.0135378  |   -0.024512   |   -7.21835e-06 |    0.00293789 |    0.00293788 |   -0.0215494  |    
0.0344001 |    3.95895  |    -493.445  | -2113.42        |   -9.60551e-06 |   -0.00624958 |   -0.00391617  |    0.00195187  |    0.00389965 |    0.00628636 |   -0.0141842  |   -2.20754e-06 |    0.00132608  |    0.00132608  |   -0.0224363  |    0.0400581 |    4.14642  |   -792.032       |  -3620.56        |   -2.53518e-06 |   -0.00424732  |   -0.00214265  |    0.000872803 |    0.00174315 |    0.00280777 |   -0.00703045 |   -7.10163e-09 |    0.000552442 |    0.000552442 |   -0.0217371  |    0.0517755 |    4.34321  | -122243           | -590419           |   -5.5979e-07  |   -0.00118434  |   -0.0010045   |    0.000365121 |    0.000731623 |
| 50%   |    0.0206575  |   -0.022042  |   -7.30086e-08 |    0.00489668 |    0.00489668 |   -0.0264365  |    0.0139937 |    4.13883  |  -1577.55        |  -5470.07        |    1.15875e-05 |    0           |   -0.00331437 |    0.00333158 |    0.00664512 |    0.024234   |   -0.022956   |   -2.73162e-07 |    0.0101048  |    0.0100948  |    0.031415  |   -0.375551 |    2.46872  |     -4.4236 |     -10.079  |   -6.60191e-07 |   -0.022956   |   -0.007381   |    0.00710499 |    0.014723   |    0.0372091 |   -0.0360979 |   -1.60074e-05 |    0.0172806  |    0.0172803  |    0.022744  |   -0.504887   |    2.53132  |     -8.58659 |    -22.2824 |   -8.91822e-05 |   -0.0360979  |   -0.0138831  |    0.0137884  |    0.0288635  |    0.0718049 |   -0.0714201 |    3.43118e-05 |    0.0263496  |    0.0263318  |    0.00453497 |   -0.237958  |    2.87498  |     12.9224 |      38.758 |    1.90809e-05 |   -0.0710214 |   -0.0178907  |    0.0178913  |    0.0367608  |    0.0636776 |   -0.0623312 |   -1.91969e-07 |    0.0217638 |    0.0217623 |    0.0571131 |   -0.217341  |    3.07369 |     -12.2112 |   -38.7017      |   -0.000334312 |   -0.0616228 |   -0.0156276  |    0.0154866  |    0.0314595  |    0.0512778 |   -0.0494071 |    3.17165e-06 |    0.0148406  |    0.0148391  |    0.0418749 |   -0.0533258 |    3.41875  |      35.0585 |     120.346       |   -0.000193853 |   -0.0488191 |   -0.0102005  |    0.00998791 |    0.0201561  |    0.0460355 |   -0.0459734 |   -1.09776e-06 |    0.0127413  |    0.0127406  |   -0.00930847 |    0.0730504  |    3.73217  |    -30.7393 |    -119.262 |   -6.52288e-07 |   -0.0430574 |   -0.00849792 |    0.00850129 |    0.0171379  |    0.0314029  |   -0.0307283 |   -2.52915e-07 |    0.00750998 |    0.00750995 |   -0.0207558  |    0.106791  |    3.98668  |     -46.5105 |   -183.025       |    1.21866e-05 |   -0.00659032  |   -0.00506782 |    0.00507567 |    0.0101929  |    0.0172816  |   -0.0166446  |    2.769e-07   |    0.00371094 |    0.00371094 |   -0.00567412 |    
0.0950446 |    4.26443  |      98.1917 |   411.426       |    2.16196e-06 |   -0.00433455 |   -0.00248067  |    0.0024691   |    0.00495034 |    0.00806184 |   -0.0077084  |    8.98385e-08 |    0.00166564  |    0.00166564  |   -0.00773726 |    0.0877303 |    4.42169  |    182.74        |    772.258       |    1.2845e-06  |   -0.000432942 |   -0.00111078  |    0.00111538  |    0.00222574 |    0.00357798 |   -0.00366138 |    2.72763e-10 |    0.000701369 |    0.000701369 |   -0.00834168 |    0.0854948 |    4.63517  |   22326.5         |  116882           |    8.5881e-07  |   -0.000251143 |   -0.000468069 |    0.000468497 |    0.000935353 |
| 75%   |    0.0229177  |   -0.0197564 |    6.49458e-07 |    0.00515911 |    0.00515911 |   -0.00938888 |    0.061821  |    4.43991  |   6657.46        |  26314.5         |    3.04766e-05 |    0           |   -0.00307043 |    0.00347521 |    0.00693511 |    0.0314644  |   -0.0168948  |    0.000321594 |    0.0127923  |    0.012776   |    0.15573   |   -0.117018 |    2.71078  |     27.4674 |      69.3041 |    0.000393375 |   -0.0168948  |   -0.00428427 |    0.00920177 |    0.0184034  |    0.050547  |   -0.0256553 |    0.000323037 |    0.0206582  |    0.0206535  |    0.104494  |   -0.00566002 |    2.88515  |     46.6212  |    113.625  |    0.000340219 |   -0.0256248  |   -0.00589094 |    0.0170483  |    0.0335573  |    0.0941359 |   -0.0403249 |    0.000513886 |    0.0333839  |    0.0333296  |    0.0623189  |   -0.0543201 |    3.11533  |     45.5791 |     127.656 |    0.000697941 |   -0.0402075 |   -0.00888489 |    0.0237072  |    0.0474088  |    0.0833988 |   -0.0411652 |    0.000195851 |    0.0274252 |    0.0274243 |    0.13585   |   -0.0353694 |    3.33196 |     101.794  |   312.93        |    0.000144454 |   -0.0408968 |   -0.00868865 |    0.019264   |    0.0392779  |    0.0594483 |   -0.0442171 |    0.000119298 |    0.0165045  |    0.0165043  |    0.119323  |    0.0520162 |    3.69822  |     134.896  |     464.504       |    7.40142e-05 |   -0.0430986 |   -0.00865132 |    0.0111402  |    0.0224914  |    0.0506184 |   -0.0396552 |    6.55239e-05 |    0.0138204  |    0.0138198  |    0.0263972  |    0.172541   |    4.07046  |    174.2    |     666.419 |    0.000117291 |   -0.0337883 |   -0.00672912 |    0.00935809 |    0.0186773  |    0.0384193  |   -0.0254152 |    2.70309e-05 |    0.0102617  |    0.0102616  |    0.00413211 |    0.244774  |    4.37442  |     275.688  |   1115.4         |    6.82174e-05 |   -0.000375809 |   -0.00393937 |    0.00688264 |    0.0137125  |    0.0238233  |   -0.013399   |    7.52562e-06 |    0.00585976 |    0.00585959 |    0.0126811  |    
0.220102  |    4.72119  |     509.083  |  2209.71        |    2.05927e-05 |   -0.00300776 |   -0.00194971  |    0.00393536  |    0.00785116 |    0.0136402  |   -0.00603839 |    2.27738e-06 |    0.00321142  |    0.0032114   |    0.00999659 |    0.18219   |    4.91681  |    792.071       |   3649.28        |    8.7535e-06  |    0           |   -0.000873252 |    0.00216024  |    0.00430482 |    0.00676869 |   -0.00266656 |    7.43766e-09 |    0.00150502  |    0.00150502  |    0.00790537 |    0.152369  |    5.16523  |  139507           |  664401           |    3.61606e-06 |    0           |   -0.000365815 |    0.00101102  |    0.00201605  |
| max   |    0.0476336  |   -0.015056  |    8.24288e-06 |    0.0122529  |    0.0122529  |    0.073119   |    0.76963   |    8.64122  |      2.64169e+06 |      1.05105e+07 |    0.00127825  |    0.00813479  |   -0.00251652 |    0.0109094  |    0.0218625  |    0.060677   |   -0.00753556 |    0.0026465   |    0.0200885  |    0.0200876  |    0.905096  |    2.33918  |    4.29541  |   9142.64   |   22536.8    |    0.00402558  |   -0.00753556 |   -0.00158631 |    0.0186305  |    0.0355692  |    0.0876189 |   -0.0110915 |    0.00212341  |    0.0395502  |    0.0395502  |    0.663292  |    4.63616    |    5.53594  |  22429       |  45914.7    |    0.00639954  |    0.00768277 |   -0.0025103  |    0.0385568  |    0.0760855  |    0.230295  |   -0.0164655 |    0.00408893  |    0.113305   |    0.113303   |    0.405532   |    3.74447   |    6.32319  |   9497.3    |   31142.5   |    0.0071037   |    0.101691  |   -0.00442954 |    0.109383   |    0.216826   |    0.194254  |   -0.0216583 |    0.00220555  |    0.0860251 |    0.0860244 |    0.381377  |    1.45603   |    5.17218 |   20982.9    | 62560.6         |    0.0022874   |    0.0603731 |   -0.00453502 |    0.079457   |    0.156546   |    0.109381  |   -0.0251217 |    0.000593856 |    0.0385797  |    0.0385786  |    0.510597  |    3.69772   |    8.52313  |  712908      |       2.57419e+06 |    0.00106595  |    0.0488046 |   -0.00511853 |    0.0311249  |    0.0625002  |    0.0856422 |   -0.0258219 |    0.000455846 |    0.0175423  |    0.0175422  |    0.272609   |    1.03988    |    7.42124  | 138168      |  423098     |    0.000699696 |    0.0248121 |   -0.00476969 |    0.0126607  |    0.0252345  |    0.0704308  |   -0.0155707 |    0.000385666 |    0.0119809  |    0.01198    |    0.262301   |    1.92597   |   10.0124   |  118140      | 512688           |    0.000401704 |    0.0206085   |   -0.00260858 |    0.00811905 |    0.0158797  |    0.0432895  |   -0.00837255 |    0.00010831  |    0.00690444 |    0.0069044  |    0.14848    |    
3.45945   |    9.75969  |  342326      |     1.6015e+06  |    0.000142351 |    0.00400104 |   -0.00124642  |    0.00463139  |    0.00920914 |    0.0215853  |   -0.00383718 |    2.76641e-05 |    0.00381472  |    0.00381472  |    0.100932   |    5.79572   |   10.2635   |      1.1012e+06  |      5.48139e+06 |    6.25771e-05 |    0.00475826  |   -0.000580438 |    0.0025774   |    0.005145   |    0.0103034  |   -0.00166746 |    8.98651e-08 |    0.0018605   |    0.0018605   |    0.0822072  |    8.22963   |   17.975    |       8.3531e+08  |       3.51498e+09 |    1.83339e-05 |    0.0022003   |   -0.000247759 |    0.00125766  |    0.00250173  |

In [None]:
# Persist the extracted test features as a CSV without index or header row,
# then show the table's (rows, columns) as the cell output.
test_feature_csv = os.path.join(StdPath,'Feature/test.csv')
Test_Feature.to_csv(test_feature_csv, sep=',', index=None, header=None)
Test_Feature.shape

(1514, 165)

### 2. 데이터 전처리

In [None]:
# Reload the saved feature tables (a workaround needed because of insufficient capacity).
# The CSV files carry no header row, so the columns get integer labels.
Train_Feature, Test_Feature = (
    pd.read_csv(os.path.join(StdPath, csv_name), sep=',', header=None)
    for csv_name in ('Feature/train.csv', 'Feature/test.csv')
)
print(Train_Feature.shape,Test_Feature.shape)

(1279, 165) (1514, 165)


2-1. 데이터를 FAN 모델 종류에 따라서 분류하기

In [None]:
# Split the feature tables by fan type (0 and 2) using the metadata tables.
# NOTE(review): index labels of Train_info/Test_info are used as row positions
# via .iloc, which assumes both tables keep the default 0..n-1 index and the
# same row order as the feature tables - confirm.

# Train data
train_rows_0 = Train_info.index[Train_info['FAN_TYPE'] == 0]
train_rows_2 = Train_info.index[Train_info['FAN_TYPE'] == 2]
Train_Feature_0 = Train_Feature.iloc[train_rows_0, :]
Train_Feature_2 = Train_Feature.iloc[train_rows_2, :]

# Test data
test_rows_0 = Test_info.index[Test_info['FAN_TYPE'] == 0]
test_rows_2 = Test_info.index[Test_info['FAN_TYPE'] == 2]
Test_Feature_0 = Test_Feature.iloc[test_rows_0, :]
Test_Feature_2 = Test_Feature.iloc[test_rows_2, :]

2-2. Train 데이터 scaling

In [None]:
# One MinMaxScaler per fan type, fitted only on that type's training features.
# NOTE: Train_Feature_0 / Train_Feature_2 are overwritten with their scaled
# versions, so re-running this cell would re-fit the scalers on scaled data.
scaler_0 = MinMaxScaler().fit(Train_Feature_0)
scaler_2 = MinMaxScaler().fit(Train_Feature_2)
Train_Feature_0 = pd.DataFrame(scaler_0.transform(Train_Feature_0))
Train_Feature_2 = pd.DataFrame(scaler_2.transform(Train_Feature_2))

2-3. Test 데이터 scaling

In [None]:
# Scale the test features with the scalers fitted on the matching training set
# (transform only - the scalers are not re-fitted here).
scaled_test_0 = scaler_0.transform(Test_Feature_0)
scaled_test_2 = scaler_2.transform(Test_Feature_2)
Test_Feature_0 = pd.DataFrame(scaled_test_0)
Test_Feature_2 = pd.DataFrame(scaled_test_2)

### 3. 학습 진행

In [None]:
# Sanity check: (rows, columns) of the scaled train sets, then of the test sets,
# for fan type 0 and fan type 2.
for feat_0, feat_2 in ((Train_Feature_0, Train_Feature_2), (Test_Feature_0, Test_Feature_2)):
  print(feat_0.shape, feat_2.shape)

(639, 165) (640, 165)
(779, 165) (735, 165)


3-1. IsolationForest

In [None]:
# IsolationForest hyper-parameters, shared by both fan-type models.
N = 260
Max = 'auto'
Con = 0.11
Feature = 1.0
if_params = dict(
    n_estimators=N,
    max_samples=Max,
    contamination=Con,
    max_features=Feature,
    random_state=777,
    verbose=0,
)
# One independent model per fan type, each fitted on its own scaled training set.
model_0_IF = IsolationForest(**if_params)
model_2_IF = IsolationForest(**if_params)
model_0_IF.fit(Train_Feature_0)
model_2_IF.fit(Train_Feature_2)

IsolationForest(contamination=0.11, n_estimators=260, random_state=777)

3-2. OneClassSVM

In [None]:
# OneClassSVM hyper-parameters, shared by both fan-type models.
Nu = 0.01
Gam = 0.202
T = 0.001
svm_params = dict(kernel="rbf", nu=Nu, gamma=Gam, tol=T)
# One independent model per fan type, each fitted on its own scaled training set.
model_0_SVM = OneClassSVM(**svm_params)
model_2_SVM = OneClassSVM(**svm_params)
model_0_SVM.fit(Train_Feature_0)
model_2_SVM.fit(Train_Feature_2)

OneClassSVM(gamma=0.202, nu=0.01)

3-3. AutoEncoder

3-3-0. 파라미터 설정

In [None]:
# Autoencoder training settings handed to Make_Models:
#   Size         - batch size
#   iteration    - number of epochs (the best result was originally obtained with 1000)
#   learningRate - learning-rate argument
Size, iteration, learningRate = 64, 30, 0.001
# Fraction passed to find_thresholds; derived from the fan-type-0 training set
# size and reused for both fan types.
NU = 1.0 / len(Train_Feature_0)

3-3-1. 기본 모델 설정

In [None]:
def Autoencoder(Input_shape,rate):
  """Build and compile a dense bottleneck autoencoder.

  The network is Input_shape -> 16 -> 2 -> 16 -> Input_shape, every layer with
  ReLU activation, trained with mean-squared-error loss.

  Args:
    Input_shape: number of input features; also the width of the output layer.
    rate: learning rate used by the Adam optimizer.

  Returns:
    A compiled keras Sequential model.
  """
  # Re-seed TensorFlow's global RNG on every call (presumably so each model
  # starts from reproducible weights).
  tf.random.set_seed(777)
  model=keras.models.Sequential()
  # Encoder
  model.add(keras.layers.Dense(Input_shape, activation='relu', input_shape=(Input_shape, )))
  model.add(keras.layers.Dense(16, activation='relu'))
  model.add(keras.layers.Dense(2, activation='relu'))  # 2-unit bottleneck
  # Decoder
  model.add(keras.layers.Dense(16, activation='relu'))
  model.add(keras.layers.Dense(Input_shape, activation='relu'))
  # Pass the requested learning rate to Adam instead of relying on the
  # optimizer's built-in default. The single "acc" metric is kept because
  # Make_Models unpacks evaluate() into exactly two values (loss, metric).
  model.compile(optimizer=keras.optimizers.Adam(learning_rate=rate),loss="mse",metrics=["acc"])
  return model

3-3-2. K-Fold 5개 모델 설정

In [None]:
def Make_Models(fold,data,size,Iteration,inital):
  """Train one autoencoder per K-fold split and return all fitted models.

  Args:
    fold: number of K-fold splits (and therefore of returned models).
    data: DataFrame of features; converted to a NumPy array for training.
    size: batch size used for training.
    Iteration: number of training epochs.
    inital: learning-rate argument forwarded to Autoencoder.

  Returns:
    List of fitted models, one per fold, in split order.
  """
  kfold = KFold(n_splits = fold, shuffle = True, random_state = 777)
  Data = data.to_numpy()
  model_list = []
  for Train , Valid in kfold.split(Data):
    X_train, X_valid = Data[Train], Data[Valid]
    model =  Autoencoder(X_train.shape[1],inital)
    # An autoencoder is trained to reproduce its own input.
    model.fit(X_train, X_train, epochs=Iteration, batch_size=size,validation_data=(X_valid,X_valid),verbose =0)
    model_list.append(model)
    # Evaluate reconstruction of the held-out fold against the fold itself;
    # scoring against an all-zero target would not measure reconstruction error.
    Loss, _ = model.evaluate(X_valid,  X_valid)
    print("Loss: ",Loss)
  return model_list

3-3-3. 모델 학습

In [None]:
# Train Fold autoencoders per fan type on the scaled training features.
Fold = 5
List_0, List_2 = (
    Make_Models(Fold, train_features, Size, iteration, learningRate)
    for train_features in (Train_Feature_0, Train_Feature_2)
)

Loss:  0.14516539871692657
Loss:  0.14291268587112427
Loss:  0.14519497752189636
Loss:  0.14197979867458344
Loss:  0.14412857592105865
Loss:  0.1400827318429947
Loss:  0.13889697194099426
Loss:  0.13808466494083405
Loss:  0.13819913566112518
Loss:  0.13910473883152008


3-3-4. 임계치 찾기

In [None]:
def Difference(Model,data):
  """Return the per-row mean squared reconstruction error of Model on data.

  Args:
    Model: object with a predict(data) method returning an array shaped like data.
    data: 2-D table of features (rows are samples).

  Returns:
    One value per row: the mean over columns of (data - prediction) squared.
  """
  residual = data - Model.predict(data)
  return np.mean(np.square(residual), axis=1)

In [None]:
def find_thresholds(Model_list,feature_data,the):
  """Pick one reconstruction-error threshold per model.

  For every model the per-row errors from Difference are sorted in descending
  order and the value at position int(the * len(feature_data)) is taken.

  Args:
    Model_list: sequence of fitted models accepted by Difference.
    feature_data: feature table the errors are computed on.
    the: fraction of rows; it fixes the rank of the error used as threshold.

  Returns:
    List of thresholds, in the same order as Model_list.
  """
  rank = int(the*len(feature_data))
  return [
      Difference(model, feature_data).sort_values(ascending = False).iloc[rank]
      for model in Model_list
  ]

In [None]:
# Per-fold thresholds for each fan type, computed on the scaled training
# features, gathered into one table: one row per fold model, one column per fan type.
The_0 = find_thresholds(List_0,Train_Feature_0,NU)
The_2 = find_thresholds(List_2,Train_Feature_2,NU)
Threshold = pd.DataFrame(
    np.column_stack((The_0, The_2)).astype(float),
    columns=['Feature_0','Feature_2'],
)



### 4. Predict

4-1. Labeling 함수 

In [None]:
# IsolationForest output is (1: normal, -1: defective), so convert the labels
# to (0: normal, 1: defective).
def Labeling(data):
  """Convert +1/-1 predictions into a one-column 0/1 'score' DataFrame.

  Args:
    data: 1-D array-like of predictions where 1 means normal and -1 defective.

  Returns:
    DataFrame with a single column 'score' (0: normal, 1: defective).
  """
  # Order matters: map 1 -> 0 first, then -1 -> 1, so the new 1s are not remapped.
  labels = pd.DataFrame(data).replace(1,0).replace(-1,1)
  labels.columns = ['score']
  return labels

4-2. 오토 인코더 모델 해석 함수

In [None]:
# Autoencoder model prediction
def Predict(Model_list,data,threshold_data):
  """Label rows as defective (1) or normal (0) by majority vote of the models.

  Each model votes 1 for a row when that row's mean squared reconstruction
  error is greater than or equal to the model's own threshold. A row is
  labelled 1 when strictly more than half of the models vote 1. Every model in
  Model_list takes part in the vote; with a single model the result is simply
  that model's vote.

  Args:
    Model_list: sequence of fitted models exposing predict(data).
    data: DataFrame of features to score.
    threshold_data: Series of thresholds; position i belongs to Model_list[i].

  Returns:
    DataFrame with one integer column 'score' (0: normal, 1: defective).

  Raises:
    ValueError: if Model_list is empty.
  """
  n_models = len(Model_list)
  if n_models == 0:
    raise ValueError("Model_list must contain at least one model")
  votes = np.zeros(len(data), dtype=int)
  for i, model in enumerate(Model_list):
    Loss = (data-model.predict(data))**2
    Score = np.asarray(np.mean(Loss,axis =1))
    votes += (Score >= threshold_data.iloc[i]).astype(int)
  # Strict majority: ties (possible with an even number of models) count as normal.
  majority = (2*votes > n_models).astype('int')
  return pd.DataFrame({'score': majority})

4-3. 앙상블 함수 

In [None]:
def Ensemble(pred_1,pred_2,pred_3):
  """Combine three 0/1 prediction tables by averaging and thresholding at 0.5.

  Args:
    pred_1, pred_2, pred_3: one-column prediction tables of equal length.

  Returns:
    DataFrame with column 'score': 1 where the mean of the three predictions
    is above 0.5, 0 where it is below 0.5 (a mean of exactly 0.5 is kept as is).
  """
  mean_vote = (pred_1.to_numpy() + pred_2.to_numpy() + pred_3.to_numpy())/3
  combined = pd.DataFrame(mean_vote,columns=['score'])
  votes = combined['score']
  combined['score'] = votes.mask(votes > 0.5, 1).mask(votes < 0.5, 0)
  return combined

4-3. test 데이터 예측 진행

In [None]:
# Fan type 0: IsolationForest and OneClassSVM labels, autoencoder labels, then the ensemble.
Pred_0_IF, Pred_0_SVM = (
    Labeling(detector.predict(Test_Feature_0)) for detector in (model_0_IF, model_0_SVM)
)
Pred_0_Auto = Predict(List_0, Test_Feature_0, Threshold['Feature_0'])
Pred_0 = Ensemble(Pred_0_IF, Pred_0_SVM, Pred_0_Auto)

# Fan type 2: same pipeline with the fan-type-2 models and thresholds.
Pred_2_IF, Pred_2_SVM = (
    Labeling(detector.predict(Test_Feature_2)) for detector in (model_2_IF, model_2_SVM)
)
Pred_2_Auto = Predict(List_2, Test_Feature_2, Threshold['Feature_2'])
Pred_2 = Ensemble(Pred_2_IF, Pred_2_SVM, Pred_2_Auto)



4-4. 제출 양식에 맞게 하기

In [None]:
def Make_submitFile(pred_0,pred_2):
  """Fill the sample submission table with the per-fan-type predictions.

  Reads data/sample_submission.csv under StdPath and, for every Test_info row
  of FAN_TYPE 0 (then 2), writes the matching 'score' value into LABEL. The
  i-th prediction of a fan type goes to the i-th Test_info row of that type.

  Args:
    pred_0: predictions for fan type 0, with a 'score' column.
    pred_2: predictions for fan type 2, with a 'score' column.

  Returns:
    The submission DataFrame with LABEL filled in.
  """
  Final = pd.read_csv(os.path.join(StdPath,'data/sample_submission.csv'),sep=',')
  for fan_type, pred in ((0, pred_0), (2, pred_2)):
    rows = Test_info[Test_info.FAN_TYPE==fan_type].index.tolist()
    for pos, row in enumerate(rows):
      Final.loc[[row],['LABEL']] = pred['score'].iloc[pos]
  return Final
# Build the submission table from both fan-type predictions and write it,
# without the index column, to final.csv under StdPath.
Final = Make_submitFile(pred_0=Pred_0, pred_2=Pred_2)
submission_csv = os.path.join(StdPath,'final.csv')
Final.to_csv(submission_csv, sep=',', index=False)