In [48]:
# Mount Google Drive into the Colab filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install pycaret

In [50]:
import random
import pandas as pd
import numpy as np
import os
import scipy
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold, StratifiedKFold
from imblearn.over_sampling import SMOTE
import imblearn
from imblearn.under_sampling import RandomUnderSampler, TomekLinks, ClusterCentroids

from pycaret.anomaly import *

from pyod.models.abod import ABOD

import warnings
warnings.filterwarnings(action='ignore')

In [109]:
# Random seed
def seed_everything(seed):
    """Seed the random number generators used by this notebook.

    Stores ``str(seed)`` in the ``PYTHONHASHSEED`` environment variable and
    seeds both Python's ``random`` module and NumPy's global generator.

    Args:
        seed: Integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


seed_everything(69)  # fix the seed

## Data Load

In [110]:
# Read the train and test CSVs from the dataset folder on the mounted Drive.
data_path = '/content/drive/MyDrive/AI_SPARK/dataset'
train, test = (
    pd.read_csv(data_path + csv_name)
    for csv_name in ('/train_data.csv', '/test_data.csv')
)

In [53]:
# Display the training frame as loaded, before any derived columns are added.
train

Unnamed: 0,air_inflow,air_end_temp,out_pressure,motor_current,motor_rpm,motor_temp,motor_vibe,type
0,1.59,41.00,0.7,20.53,1680.0,58.67,2.93,0
1,2.97,59.28,0.7,38.40,3142.0,74.91,3.75,0
2,1.91,45.29,0.7,24.73,2023.0,62.48,3.12,0
3,2.37,51.33,0.7,30.63,2506.0,67.84,3.39,0
4,1.90,45.21,0.7,24.65,2017.0,62.41,3.12,0
...,...,...,...,...,...,...,...,...
2458,2.28,50.20,0.7,29.53,2416.0,66.84,3.34,7
2459,2.04,46.94,0.7,26.34,2155.0,63.94,3.20,7
2460,1.19,35.74,0.7,15.39,1259.0,53.99,2.70,7
2461,1.21,36.00,0.7,15.64,1280.0,54.22,2.71,7


## Pre-Processing

#### 1) 변수 생성

In [111]:
# HP value assigned to each `type` code (types absent from the table get NaN).
HP_BY_TYPE = {0: 30, 1: 20, 2: 10, 3: 50, 4: 30, 5: 30, 6: 30, 7: 30}


def add_motor_features(df):
    """Return a copy of ``df`` with the derived feature columns appended.

    The same transformation is applied to the train and test frames so the
    two stay column-for-column identical. Columns are added in this order:
    HP, volt, torque, 각속도, 회전수/진동, 회전수/전류, temp, 회전수/유량,
    진동수/유량, 전류/진동수, 주기.

    Args:
        df: Frame holding the columns ``type``, ``air_inflow``,
            ``air_end_temp``, ``motor_current``, ``motor_rpm``,
            ``motor_temp`` and ``motor_vibe``.

    Returns:
        A new DataFrame; the input frame is left untouched.
    """
    out = df.copy()

    # Float dtype matches what the original mask-based assignment produced.
    out['HP'] = out['type'].map(HP_BY_TYPE).astype(float)

    # NOTE(review): 1 HP is about 0.746 kW, so an HP -> kW conversion would
    # multiply by 0.746; here HP is divided by 0.746 — confirm the intent.
    hp_scaled = (1 / 0.746) * out['HP']

    out['volt'] = hp_scaled / out['motor_current']
    out['torque'] = out['HP'] / out['motor_rpm']
    out['각속도'] = hp_scaled / out['torque']

    # Ratio features between the raw sensor readings.
    out['회전수/진동'] = out['motor_rpm'] / out['motor_vibe']
    out['회전수/전류'] = out['motor_rpm'] / out['motor_current']
    out['temp'] = out['air_end_temp'] + out['motor_temp']
    out['회전수/유량'] = out['motor_rpm'] / out['air_inflow']
    out['진동수/유량'] = out['motor_vibe'] / out['air_inflow']
    out['전류/진동수'] = out['motor_current'] / out['motor_vibe']

    # Reciprocal of (60 * rpm), scaled by 1e5.
    out['주기'] = (1 / (60 * out['motor_rpm'])) * 100000
    return out


train = add_motor_features(train)
test = add_motor_features(test)

In [112]:
# Display the training frame again to inspect the newly derived columns.
train

Unnamed: 0,air_inflow,air_end_temp,out_pressure,motor_current,motor_rpm,motor_temp,motor_vibe,type,HP,volt,torque,각속도,회전수/진동,회전수/전류,temp,회전수/유량,진동수/유량,전류/진동수,주기
0,1.59,41.00,0.7,20.53,1680.0,58.67,2.93,0,30.0,1.958815,0.017857,2252.010724,573.378840,81.831466,99.67,1056.603774,1.842767,7.006826,0.992063
1,2.97,59.28,0.7,38.40,3142.0,74.91,3.75,0,30.0,1.047252,0.009548,4211.796247,837.866667,81.822917,134.19,1057.912458,1.262626,10.240000,0.530448
2,1.91,45.29,0.7,24.73,2023.0,62.48,3.12,0,30.0,1.626141,0.014829,2711.796247,648.397436,81.803478,107.77,1059.162304,1.633508,7.926282,0.823859
3,2.37,51.33,0.7,30.63,2506.0,67.84,3.39,0,30.0,1.312911,0.011971,3359.249330,739.233038,81.815214,119.17,1057.383966,1.430380,9.035398,0.665070
4,1.90,45.21,0.7,24.65,2017.0,62.41,3.12,0,30.0,1.631419,0.014874,2703.753351,646.474359,81.825558,107.62,1061.578947,1.642105,7.900641,0.826310
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2458,2.28,50.20,0.7,29.53,2416.0,66.84,3.34,7,30.0,1.361818,0.012417,3238.605898,723.353293,81.815103,117.04,1059.649123,1.464912,8.841317,0.689845
2459,2.04,46.94,0.7,26.34,2155.0,63.94,3.20,7,30.0,1.526746,0.013921,2888.739946,673.437500,81.814730,110.88,1056.372549,1.568627,8.231250,0.773395
2460,1.19,35.74,0.7,15.39,1259.0,53.99,2.70,7,30.0,2.613026,0.023828,1687.667560,466.296296,81.806368,89.73,1057.983193,2.268908,5.700000,1.323802
2461,1.21,36.00,0.7,15.64,1280.0,54.22,2.71,7,30.0,2.571258,0.023438,1715.817694,472.324723,81.841432,90.22,1057.851240,2.239669,5.771218,1.302083


## Under-sampling (ClusterCentroids)

In [113]:
# (rows, columns) of the training frame before resampling.
train.shape

(2463, 19)

In [114]:
# Under-sample the training frame with ClusterCentroids, using `type` as the
# class label. In the recorded run this reduces `train` from 2463 to 1488 rows.
# NOTE(review): `train` still contains the `type` column, so it is passed to
# the sampler as a feature as well as the label — confirm this is intended.
label = train['type']
# An explicit random_state makes the resampling reproducible regardless of
# which cells consumed the global NumPy RNG beforehand.
sm = ClusterCentroids(sampling_strategy='auto', random_state=69)
train, label = sm.fit_resample(train, label)

In [115]:
# (rows, columns) of the training frame after resampling.
train.shape

(1488, 19)

## Train

In [116]:
# Initialise the PyCaret anomaly experiment on the full (resampled) training frame.
anom = setup(
    data=train,
    verbose=0,
    session_id=69,
    normalize=False,
)

In [117]:
# List the available anomaly-detection models (ID, name, reference class).
models()

Unnamed: 0_level_0,Name,Reference
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
abod,Angle-base Outlier Detection,pyod.models.abod.ABOD
cluster,Clustering-Based Local Outlier,pyod.models.cblof.CBLOF
cof,Connectivity-Based Local Outlier,pyod.models.cof.COF
iforest,Isolation Forest,pyod.models.iforest.IForest
histogram,Histogram-based Outlier Detection,pyod.models.hbos.HBOS
knn,K-Nearest Neighbors Detector,pyod.models.knn.KNN
lof,Local Outlier Factor,pyod.models.lof.LOF
svm,One-class SVM detector,pyod.models.ocsvm.OCSVM
pca,Principal Component Analysis,pyod.models.pca.PCA
mcd,Minimum Covariance Determinant,pyod.models.mcd.MCD


In [118]:
# Train one ABOD detector per fold; each model sees the rows its fold keeps.
label = train['type']
kf = KFold(n_splits=20)
model_list = []

# NOTE(review): KFold is used without shuffling, so each fold leaves out one
# contiguous slice of rows — confirm that suits the row order of `train`.
for train_index, test_index in kf.split(train, label):
    # KFold yields positional indices, so rows are selected with .iloc; the
    # label-based .loc only happened to work while the index was 0..n-1.
    # The held-out rows (test_index) are not used for evaluation here.
    X_train = train.iloc[train_index]

    # setup() must be re-run so create_model() fits on this fold's rows.
    anom = setup(data=X_train, verbose=0, session_id=69, normalize=False)
    model = create_model('abod', fraction=0.02)
    model_list.append(model)

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

## Predict

In [119]:
# Score the test frame with every fold model and keep each 'Anomaly' column.
threshold = 0  # not read by any of the visible cells
preds = []

for fold_model in model_list:
    scored = predict_model(fold_model, test)
    preds.append(np.array(scored['Anomaly']))

In [120]:
# Majority-style vote: a row is labelled 1 when at least 9 of the fold models
# flag it. (A stricter alternative is to require all len(model_list) models.)
vote_counts = np.sum(preds, axis=0)
pred = (vote_counts >= 9).astype(int)

In [121]:
# How many test rows received label 0 and label 1, in that order.
int((pred == 0).sum()), int((pred == 1).sum())

(7053, 336)

## Submission

In [122]:
# Load the sample answer file to use as the submission template.
sample_path = data_path+'/answer_sample.csv'
submit = pd.read_csv(sample_path)

In [123]:
# Put the ensemble predictions in the 'label' column and preview the result.
submit = submit.assign(label=pred)
submit.head()

Unnamed: 0,type,label
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0


In [124]:
# Write the submission to Drive without the index column.
submit_path = '/content/drive/MyDrive/AI_SPARK/submit/57_002_332_k20_v9_underCC_auto_1488.csv'
submit.to_csv(submit_path, index=False)