In [1]:
# 드라이브 마운트
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install pycaret

  Building wheel for plotly-resampler (pyproject.toml) ... [?25l[?25hdone
  Created wheel for plotly-resampler: filename=plotly_resampler-0.8.3.2-cp39-cp39-manylinux_2_31_x86_64.whl size=75054 sha256=0d0c8b3001217563ba29d6dc555c662d285f662edee27b26bfbf24d090a631fd
  Stored in directory: /root/.cache/pip/wheels/9e/36/06/4c11e300918011376af149098621ec7ebe06d8256566d43d51
  Building wheel for pyod (setup.py) ... [?25l[?25hdone
  Created wheel for pyod: filename=pyod-1.0.9-py3-none-any.whl size=184112 sha256=482d23a1b75269bbc509f23d8003f78a2d49f0376a08420d8d62e87ffffff017
  Stored in directory: /root/.cache/pip/wheels/1b/9c/b8/9759d7cc64a1e01bb9872ade80cb7db445ccf506e083325106
Successfully built plotly-resampler pyod
Installing collected packages: trace-updater, kaleido, dash-table, dash-html-components, dash-core-components, xxhash, wurlitzer, schemdraw, retrying, orjson, jedi, deprecation, deprecated, ansi2html, sktime, scikit-plot, pyod, dash, pmdarima, jupyter-dash, category-encode

In [51]:
import random
import pandas as pd
import numpy as np
import os
import scipy
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from pycaret.anomaly import *

from pyod.models.abod import ABOD
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import KFold

import warnings
warnings.filterwarnings(action='ignore')

In [52]:
# Random Seed
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    
seed_everything(69) # Seed 고정

In [53]:
data_path = '/content/drive/MyDrive/Colab Notebooks/Air_Pressure'
train = pd.read_csv(data_path+'/train_data.csv')
test = pd.read_csv(data_path+'/test_data.csv')

## Pre-Processing

In [54]:
train.loc[train['type'] == 0, 'HP'] = 30
train.loc[train['type'] == 1, 'HP'] = 20
train.loc[train['type'] == 2, 'HP'] = 10
train.loc[train['type'] == 3, 'HP'] = 50
train.loc[train['type'] == 4, 'HP'] = 30
train.loc[train['type'] == 5, 'HP'] = 30
train.loc[train['type'] == 6, 'HP'] = 30
train.loc[train['type'] == 7, 'HP'] = 30

test.loc[test['type'] == 0, 'HP'] = 30
test.loc[test['type'] == 1, 'HP'] = 20
test.loc[test['type'] == 2, 'HP'] = 10
test.loc[test['type'] == 3, 'HP'] = 50
test.loc[test['type'] == 4, 'HP'] = 30
test.loc[test['type'] == 5, 'HP'] = 30
test.loc[test['type'] == 6, 'HP'] = 30
test.loc[test['type'] == 7, 'HP'] = 30

train['volt'] = ((1 / 0.746) * train['HP']) / train['motor_current']
test['volt'] = ((1 / 0.746) * test['HP']) / test['motor_current']

train['torque'] = train['HP'] / train['motor_rpm']
test['torque'] = test['HP'] / test['motor_rpm']

train['각속도'] = ((1 / 0.746) * train['HP']) / train['torque']
test['각속도'] = ((1 / 0.746) * test['HP']) / test['torque']

train['회전수/진동'] = train['motor_rpm'] / train['motor_vibe']
test['회전수/진동'] = test['motor_rpm'] / test['motor_vibe']

train['회전수/전류'] = train['motor_rpm'] / train['motor_current']
test['회전수/전류'] = test['motor_rpm'] / test['motor_current']

train['temp'] = train['air_end_temp'] + train['motor_temp']
test['temp'] = test['air_end_temp'] + test['motor_temp']

train['회전수/유량'] = train['motor_rpm'] / train['air_inflow']
test['회전수/유량'] = test['motor_rpm'] / test['air_inflow']

train['진동수/유량'] = train['motor_vibe'] / train['air_inflow']
test['진동수/유량'] = test['motor_vibe'] / test['air_inflow']

train['전류/진동수'] = train['motor_current'] / train['motor_vibe']
test['전류/진동수'] = test['motor_current'] / test['motor_vibe']

train['주기'] = (1 / (60 * train['motor_rpm'])) * 100000
test['주기'] = (1 / (60 * test['motor_rpm'])) * 100000

## Type

In [55]:
# train
idx = 0
train_idx = []
for i in range(8):
    idx += len(pd.DataFrame(train.loc[train['type'] == i]))
    train_idx.append(idx)

print(train_idx)

[432, 801, 1167, 1473, 1779, 2028, 2277, 2463]


In [56]:
train_0 = train[0:train_idx[0]]
train_1 = train[train_idx[0]:train_idx[1]]
train_2 = train[train_idx[1]:train_idx[2]]
train_3 = train[train_idx[2]:train_idx[3]]
train_4 = train[train_idx[3]:train_idx[4]]
train_5 = train[train_idx[4]:train_idx[5]]
train_6 = train[train_idx[5]:train_idx[6]]
train_7 = train[train_idx[6]:train_idx[7]]

In [57]:
train_list = [train_0, train_1, train_2, train_3, train_4, train_5, train_6, train_7]

In [58]:
# test
idx = 0
test_idx = []
for i in range(8):
    idx += len(pd.DataFrame(test.loc[test['type'] == i]))
    test_idx.append(idx)

print(test_idx)

[1296, 2403, 3501, 4419, 5337, 6084, 6831, 7389]


In [59]:
test_0 = test[0:test_idx[0]]
test_1 = test[test_idx[0]:test_idx[1]]
test_2 = test[test_idx[1]:test_idx[2]]
test_3 = test[test_idx[2]:test_idx[3]]
test_4 = test[test_idx[3]:test_idx[4]]
test_5 = test[test_idx[4]:test_idx[5]]
test_6 = test[test_idx[5]:test_idx[6]]
test_7 = test[test_idx[6]:test_idx[7]]

In [60]:
test_list = [test_0, test_1, test_2, test_3, test_4, test_5, test_6, test_7]

## Train

In [61]:
# KFold
kf = KFold(n_splits=10)
model_list = []

for train_type in train_list:

  for train_index, test_index in kf.split(train_type):
      X_train, X_test = train.loc[train_index], train.loc[test_index]

      anom = setup(data = X_train, verbose = 0, session_id = 69, normalize = False)
      model = create_model('abod', fraction=0.01)
      model_list.append(model)

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

In [80]:
model_list_0 = model_list[0:10]
model_list_1 = model_list[10:20]
model_list_2 = model_list[20:30]
model_list_3 = model_list[30:40]
model_list_4 = model_list[40:50]
model_list_5 = model_list[50:60]
model_list_6 = model_list[60:70]
model_list_7 = model_list[70:80]

#### hard voting

In [83]:
preds_0 = []
threshold = 0

for m in model_list_0:
  predictions = predict_model(m, test_0)
  preds_0.append(np.array(predictions['Anomaly']))

pred_0 = 1*(np.sum(preds_0,axis=0) >= len(preds_0))

print(list(pred_0).count(0), list(pred_0).count(1))

1153 143


In [84]:
preds_1 = []
threshold = 0

for m in model_list_1:
  predictions = predict_model(m, test_1)
  preds_1.append(np.array(predictions['Anomaly']))

pred_1 = 1*(np.sum(preds_1,axis=0) >= len(preds_1))

print(list(pred_1).count(0), list(pred_1).count(1))

0 1107


In [98]:
preds_2 = []
threshold = 0

for m in model_list_2:
  predictions = predict_model(m, test_2)
  preds_2.append(np.array(predictions['Anomaly']))

pred_2 = 1*(np.sum(preds_2,axis=0) >= len(preds_2))

print(list(pred_2).count(0), list(pred_2).count(1))

0 1098


In [86]:
preds_3 = []
threshold = 0

for m in model_list_3:
  predictions = predict_model(m, test_3)
  preds_3.append(np.array(predictions['Anomaly']))

pred_3 = 1*(np.sum(preds_3,axis=0) >= len(preds_3))

print(list(pred_3).count(0), list(pred_3).count(1))

0 918


In [87]:
preds_4 = []
threshold = 0

for m in model_list_4:
  predictions = predict_model(m, test_4)
  preds_4.append(np.array(predictions['Anomaly']))

pred_4 = 1*(np.sum(preds_4,axis=0) >= len(preds_4))

print(list(pred_4).count(0), list(pred_4).count(1))

909 9


In [89]:
preds_5 = []
threshold = 0

for m in model_list_5:
  predictions = predict_model(m, test_5)
  preds_5.append(np.array(predictions['Anomaly']))

pred_5 = 1*(np.sum(preds_5,axis=0) >= len(preds_5))

print(list(pred_5).count(0), list(pred_5).count(1))

716 31


In [90]:
preds_6 = []
threshold = 0

for m in model_list_6:
  predictions = predict_model(m, test_6)
  preds_6.append(np.array(predictions['Anomaly']))

pred_6 = 1*(np.sum(preds_6,axis=0) >= len(preds_6))

print(list(pred_6).count(0), list(pred_6).count(1))

645 102


In [91]:
preds_7 = []
threshold = 0

for m in model_list_7:
  predictions = predict_model(m, test_7)
  preds_7.append(np.array(predictions['Anomaly']))

pred_7 = 1*(np.sum(preds_7,axis=0) >= len(preds_7))

print(list(pred_7).count(0), list(pred_7).count(1))

553 5


In [92]:
list(pred_0).count(1) + list(pred_1).count(1) + list(pred_2).count(1) + list(pred_3).count(1) + list(pred_4).count(1) + list(pred_5).count(1) + list(pred_6).count(1) + list(pred_7).count(1)

3413

## Ensemble

## Submission

In [None]:
submit = pd.read_csv(data_path+'/answer_sample.csv')
submit['label'] = pred
submit.to_csv('/content/drive/MyDrive/Colab Notebooks/Air_Pressure/ABOD_threshold8_f0.01.csv', index=False)