In [96]:
# Mount Google Drive inside the Colab runtime
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install pycaret

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pycaret
  Downloading pycaret-3.0.0-py3-none-any.whl (481 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m481.8/481.8 kB[0m [31m25.9 MB/s[0m eta [36m0:00:00[0m
Collecting deprecation>=2.1.0
  Downloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)
Collecting xxhash
  Downloading xxhash-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.2/212.2 kB[0m [31m29.1 MB/s[0m eta [36m0:00:00[0m
Collecting kaleido>=0.2.1
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.9/79.9 MB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
Collecting scikit-plot>=0.3.7
  Downloading scikit_plot-0.3.7-py3-none-any.whl (33 kB)
Collecting sktime>=0.16.1
  Downloading sktime-0.17.1-py3-none-any.whl (16.1 

In [153]:
import random
import pandas as pd
import numpy as np
import os
import scipy
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold
from pycaret.anomaly import *

from pyod.models.abod import ABOD

import warnings
warnings.filterwarnings(action='ignore')

In [154]:
# Random seed
def seed_everything(seed):
    """Seed the global random number generators used in this notebook.

    Seeds Python's ``random`` module and NumPy's legacy global RNG with
    ``seed`` and stores ``str(seed)`` in the ``PYTHONHASHSEED`` environment
    variable.

    Args:
        seed: Value passed to ``random.seed`` and ``np.random.seed``.
    """
    seed_text = str(seed)
    os.environ['PYTHONHASHSEED'] = seed_text
    np.random.seed(seed)
    random.seed(seed)


seed_everything(69)  # fix the seed

## Data Load

In [155]:
# Folder on the mounted Drive that holds the CSV files
data_path = '/content/drive/MyDrive/Colab Notebooks/AI_SPARK/Data'

# Read the train and test tables from that folder
train = pd.read_csv(f'{data_path}/train_data.csv')
test = pd.read_csv(f'{data_path}/test_data.csv')

In [156]:
# Display the training frame as loaded
train

Unnamed: 0,air_inflow,air_end_temp,out_pressure,motor_current,motor_rpm,motor_temp,motor_vibe,type
0,1.59,41.00,0.7,20.53,1680.0,58.67,2.93,0
1,2.97,59.28,0.7,38.40,3142.0,74.91,3.75,0
2,1.91,45.29,0.7,24.73,2023.0,62.48,3.12,0
3,2.37,51.33,0.7,30.63,2506.0,67.84,3.39,0
4,1.90,45.21,0.7,24.65,2017.0,62.41,3.12,0
...,...,...,...,...,...,...,...,...
2458,2.28,50.20,0.7,29.53,2416.0,66.84,3.34,7
2459,2.04,46.94,0.7,26.34,2155.0,63.94,3.20,7
2460,1.19,35.74,0.7,15.39,1259.0,53.99,2.70,7
2461,1.21,36.00,0.7,15.64,1280.0,54.22,2.71,7


In [157]:
# Display the test frame as loaded
test

Unnamed: 0,air_inflow,air_end_temp,out_pressure,motor_current,motor_rpm,motor_temp,motor_vibe,type
0,2.51,53.28,0.7,32.54,2662.0,69.58,3.48,0
1,2.66,55.24,0.7,34.45,2819.0,71.32,3.57,0
2,1.72,42.74,0.7,22.23,1819.0,60.21,3.01,0
3,2.20,49.15,0.7,28.50,2332.0,65.91,3.30,0
4,2.06,47.28,0.7,26.67,2182.0,64.24,3.21,0
...,...,...,...,...,...,...,...,...
7384,2.12,48.08,0.7,27.45,2246.0,64.96,3.25,7
7385,1.48,39.63,0.7,19.19,1570.0,57.44,2.87,7
7386,1.56,40.61,0.7,20.15,1649.0,58.32,2.92,7
7387,1.59,40.99,0.7,20.52,1679.0,58.66,2.93,7


## Pre-Processing

#### 1) '마력' 변수 생성

In [158]:
# Horsepower ('HP') assigned to each value of the `type` column
HP_BY_TYPE = {0: 30, 1: 20, 2: 10, 3: 50, 4: 30, 5: 30, 6: 30, 7: 30}

# Apply the same lookup to both frames; a `type` missing from the table yields
# NaN, and the column is kept as float.
for frame in (train, test):
    frame['HP'] = frame['type'].map(HP_BY_TYPE).astype(float)

In [159]:
# Display the training frame to check the new 'HP' column
train

Unnamed: 0,air_inflow,air_end_temp,out_pressure,motor_current,motor_rpm,motor_temp,motor_vibe,type,HP
0,1.59,41.00,0.7,20.53,1680.0,58.67,2.93,0,30.0
1,2.97,59.28,0.7,38.40,3142.0,74.91,3.75,0,30.0
2,1.91,45.29,0.7,24.73,2023.0,62.48,3.12,0,30.0
3,2.37,51.33,0.7,30.63,2506.0,67.84,3.39,0,30.0
4,1.90,45.21,0.7,24.65,2017.0,62.41,3.12,0,30.0
...,...,...,...,...,...,...,...,...,...
2458,2.28,50.20,0.7,29.53,2416.0,66.84,3.34,7,30.0
2459,2.04,46.94,0.7,26.34,2155.0,63.94,3.20,7,30.0
2460,1.19,35.74,0.7,15.39,1259.0,53.99,2.70,7,30.0
2461,1.21,36.00,0.7,15.64,1280.0,54.22,2.71,7,30.0


In [160]:
# Display the test frame to check the new 'HP' column
test

Unnamed: 0,air_inflow,air_end_temp,out_pressure,motor_current,motor_rpm,motor_temp,motor_vibe,type,HP
0,2.51,53.28,0.7,32.54,2662.0,69.58,3.48,0,30.0
1,2.66,55.24,0.7,34.45,2819.0,71.32,3.57,0,30.0
2,1.72,42.74,0.7,22.23,1819.0,60.21,3.01,0,30.0
3,2.20,49.15,0.7,28.50,2332.0,65.91,3.30,0,30.0
4,2.06,47.28,0.7,26.67,2182.0,64.24,3.21,0,30.0
...,...,...,...,...,...,...,...,...,...
7384,2.12,48.08,0.7,27.45,2246.0,64.96,3.25,7,30.0
7385,1.48,39.63,0.7,19.19,1570.0,57.44,2.87,7,30.0
7386,1.56,40.61,0.7,20.15,1649.0,58.32,2.92,7,30.0
7387,1.59,40.99,0.7,20.52,1679.0,58.66,2.93,7,30.0


#### 2) 변수 생성

In [161]:
# 'volt': HP scaled by 1/0.746, divided by the motor current.
# NOTE(review): 0.746 is presumably the kW-per-horsepower factor — confirm.
hp_scale = 1 / 0.746
for frame in (train, test):
    frame['volt'] = (hp_scale * frame['HP']) / frame['motor_current']

In [162]:
# 'torque': HP divided by motor rpm
for frame in (train, test):
    frame['torque'] = frame['HP'] / frame['motor_rpm']

In [163]:
# '각속도' (angular velocity): HP scaled by 1/0.746, divided by the 'torque' feature
hp_scale = 1 / 0.746
for frame in (train, test):
    frame['각속도'] = (hp_scale * frame['HP']) / frame['torque']

In [164]:
# '회전수/진동' (rpm / vibration): motor rpm divided by motor vibration
for frame in (train, test):
    frame['회전수/진동'] = frame['motor_rpm'] / frame['motor_vibe']

In [165]:
# '회전수/전류' (rpm / current): motor rpm divided by motor current
for frame in (train, test):
    frame['회전수/전류'] = frame['motor_rpm'] / frame['motor_current']

In [166]:
# 'temp': sum of the air-end temperature and the motor temperature
for frame in (train, test):
    frame['temp'] = frame['air_end_temp'] + frame['motor_temp']

In [167]:
# '회전수/유량' (rpm / flow): motor rpm divided by air inflow
for frame in (train, test):
    frame['회전수/유량'] = frame['motor_rpm'] / frame['air_inflow']

In [168]:
# '진동수/유량' (vibration / flow): motor vibration divided by air inflow
for frame in (train, test):
    frame['진동수/유량'] = frame['motor_vibe'] / frame['air_inflow']

In [169]:
# '전류/진동수' (current / vibration): motor current divided by motor vibration
for frame in (train, test):
    frame['전류/진동수'] = frame['motor_current'] / frame['motor_vibe']

In [170]:
# '주기' (period): reciprocal of 60 * motor rpm, multiplied by 100000
for frame in (train, test):
    frame['주기'] = (1 / (60 * frame['motor_rpm'])) * 100000

## Train

In [183]:
# Fit one 'abod' model per K-fold training split and keep all of them in
# model_list for the voting cells. The held-out part of each split is not used.
# NOTE(review): KFold is used without shuffling, so every split leaves out one
# contiguous fifth of the rows; the displayed frames look ordered by `type` —
# confirm that this is intended.
kf = KFold(n_splits=5)
model_list = []

for train_index, _holdout_index in kf.split(train):
    # KFold.split yields positional row numbers, so select by position (.iloc)
    # instead of by index label (.loc); the two only agree while `train` keeps
    # its default 0..n-1 index.
    X_train = train.iloc[train_index]

    # Set up the PyCaret anomaly experiment on this split, then fit the model.
    anom = setup(data = X_train, verbose = 0, session_id = 69, normalize = False)
    model = create_model('abod')
    model_list.append(model)


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

#### hard voting

In [184]:
# Collect every model's 'Anomaly' label column for the test frame
preds = []
threshold = 0  # reset here; not read anywhere in this cell

for fold_model in model_list:
    fold_output = predict_model(fold_model, test)
    fold_labels = np.array(fold_output['Anomaly'])
    preds.append(fold_labels)


In [185]:
# Label a row 1 only when the summed votes reach the number of models
# (i.e. every model voted 1); otherwise 0.
vote_totals = np.sum(preds, axis=0)
pred = (vote_totals >= len(model_list)).astype(int)

In [190]:
# How many rows were labelled 0 and how many were labelled 1
int(np.count_nonzero(pred == 0)), int(np.count_nonzero(pred == 1))

(7010, 379)

#### soft voting

In [175]:
# Collect every model's 'Anomaly_Score' column for the test frame
preds = [
    np.array(predict_model(fold_model, test)['Anomaly_Score'])
    for fold_model in model_list
]

# Decision threshold for the averaged score: mean of the models' threshold_ values
threshold = sum(fold_model.threshold_ for fold_model in model_list) / len(model_list)

In [176]:
# Average the per-model scores row by row
score_totals = np.asarray(preds).sum(axis=0)
pred = score_totals / len(model_list)

In [177]:
# Label a row 1 when its averaged score is above the averaged threshold, else 0
pred = (pred > threshold).astype(int)

In [178]:
# How many rows were labelled 0 and how many were labelled 1
int(np.count_nonzero(pred == 0)), int(np.count_nonzero(pred == 1))

(6959, 430)

## Predict

## Submission

In [191]:
# Load the sample answer file that serves as the submission template
submit = pd.read_csv(f'{data_path}/answer_sample.csv')

In [192]:
# Write the voted labels into the submission template and preview the result.
# NOTE(review): `pred` is whichever voting result was computed last. The
# execution counts recorded in this notebook show the hard-voting cells
# (In [184]-[185]) ran after the soft-voting cells (In [175]-[177]), so the
# saved run used hard-voting labels, whereas running the notebook top to
# bottom assigns the soft-voting labels here.
submit['label'] = pred
submit.head()

Unnamed: 0,type,label
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0


In [193]:
# Save the submission inside data_path. The leading '/' is required: without it
# the file name is glued onto the folder name and the file is written to the
# parent directory as 'Data파이캐럿_...csv'.
submit.to_csv(data_path + '/파이캐럿_abod_norm_false_kfold_5hard.csv', index=False)