In [None]:
# Mount Google Drive at /content/drive so the dataset and submission folders are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install pycaret

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pycaret
  Downloading pycaret-3.0.0-py3-none-any.whl (481 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m481.8/481.8 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
Collecting tbats>=1.1.0
  Downloading tbats-1.1.2-py3-none-any.whl (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting category-encoders>=2.4.0
  Downloading category_encoders-2.6.0-py2.py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.2/81.2 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
Collecting sktime>=0.16.1
  Downloading sktime-0.17.1-py3-none-any.whl (16.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.1/16.1 MB[0m [31m47.5 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash
  Downloading xxhash-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2

In [None]:
import random
import pandas as pd
import numpy as np
import os
import scipy
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from pycaret.anomaly import *

from pyod.models.abod import ABOD
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import KFold

import warnings
warnings.filterwarnings(action='ignore')

In [None]:
# Reproducibility helper
def seed_everything(seed):
    """Seed the random number generators used by this notebook.

    Sets the ``PYTHONHASHSEED`` environment variable to ``str(seed)`` and seeds
    both Python's ``random`` module and NumPy's global RNG with ``seed``.

    Args:
        seed: Value handed unchanged to ``random.seed`` and ``np.random.seed``.

    Returns:
        None.
    """
    # NOTE(review): CPython presumably reads PYTHONHASHSEED only at start-up, so this
    # likely affects child processes rather than the running kernel - confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


seed_everything(69)  # fix the seed for the whole notebook

## Data Load

In [None]:
# Folder on the mounted Google Drive that holds the competition CSV files.
data_path = '/content/drive/MyDrive/AI_SPARK/dataset'
# Read the training and test frames from that folder.
train = pd.read_csv(data_path+'/train_data.csv')
test = pd.read_csv(data_path+'/test_data.csv')

In [None]:
# Preview the training frame as loaded (the rendered table elides the middle rows).
train

Unnamed: 0,air_inflow,air_end_temp,out_pressure,motor_current,motor_rpm,motor_temp,motor_vibe,type
0,1.59,41.00,0.7,20.53,1680.0,58.67,2.93,0
1,2.97,59.28,0.7,38.40,3142.0,74.91,3.75,0
2,1.91,45.29,0.7,24.73,2023.0,62.48,3.12,0
3,2.37,51.33,0.7,30.63,2506.0,67.84,3.39,0
4,1.90,45.21,0.7,24.65,2017.0,62.41,3.12,0
...,...,...,...,...,...,...,...,...
2458,2.28,50.20,0.7,29.53,2416.0,66.84,3.34,7
2459,2.04,46.94,0.7,26.34,2155.0,63.94,3.20,7
2460,1.19,35.74,0.7,15.39,1259.0,53.99,2.70,7
2461,1.21,36.00,0.7,15.64,1280.0,54.22,2.71,7


In [None]:
# Preview the test frame as loaded (the rendered table elides the middle rows).
test

Unnamed: 0,air_inflow,air_end_temp,out_pressure,motor_current,motor_rpm,motor_temp,motor_vibe,type
0,2.51,53.28,0.7,32.54,2662.0,69.58,3.48,0
1,2.66,55.24,0.7,34.45,2819.0,71.32,3.57,0
2,1.72,42.74,0.7,22.23,1819.0,60.21,3.01,0
3,2.20,49.15,0.7,28.50,2332.0,65.91,3.30,0
4,2.06,47.28,0.7,26.67,2182.0,64.24,3.21,0
...,...,...,...,...,...,...,...,...
7384,2.12,48.08,0.7,27.45,2246.0,64.96,3.25,7
7385,1.48,39.63,0.7,19.19,1570.0,57.44,2.87,7
7386,1.56,40.61,0.7,20.15,1649.0,58.32,2.92,7
7387,1.59,40.99,0.7,20.52,1679.0,58.66,2.93,7


## Pre-Processing

#### 1) '마력' 변수 생성

In [None]:
# Horsepower (HP) assigned to each compressor `type` code.
HP_BY_TYPE = {0: 30, 1: 20, 2: 10, 3: 50, 4: 30, 5: 30, 6: 30, 7: 30}

# Apply one shared mapping to both frames so train and test cannot drift apart.
# A type code missing from the mapping yields NaN; the float cast keeps the
# float64 dtype that assigning into a new column through .loc produced.
for frame in (train, test):
    frame['HP'] = frame['type'].map(HP_BY_TYPE).astype(float)

In [None]:
# Check that the HP column was added to the training frame.
train

Unnamed: 0,air_inflow,air_end_temp,out_pressure,motor_current,motor_rpm,motor_temp,motor_vibe,type,HP
0,1.59,41.00,0.7,20.53,1680.0,58.67,2.93,0,30.0
1,2.97,59.28,0.7,38.40,3142.0,74.91,3.75,0,30.0
2,1.91,45.29,0.7,24.73,2023.0,62.48,3.12,0,30.0
3,2.37,51.33,0.7,30.63,2506.0,67.84,3.39,0,30.0
4,1.90,45.21,0.7,24.65,2017.0,62.41,3.12,0,30.0
...,...,...,...,...,...,...,...,...,...
2458,2.28,50.20,0.7,29.53,2416.0,66.84,3.34,7,30.0
2459,2.04,46.94,0.7,26.34,2155.0,63.94,3.20,7,30.0
2460,1.19,35.74,0.7,15.39,1259.0,53.99,2.70,7,30.0
2461,1.21,36.00,0.7,15.64,1280.0,54.22,2.71,7,30.0


In [None]:
# Check that the HP column was added to the test frame.
test

Unnamed: 0,air_inflow,air_end_temp,out_pressure,motor_current,motor_rpm,motor_temp,motor_vibe,type,HP
0,2.51,53.28,0.7,32.54,2662.0,69.58,3.48,0,30.0
1,2.66,55.24,0.7,34.45,2819.0,71.32,3.57,0,30.0
2,1.72,42.74,0.7,22.23,1819.0,60.21,3.01,0,30.0
3,2.20,49.15,0.7,28.50,2332.0,65.91,3.30,0,30.0
4,2.06,47.28,0.7,26.67,2182.0,64.24,3.21,0,30.0
...,...,...,...,...,...,...,...,...,...
7384,2.12,48.08,0.7,27.45,2246.0,64.96,3.25,7,30.0
7385,1.48,39.63,0.7,19.19,1570.0,57.44,2.87,7,30.0
7386,1.56,40.61,0.7,20.15,1649.0,58.32,2.92,7,30.0
7387,1.59,40.99,0.7,20.52,1679.0,58.66,2.93,7,30.0


In [None]:
# Print the column dtypes and non-null counts of the training frame.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2463 entries, 0 to 2462
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   air_inflow     2463 non-null   float64
 1   air_end_temp   2463 non-null   float64
 2   out_pressure   2463 non-null   float64
 3   motor_current  2463 non-null   float64
 4   motor_rpm      2463 non-null   float64
 5   motor_temp     2463 non-null   float64
 6   motor_vibe     2463 non-null   float64
 7   type           2463 non-null   int64  
 8   HP             2463 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 173.3 KB


#### 2) 변수 생성

In [None]:
# volt: HP scaled by 1/0.746, then divided by the motor current (same formula for both frames).
# NOTE(review): 1 HP is roughly 0.746 kW, so a kW conversion would multiply by 0.746
# rather than divide - confirm the intended scaling.
for frame in (train, test):
    frame['volt'] = ((1 / 0.746) * frame['HP']) / frame['motor_current']

In [None]:
# torque: HP divided by motor rpm (no unit-conversion constant is applied).
for frame in (train, test):
    frame['torque'] = frame['HP'] / frame['motor_rpm']

In [None]:
# Angular-velocity feature: HP scaled by 1/0.746, divided by torque.
# With torque = HP / motor_rpm this is algebraically motor_rpm / 0.746,
# i.e. a rescaled copy of motor_rpm.
for frame in (train, test):
    frame['각속도'] = ((1 / 0.746) * frame['HP']) / frame['torque']

In [None]:
# rpm / vibration: motor rpm divided by motor vibration.
for frame in (train, test):
    frame['회전수/진동'] = frame['motor_rpm'] / frame['motor_vibe']

In [None]:
# rpm / current: motor rpm divided by motor current.
for frame in (train, test):
    frame['회전수/전류'] = frame['motor_rpm'] / frame['motor_current']

In [None]:
# temp: sum of the air end temperature and the motor temperature.
for frame in (train, test):
    frame['temp'] = frame['air_end_temp'] + frame['motor_temp']

In [None]:
# rpm / inflow: motor rpm divided by air inflow.
for frame in (train, test):
    frame['회전수/유량'] = frame['motor_rpm'] / frame['air_inflow']

In [None]:
# vibration / inflow: motor vibration divided by air inflow.
for frame in (train, test):
    frame['진동수/유량'] = frame['motor_vibe'] / frame['air_inflow']

In [None]:
# current / vibration: motor current divided by motor vibration.
for frame in (train, test):
    frame['전류/진동수'] = frame['motor_current'] / frame['motor_vibe']

In [None]:
# Period feature: reciprocal of (60 * motor rpm), scaled by 100000.
# NOTE(review): the time for one revolution at N rpm would be 60 / N seconds, so this
# is only inversely proportional to rpm, not a period in seconds - confirm the intent.
for frame in (train, test):
    frame['주기'] = (1 / (60 * frame['motor_rpm'])) * 100000

In [None]:
# Preview the training frame with all engineered feature columns attached.
train

Unnamed: 0,air_inflow,air_end_temp,out_pressure,motor_current,motor_rpm,motor_temp,motor_vibe,type,HP,volt,torque,각속도,회전수/진동,회전수/전류,temp,회전수/유량,진동수/유량,전류/진동수,주기
0,1.59,41.00,0.7,20.53,1680.0,58.67,2.93,0,30.0,1.958815,0.017857,2252.010724,573.378840,81.831466,99.67,1056.603774,1.842767,7.006826,0.992063
1,2.97,59.28,0.7,38.40,3142.0,74.91,3.75,0,30.0,1.047252,0.009548,4211.796247,837.866667,81.822917,134.19,1057.912458,1.262626,10.240000,0.530448
2,1.91,45.29,0.7,24.73,2023.0,62.48,3.12,0,30.0,1.626141,0.014829,2711.796247,648.397436,81.803478,107.77,1059.162304,1.633508,7.926282,0.823859
3,2.37,51.33,0.7,30.63,2506.0,67.84,3.39,0,30.0,1.312911,0.011971,3359.249330,739.233038,81.815214,119.17,1057.383966,1.430380,9.035398,0.665070
4,1.90,45.21,0.7,24.65,2017.0,62.41,3.12,0,30.0,1.631419,0.014874,2703.753351,646.474359,81.825558,107.62,1061.578947,1.642105,7.900641,0.826310
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2458,2.28,50.20,0.7,29.53,2416.0,66.84,3.34,7,30.0,1.361818,0.012417,3238.605898,723.353293,81.815103,117.04,1059.649123,1.464912,8.841317,0.689845
2459,2.04,46.94,0.7,26.34,2155.0,63.94,3.20,7,30.0,1.526746,0.013921,2888.739946,673.437500,81.814730,110.88,1056.372549,1.568627,8.231250,0.773395
2460,1.19,35.74,0.7,15.39,1259.0,53.99,2.70,7,30.0,2.613026,0.023828,1687.667560,466.296296,81.806368,89.73,1057.983193,2.268908,5.700000,1.323802
2461,1.21,36.00,0.7,15.64,1280.0,54.22,2.71,7,30.0,2.571258,0.023438,1715.817694,472.324723,81.841432,90.22,1057.851240,2.239669,5.771218,1.302083


## Train

In [None]:
# Initialise a PyCaret anomaly experiment on the full engineered training frame,
# with a fixed session_id and normalisation switched off.
# Note: `anom` is reassigned by the setup() call inside the K-fold loop further down.
anom = setup(data = train, verbose = 0, session_id = 69, normalize = False)

In [None]:
# List the anomaly detectors PyCaret offers; the ID column holds the names passed to create_model.
models()

Unnamed: 0_level_0,Name,Reference
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
abod,Angle-base Outlier Detection,pyod.models.abod.ABOD
cluster,Clustering-Based Local Outlier,pyod.models.cblof.CBLOF
cof,Connectivity-Based Local Outlier,pyod.models.cof.COF
iforest,Isolation Forest,pyod.models.iforest.IForest
histogram,Histogram-based Outlier Detection,pyod.models.hbos.HBOS
knn,K-Nearest Neighbors Detector,pyod.models.knn.KNN
lof,Local Outlier Factor,pyod.models.lof.LOF
svm,One-class SVM detector,pyod.models.ocsvm.OCSVM
pca,Principal Component Analysis,pyod.models.pca.PCA
mcd,Minimum Covariance Determinant,pyod.models.mcd.MCD


In [None]:
# Fit one ABOD detector per fold of a 10-fold split and keep every fitted model.
# KFold is used without shuffling, so each fold holds out a contiguous block of rows.
kf = KFold(n_splits=10)
model_list = []

for train_index, _ in kf.split(train):
    # kf.split yields positional row numbers, so select with .iloc; label-based .loc
    # only gives the same rows while the frame keeps its default RangeIndex.
    X_train = train.iloc[train_index]

    # Re-run setup on this fold's rows before creating the model.
    # NOTE(review): create_model presumably fits on the data of the latest setup() - confirm.
    anom = setup(data = X_train, verbose = 0, session_id = 69, normalize = False)
    # fraction=0.01 is forwarded to PyCaret unchanged.
    # NOTE(review): presumably the expected share of anomalies - confirm against the PyCaret docs.
    model = create_model('abod', fraction=0.01)
    model_list.append(model)

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

## Predict

In [None]:
# Score the test frame with every fold model and keep each model's `Anomaly` column
# as a NumPy array (one array per model, in model_list order).
preds = [np.array(predict_model(m, test)['Anomaly']) for m in model_list]

In [None]:
# Unanimous vote: label a row 1 only when the summed per-model flags reach the
# number of models, i.e. every fold model flagged it.
votes = np.sum(preds, axis=0)
pred = (votes >= len(model_list)).astype(int)

In [None]:
# Show how many rows ended up labelled 0 and how many labelled 1.
labels = list(pred)
labels.count(0), labels.count(1)

(7082, 307)

## Submission

In [None]:
# Load the sample answer file from the dataset folder; it serves as the submission template.
submit = pd.read_csv(data_path+'/answer_sample.csv')

In [None]:
# Write the ensemble labels into the template's `label` column and preview the first rows.
# NOTE(review): assumes `pred` is in the same row order as answer_sample.csv - confirm.
submit['label'] = pred
submit.head()

Unnamed: 0,type,label
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0


In [None]:
# Save the submission to Google Drive without writing the DataFrame index.
submit.to_csv('/content/drive/MyDrive/AI_SPARK/submit/36.csv', index=False)