In [1]:
import random
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler

from pyod.models.abod import ABOD

from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings(action='ignore')

In [2]:
# Reproducibility: seed every random source this notebook relies on.
def seed_everything(seed):
    """Seed Python's `random`, NumPy's global RNG and PYTHONHASHSEED.

    Args:
        seed: Integer seed applied to all three sources.

    Note:
        PYTHONHASHSEED is written to ``os.environ`` as a string; hash
        randomization of the already-running interpreter is decided at
        startup, so this mainly affects processes launched afterwards.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


seed_everything(69)  # pin the seed for the whole run

## Data Load

In [3]:
# Read the training and test tables from the local data directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('./data/train_data.csv', './data/test_data.csv')
)

## Pre-Processing

#### HP

In [4]:
# HP value assigned to each equipment `type` code. Keeping the table in one
# place means train and test can never drift apart.
HP_BY_TYPE = {0: 30, 1: 20, 2: 10, 3: 50, 4: 30, 5: 30, 6: 30, 7: 30}

# Any `type` missing from the table yields NaN, exactly as the per-type
# `.loc` assignments did on a freshly loaded frame; the float cast keeps the
# column dtype identical to what those assignments produced.
for frame in (train, test):
    frame['HP'] = frame['type'].map(HP_BY_TYPE).astype('float64')

In [5]:
# `volt`: HP scaled by 1/0.746, then divided by the motor current.
# NOTE(review): 0.746 looks like the kW-per-HP factor; a HP -> kW conversion
# would multiply by it rather than divide -- confirm the intended formula.
for frame in (train, test):
    frame['volt'] = ((1 / 0.746) * frame['HP']) / frame['motor_current']

In [6]:
# `torque`: HP per unit of motor RPM.
for frame in (train, test):
    frame['torque'] = frame['HP'] / frame['motor_rpm']

In [7]:
# '각속도' (angular velocity): scaled HP divided by the derived torque.
# Since torque is HP / motor_rpm, this is motor_rpm / 0.746 up to rounding.
for frame in (train, test):
    frame['각속도'] = ((1 / 0.746) * frame['HP']) / frame['torque']

In [8]:
# '회전수_진동': motor RPM divided by motor vibration.
for frame in (train, test):
    frame['회전수_진동'] = frame['motor_rpm'] / frame['motor_vibe']

In [9]:
# '회전수_전류': motor RPM divided by motor current.
for frame in (train, test):
    frame['회전수_전류'] = frame['motor_rpm'] / frame['motor_current']

In [10]:
# `temp`: sum of the air-end and motor temperature readings.
for frame in (train, test):
    frame['temp'] = frame['air_end_temp'] + frame['motor_temp']

In [11]:
# '회전수/유량': motor RPM divided by air inflow.
for frame in (train, test):
    frame['회전수/유량'] = frame['motor_rpm'] / frame['air_inflow']

In [12]:
# '진동수/유량': motor vibration divided by air inflow.
for frame in (train, test):
    frame['진동수/유량'] = frame['motor_vibe'] / frame['air_inflow']

In [13]:
# '전류/진동수': motor current divided by motor vibration.
for frame in (train, test):
    frame['전류/진동수'] = frame['motor_current'] / frame['motor_vibe']

In [14]:
# '주기' (period): reciprocal of 60 * motor RPM, scaled by 100000.
# NOTE(review): a rotation period in seconds would be 60 / rpm -- confirm
# that 1 / (60 * rpm) is the intended quantity.
for frame in (train, test):
    frame['주기'] = (1 / (60 * frame['motor_rpm'])) * 100000

## 모델 분할

#### train

In [15]:
# Running end offsets per type: train_idx[k] is the cumulative number of
# rows whose `type` is in 0..k.
train_idx = []
idx = 0
for i in range(8):
    rows_of_type = int((train['type'] == i).sum())
    idx = idx + rows_of_type
    train_idx.append(idx)

print(train_idx)

[432, 801, 1167, 1473, 1779, 2028, 2277, 2463]


In [16]:
# The positional slices below only line up with the type codes when rows are
# grouped by ascending `type` and every row has a type in 0..7. Fail loudly
# instead of silently mixing equipment types.
assert train['type'].is_monotonic_increasing, "train must be sorted by 'type'"
assert train_idx[-1] == len(train), "train has rows whose 'type' is outside 0-7"

# Consecutive (start, stop) pairs taken from the cumulative offsets.
train_bounds = [0] + list(train_idx)
(train_0, train_1, train_2, train_3,
 train_4, train_5, train_6, train_7) = (
    train[start:stop]
    for start, stop in zip(train_bounds[:-1], train_bounds[1:])
)

#### test

In [17]:
# Running end offsets per type: test_idx[k] is the cumulative number of
# rows whose `type` is in 0..k.
test_idx = []
idx = 0
for i in range(8):
    rows_of_type = int((test['type'] == i).sum())
    idx = idx + rows_of_type
    test_idx.append(idx)

print(test_idx)

[1296, 2403, 3501, 4419, 5337, 6084, 6831, 7389]


In [18]:
# The positional slices below only line up with the type codes when rows are
# grouped by ascending `type` and every row has a type in 0..7. Row order is
# kept as-is because predictions are later concatenated in type order and
# written to the submission by position.
assert test['type'].is_monotonic_increasing, "test must be sorted by 'type'"
assert test_idx[-1] == len(test), "test has rows whose 'type' is outside 0-7"

# Consecutive (start, stop) pairs taken from the cumulative offsets.
test_bounds = [0] + list(test_idx)
(test_0, test_1, test_2, test_3,
 test_4, test_5, test_6, test_7) = (
    test[start:stop]
    for start, stop in zip(test_bounds[:-1], test_bounds[1:])
)

## Train, Predict

In [19]:
def train(train, test, contamination=0.1, n_neighbors=8, method='fast'):
    """Fit an ABOD detector on `train` and label every row of `test`.

    Args:
        train: Feature table used to fit the detector.
        test: Feature table to label (presumably the same columns as
            `train` -- verify against the caller).
        contamination: Expected outlier fraction handed to ABOD.
        n_neighbors: Neighbour count handed to ABOD.
        method: ABOD variant handed to ABOD (the notebook uses 'fast').

    Returns:
        The array returned by `model.predict(test)`: one label per row,
        0 = normal, 1 = anomaly.
    """
    # NOTE(review): defining this function rebinds the notebook-level name
    # `train`, which until now held the training DataFrame. Earlier cells
    # cannot be re-run after this one without reloading the data.
    model = ABOD(contamination=contamination, method=method, n_neighbors=n_neighbors)
    model.fit(train)

    # pyod's predict() already returns 0 (inlier) / 1 (outlier). The former
    # -1/1 remap was left over from a different detector and never changed
    # any value, so the labels are returned directly.
    return model.predict(test)

In [20]:
# Fit one detector per equipment type and label the matching test split.
# The generator is consumed in order, so types 0..7 are processed in sequence.
train_parts = (train_0, train_1, train_2, train_3,
               train_4, train_5, train_6, train_7)
test_parts = (test_0, test_1, test_2, test_3,
              test_4, test_5, test_6, test_7)

(pred_0, pred_1, pred_2, pred_3,
 pred_4, pred_5, pred_6, pred_7) = (
    train(fit_part, score_part)
    for fit_part, score_part in zip(train_parts, test_parts)
)

In [21]:
# Flatten the per-type predictions into one list, in type order 0..7.
pred = [
    label
    for part in (pred_0, pred_1, pred_2, pred_3,
                 pred_4, pred_5, pred_6, pred_7)
    for label in part
]
len(pred)

7389

In [22]:
# Label counts as (normal == 0, anomaly == 1).
n_normal, n_anomaly = pred.count(0), pred.count(1)
n_normal, n_anomaly

(6510, 879)

## Submission

In [23]:
# Load the sample answer file that serves as the submission template.
sample_path = './data/answer_sample.csv'
submit = pd.read_csv(sample_path)

In [24]:
# Attach the predictions by position; pandas raises if the lengths differ.
submit = submit.assign(label=pred)
submit.head()

Unnamed: 0,type,label
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0


In [25]:
# `to_csv` does not create missing parent directories, so make sure the
# output folder exists before writing the submission file.
os.makedirs('./submit', exist_ok=True)
submit.to_csv('./submit/ABOD_8.csv', index=False)