In [1]:
import random
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler

from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM

from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings(action='ignore')

In [2]:
# Random Seed
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    
seed_everything(69) # Seed 고정

## Data Load

In [3]:
train = pd.read_csv('./data/train_data.csv')
test = pd.read_csv('./data/test_data.csv')

In [4]:
train.head(2)

Unnamed: 0,air_inflow,air_end_temp,out_pressure,motor_current,motor_rpm,motor_temp,motor_vibe,type
0,1.59,41.0,0.7,20.53,1680.0,58.67,2.93,0
1,2.97,59.28,0.7,38.4,3142.0,74.91,3.75,0


In [5]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2463 entries, 0 to 2462
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   air_inflow     2463 non-null   float64
 1   air_end_temp   2463 non-null   float64
 2   out_pressure   2463 non-null   float64
 3   motor_current  2463 non-null   float64
 4   motor_rpm      2463 non-null   float64
 5   motor_temp     2463 non-null   float64
 6   motor_vibe     2463 non-null   float64
 7   type           2463 non-null   int64  
dtypes: float64(7), int64(1)
memory usage: 154.1 KB


## EDA

#### type

In [71]:
train['type'].unique()

array([0, 1, 2, 3, 4, 5, 6, 7], dtype=int64)

## Pre-Processing

#### Scaling

In [62]:
num_features = ['air_inflow', 'air_end_temp', 'out_pressure',
                'motor_current', 'motor_rpm', 'motor_temp',
                'motor_vibe']

In [63]:
scaler = MinMaxScaler()
train[num_features] = scaler.fit_transform(train[num_features])
test[num_features] = scaler.transform(test[num_features])

#### One Hot Encoding

In [6]:
# int -> str
train['type'] = train['type'].astype(str)
test['type'] = test['type'].astype(str)

In [7]:
train = pd.get_dummies(train)
test = pd.get_dummies(test)

In [8]:
train.head()

Unnamed: 0,air_inflow,air_end_temp,out_pressure,motor_current,motor_rpm,motor_temp,motor_vibe,type_0,type_1,type_2,type_3,type_4,type_5,type_6,type_7
0,1.59,41.0,0.7,20.53,1680.0,58.67,2.93,1,0,0,0,0,0,0,0
1,2.97,59.28,0.7,38.4,3142.0,74.91,3.75,1,0,0,0,0,0,0,0
2,1.91,45.29,0.7,24.73,2023.0,62.48,3.12,1,0,0,0,0,0,0,0
3,2.37,51.33,0.7,30.63,2506.0,67.84,3.39,1,0,0,0,0,0,0,0
4,1.9,45.21,0.7,24.65,2017.0,62.41,3.12,1,0,0,0,0,0,0,0


#### Drop column

In [9]:
train.drop('out_pressure', axis=1, inplace=True)
test.drop('out_pressure', axis=1, inplace=True)

## Train

In [21]:
model = IsolationForest(max_samples=256, n_estimators=200, random_state=69)
model.fit(train)

IsolationForest(max_samples=256, n_estimators=200, random_state=69)

In [22]:
model = OneClassSVM(gamma='auto')
model.fit(train)

OneClassSVM(gamma='auto')

## Predict

### Isolation Forest일 경우

In [16]:
# IsolationForest 모델 출력 (1:정상, -1:불량) 이므로 (0:정상, 1:불량)로 Label 변환
def get_pred_label(model_pred):
    model_pred = np.where(model_pred == 1, 0, model_pred)
    model_pred = np.where(model_pred == -1, 1, model_pred)
    return model_pred

In [17]:
pred_test = model.predict(test)
pred_test = get_pred_label(pred_test)

In [18]:
# 정상 (0), 이상 (1)
list(pred_test).count(0), list(pred_test).count(1)

(1101, 6288)

#### OCSVM

In [23]:
# IsolationForest 모델 출력 (1:정상, -1:불량) 이므로 (0:정상, 1:불량)로 Label 변환
def get_pred_label(model_pred):
    model_pred = np.where(model_pred == 1, 1, model_pred)
    model_pred = np.where(model_pred == -1, 0, model_pred)
    return model_pred

pred_test = model.predict(test)
pred_test = get_pred_label(pred_test)

# 정상 (0), 이상 (1)
list(pred_test).count(0), list(pred_test).count(1)

(6288, 1101)

### 다른 모델일 경우

In [19]:
pred_test = model.predict(test)

In [20]:
# 정상 (0), 이상 (1)
list(pred_test).count(0), list(pred_test).count(1)

(0, 1101)

## Submission

In [24]:
submit = pd.read_csv('./data/answer_sample.csv')

In [25]:
submit['label'] = pred_test
submit.head()

Unnamed: 0,type,label
0,0,1
1,0,0
2,0,0
3,0,0
4,0,0


In [26]:
submit.to_csv('./submit/ocsvm.csv', index=False)