In [1]:
# 드라이브 마운트
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install pycaret

In [47]:
import random
import pandas as pd
import numpy as np
import os
import scipy
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold, StratifiedKFold
from imblearn.over_sampling import SMOTE
import imblearn
from imblearn.under_sampling import RandomUnderSampler, TomekLinks, ClusterCentroids

from pycaret.anomaly import *

from pyod.models.abod import ABOD

import warnings
warnings.filterwarnings(action='ignore')

In [48]:
# Random Seed
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    
seed_everything(69) # Seed 고정

In [49]:
def preprocessing(train, test):
  train.loc[train['type'] == 0, 'HP'] = 30
  train.loc[train['type'] == 1, 'HP'] = 20
  train.loc[train['type'] == 2, 'HP'] = 10
  train.loc[train['type'] == 3, 'HP'] = 50
  train.loc[train['type'] == 4, 'HP'] = 30
  train.loc[train['type'] == 5, 'HP'] = 30
  train.loc[train['type'] == 6, 'HP'] = 30
  train.loc[train['type'] == 7, 'HP'] = 30

  test.loc[test['type'] == 0, 'HP'] = 30
  test.loc[test['type'] == 1, 'HP'] = 20
  test.loc[test['type'] == 2, 'HP'] = 10
  test.loc[test['type'] == 3, 'HP'] = 50
  test.loc[test['type'] == 4, 'HP'] = 30
  test.loc[test['type'] == 5, 'HP'] = 30
  test.loc[test['type'] == 6, 'HP'] = 30
  test.loc[test['type'] == 7, 'HP'] = 30

  train['volt'] = ((1 / 0.746) * train['HP']) / train['motor_current']
  test['volt'] = ((1 / 0.746) * test['HP']) / test['motor_current']

  train['torque'] = train['HP'] / train['motor_rpm']
  test['torque'] = test['HP'] / test['motor_rpm']

  train['각속도'] = ((1 / 0.746) * train['HP']) / train['torque']
  test['각속도'] = ((1 / 0.746) * test['HP']) / test['torque']

  train['회전수/진동'] = train['motor_rpm'] / train['motor_vibe']
  test['회전수/진동'] = test['motor_rpm'] / test['motor_vibe']

  train['회전수/전류'] = train['motor_rpm'] / train['motor_current']
  test['회전수/전류'] = test['motor_rpm'] / test['motor_current']

  train['temp'] = train['air_end_temp'] + train['motor_temp']
  test['temp'] = test['air_end_temp'] + test['motor_temp']

  train['회전수/유량'] = train['motor_rpm'] / train['air_inflow']
  test['회전수/유량'] = test['motor_rpm'] / test['air_inflow']

  train['진동수/유량'] = train['motor_vibe'] / train['air_inflow']
  test['진동수/유량'] = test['motor_vibe'] / test['air_inflow']

  train['전류/진동수'] = train['motor_current'] / train['motor_vibe']
  test['전류/진동수'] = test['motor_current'] / test['motor_vibe']

  train['주기'] = (1 / (60 * train['motor_rpm'])) * 100000
  test['주기'] = (1 / (60 * test['motor_rpm'])) * 100000

  return train, test

In [81]:
data_path = '/content/drive/MyDrive/Colab Notebooks/Air_Pressure'
train = pd.read_csv(data_path+'/train_data.csv')
test = pd.read_csv(data_path+'/test_data.csv')

## Sampling

In [82]:
label = train['type']
sm = ClusterCentroids(sampling_strategy='auto', )
train, label = sm.fit_resample(train, label)

## Scaler

In [83]:
# features
num_features = ['air_inflow', 'air_end_temp', 'out_pressure',
                'motor_current', 'motor_rpm', 'motor_temp',
                'motor_vibe']

# Scaler
scaler = MinMaxScaler()
train[num_features] = scaler.fit_transform(train[num_features])
test[num_features] = scaler.transform(test[num_features])

## 차원축소

In [84]:
# PCA
pca = PCA(n_components=2)
train_pca = pca.fit_transform(train)

pca = PCA(n_components=2)
test_pca = pca.fit_transform(test)

train_pca = pd.DataFrame(data=train_pca, columns = ['principal component1', 'principal component2'])
test_pca = pd.DataFrame(data=test_pca, columns = ['principal component1', 'principal component2'])

In [85]:
# tsne
# tsne = TSNE(n_components=1)
# train_tsne = tsne.fit_transform(train)

# tsne = TSNE(n_components=1)
# test_tsne = tsne.fit_transform(test)

# train_tsne = pd.DataFrame(data=train_tsne, columns = ['principal component3'])
# test_tsne = pd.DataFrame(data=test_tsne, columns = ['principal component3'])

In [86]:
# 원본 데이터를 다시 read
train = pd.read_csv(data_path+'/train_data.csv')
test = pd.read_csv(data_path+'/test_data.csv')

train, test = preprocessing(train, test)

In [87]:
# concat
train = pd.concat([train, train_pca], axis=1)
test = pd.concat([test, test_pca], axis=1)

# train = pd.concat([train, train_tsne], axis=1)
# test = pd.concat([test, test_tsne], axis=1)

# train = pd.concat([train, train_pca, train_tsne], axis=1)
# test = pd.concat([test, test_pca, test_tsne], axis=1)

## KFold

In [88]:
# KFold
kf = KFold(n_splits=20)
model_list = []

for train_index, test_index in kf.split(train):
    X_train, X_test = train.loc[train_index], train.loc[test_index]

    anom = setup(data = X_train, verbose = 0, session_id = 69, normalize = False)
    model = create_model('abod', fraction=0.02)
    model_list.append(model)

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

In [89]:
preds = []
threshold = 0

for m in model_list:
  predictions = predict_model(m, test)
  preds.append(np.array(predictions['Anomaly']))

pred = 1*(np.sum(preds,axis=0) >= len(preds))

In [90]:
list(pred).count(0), list(pred).count(1)

(7052, 337)

In [91]:
submit = pd.read_csv(data_path+'/answer_sample.csv')
submit['label'] = pred
submit.to_csv('/content/drive/MyDrive/Colab Notebooks/Air_Pressure/0420_Highest_PCA_2.csv', index=False)