In [1]:
from src import dataset_, model_, train_, inference_
from src.utils import *
import seaborn as sns
import umap
from sklearn.manifold import TSNE

import warnings
# Suppress every warning category for the remainder of the session.
warnings.filterwarnings('ignore')

# Use the first CUDA device when one is available; otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

import random

Make sample data

In [2]:
# --- Synthetic OES data configuration ---------------------------------------
oes_sensor_numbers = 30
sensor_list = [f'OES_{num}' for num in range(1, oes_sensor_numbers + 1)]
pretrain_data_num, train_data_num, test_data_num = 100, 400, 200

# Number of samples to generate for each split.
data_num_dict = {
    'pre_train' : pretrain_data_num,
    'train' : train_data_num,
    'test' : test_data_num
}

# Sample lengths are drawn from [min_data_length, max_data_length):
# the upper bound is exclusive, so the longest generated sample has
# max_data_length - 1 rows.
min_data_length, max_data_length = 103, 109
data_key_list = ['key']

# Candidate labels and their sampling weights, keyed by split name so the
# mapping does not depend on the insertion order of data_num_dict.
label_distribution_dict = {
    'pre_train': ([0], [1.0]),
    'train': ([0, 1, -1], [0.7, 0.2, 0.1]),
    'test': ([0, 1], [0.8, 0.2]),
}

# Dedicated, seeded generators: the synthetic data is identical on every
# "Restart & Run All" and the global random / numpy RNG state is left untouched.
data_seed = 42
py_rng = random.Random(data_seed)
np_rng = np.random.default_rng(data_seed)

dataset_df_dict = {}
for dataset_name, data_num_range in data_num_dict.items():
    # The label distribution is fixed per split, so look it up once here
    # instead of once per generated sample.
    classes, class_probabilities = label_distribution_dict[dataset_name]

    dataset_df_list = []
    for data_index in range(data_num_range):
        sampling_size = py_rng.randrange(min_data_length, max_data_length)
        class_label = py_rng.choices(classes, class_probabilities, k=1)[0]

        # One sample: `sampling_size` rows of uniform [0, 1) values per sensor,
        # tagged with its index within the split ('key') and a single label.
        generation_array = np_rng.random((sampling_size, oes_sensor_numbers))
        generation_df = pd.DataFrame(generation_array, columns=sensor_list)
        generation_df['key'] = data_index
        generation_df['label'] = class_label
        dataset_df_list.append(generation_df.reset_index(drop=True))

    dataset_df_dict[dataset_name] = dataset_df_list

pre_train_set, train_set, test_set = dataset_df_dict['pre_train'], dataset_df_dict['train'], dataset_df_dict['test']

fin_dataset_list = [pre_train_set, train_set, test_set]

Make DataLoader

In [3]:
# The length handed to dataset_ae is the upper bound used when generating samples.
max_len = max_data_length

# One dataset per split, in the order stored in fin_dataset_list:
# pre-train, train, test.
pre_train_dataset, train_dataset, test_dataset = (
    dataset_.dataset_ae(split_frames, max_len, sensor_list)
    for split_frames in fin_dataset_list
)

# Pre-train and train loaders drop the last incomplete batch; only the train
# loader shuffles. The test loader keeps every sample.
pre_train_loader = DataLoader(
    pre_train_dataset,
    batch_size=20,
    drop_last=True,
)
train_loader = DataLoader(
    train_dataset,
    batch_size=20,
    shuffle=True,
    drop_last=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=12,
    drop_last=False,
)

Pre train model

In [4]:
# Shape of the first pre-training sample; its leading axis is used as the
# sensor count and its second axis as the length of the dummy batch below.
sample_shape = pre_train_dataset[0][0].shape
sensor_num = sample_shape[0]
latent_dim = 100
pre_model = model_.base_AE(sensor_num, latent_dim).to(device)

# Smoke test: push one random batch of 15 samples through the autoencoder.
test_tensor = torch.rand(15, sensor_num, sample_shape[1]).to(device)
squeezed_x, squeezed_x_hat, latent_x, layer_output = pre_model(test_tensor)

# Loss of the autoencoder on the same random batch, using an MSE criterion.
criterion = torch.nn.MSELoss().to(device)
loss = train_.loss_base_ae(pre_model, test_tensor, criterion)

Get Hypersphere

In [5]:
# Hypersphere center computed from `pre_model` over the pre-training loader.
final_center = inference_.get_hypersphere_center(
    pre_model,
    pre_train_loader,
    device,
    type='ae',
)

100%|██████████| 5/5 [00:00<00:00, 26.67it/s]


Train SS-LAD

In [6]:
# Attach the pre-trained encoder parameters to the Deep SAD model.
# A new autoencoder is built and the weights of `pre_model` are copied into it,
# so the encoder starts from the same parameters that produced `final_center`
# while `pre_model` itself stays untouched by Deep SAD training.
# NOTE(review): assumes base_AE is a torch.nn.Module (it is moved with .to(device)
# and called like one) — confirm in src/model_.
AE = model_.base_AE(sensor_num, latent_dim).to(device)
AE.load_state_dict(pre_model.state_dict())
deep_sad_model = AE.encoder

# hyper parameters
n_epochs = 500
lr = 0.001
eta = 10 # weight applied to labeled data
eps = 1e-6 # keeps dist strictly greater than 0
type_ = 'ae'

deep_sad_model.train()
for batch, (data, label) in enumerate(train_loader, 1):
    data = data.float().to(device)
    label = label.to(device)

    # calculate the loss
    dist, loss = train_.deep_sad_loss(data, label, deep_sad_model, 
                                      final_center, eta, eps, type_)
    # Only the first batch is evaluated here: this cell checks that the loss
    # can be computed, it does not run a training loop.
    break