# Extract features from the transformer [ok]

In [1]:
# Data Analysis Tools
import pandas as pd
import numpy as np

# Visualization Tools
import matplotlib.pyplot as plt
import seaborn as sns

import os
from tqdm import tqdm
import datetime

import pickle

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, random_split

In [3]:
# Transformer for time series Class
from utils.transformers_tst.tst import Transformer
from utils.transformers_tst.src.utils import compute_loss
from utils.transformers_tst.src.visualization import map_plot_function, plot_values_distribution, plot_error_distribution, plot_errors_threshold, plot_visual_sample

In [4]:
# Plot curve
import utils.functions_plot as PL

In [5]:
# Read the PHM 2016 CMP arrays (mode I, chamber 4) that were saved as .npy files.
X_train, y_train = (
    np.load("./data phm 2016/X_train_r_modeI_chamber4_mm.npy"),
    np.load("./data phm 2016/y_train_modeI_chamber4_mm.npy"),
)
X_test, y_test = (
    np.load("./data phm 2016/X_test_r_modeI_chamber4_mm.npy"),
    np.load("./data phm 2016/y_test_modeI_chamber4_mm.npy"),
)

# Report the shape of every array as a quick sanity check.
print('X_train shape: ', X_train.shape)
print('y_train shape: ', y_train.shape)
print('X_test shape: ', X_test.shape)
print('y_test shape: ', y_test.shape)

X_train shape:  (798, 263, 19)
y_train shape:  (798, 1)
X_test shape:  (165, 263, 19)
y_test shape:  (165, 1)


In [6]:
# Basic parameters, read straight from the array shapes.
# Printed labels, in order: training wafer count, longest time-series length,
# number of fields (variables), test wafer count.
wafer_number, max_batch_length, variable_number = X_train.shape
wafer_number_test = X_test.shape[0]
print('训练集晶圆个数：', wafer_number)
print('最长时间序列长度：', max_batch_length)
print('字段个数：', variable_number)
# This value comes from X_test, so it is labelled as the test-set wafer count
# (it previously reused the training-set label).
print('测试集晶圆个数：', wafer_number_test)

训练集晶圆个数： 798
最长时间序列长度： 263
字段个数： 19
训练集晶圆个数： 165


In [7]:
class CMPModeIChamber4Dataset(Dataset):
    """Torch dataset pairing each wafer's input array with its target.

    Both arrays are copied and cast to float32 on construction, and are
    indexed along their first axis.

    Attributes
    ----------
    _x: np.ndarray
        Inputs. In this notebook the shape is
        (wafer_number, seq_length, variable_number).
    _y: np.ndarray
        Targets. In this notebook the shape is (wafer_number, 1).
    """

    def __init__(self, dataset_x, dataset_y, **kwargs):
        """Store float32 copies of the inputs and targets.

        Parameters
        ---------
        dataset_x: array-like
            Inputs; the first axis indexes samples.
        dataset_y: array-like
            Targets; must have the same first-axis length as ``dataset_x``.

        Raises
        ------
        ValueError
            If either argument is a scalar, or if the two arguments do not
            hold the same number of samples.
        """
        super().__init__(**kwargs)

        # np.asarray lets plain lists through; .astype keeps the original
        # behaviour of always storing an independent float32 copy.
        inputs = np.asarray(dataset_x).astype(np.float32)
        targets = np.asarray(dataset_y).astype(np.float32)

        if inputs.ndim == 0 or targets.ndim == 0:
            raise ValueError("dataset_x and dataset_y must have a sample axis")
        # Fail at construction instead of with an IndexError in the middle of
        # an epoch (or with silently unused targets).
        if inputs.shape[0] != targets.shape[0]:
            raise ValueError(
                "dataset_x and dataset_y must contain the same number of samples, "
                f"got {inputs.shape[0]} and {targets.shape[0]}"
            )

        self._x = inputs
        self._y = targets

    def __getitem__(self, idx):
        """Return the ``(x, y)`` pair(s) selected by ``idx``."""
        # Tensor indices (e.g. produced by samplers) are converted to plain
        # Python ints / lists so numpy indexing can consume them.
        if torch.is_tensor(idx):
            idx = idx.tolist()

        return (self._x[idx], self._y[idx])

    def __len__(self):
        """Number of samples (length of the first axis of the inputs)."""
        return self._x.shape[0]

    def get_x_shape(self):
        """Return the shape of the stored input array."""
        return self._x.shape

    def get_y_shape(self):
        """Return the shape of the stored target array."""
        return self._y.shape

In [8]:
# Wrap each split in the dataset class so a DataLoader can batch it.
cmp_train, cmp_test = (
    CMPModeIChamber4Dataset(X_train, y_train),
    CMPModeIChamber4Dataset(X_test, y_test),
)

In [9]:
# Loader settings, plus the compute device: the first CUDA GPU when one is
# available, otherwise the CPU.
BATCH_SIZE, NUM_WORKERS = 100, 0
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device {device}")

Using device cpu


In [10]:
# Mini-batch iterators over the two splits: the training loader reshuffles on
# every pass, the test loader keeps the stored order.
dataloader_train = DataLoader(
    dataset=cmp_train,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=True,
    pin_memory=False,
)
dataloader_test = DataLoader(
    dataset=cmp_test,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=False,
)

In [11]:
# Load the previously saved network from its pickle file.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file,
# so only load checkpoints from a trusted source.
# The context manager closes the file handle, which the bare open() call leaked.
with open('./results_save/tensforflow_without_validation/epocha400_batchsize100_net-2.pkl', 'rb') as net_file:
    net = pickle.load(net_file)

In [12]:
# Print the loaded network's module hierarchy (child names and layer sizes).
print(net)

Transformer(
  (layers_encoding): ModuleList(
    (0): Encoder(
      (_selfAttention): MultiHeadAttention(
        (_W_q): Linear(in_features=8, out_features=16, bias=True)
        (_W_k): Linear(in_features=8, out_features=16, bias=True)
        (_W_v): Linear(in_features=8, out_features=16, bias=True)
        (_W_o): Linear(in_features=16, out_features=8, bias=True)
      )
      (_feedForward): PositionwiseFeedForward(
        (_linear1): Linear(in_features=8, out_features=2048, bias=True)
        (_linear2): Linear(in_features=2048, out_features=8, bias=True)
      )
      (_layerNorm1): LayerNorm((8,), eps=1e-05, elementwise_affine=True)
      (_layerNorm2): LayerNorm((8,), eps=1e-05, elementwise_affine=True)
      (_dopout): Dropout(p=0.2, inplace=False)
    )
    (1): Encoder(
      (_selfAttention): MultiHeadAttention(
        (_W_q): Linear(in_features=8, out_features=16, bias=True)
        (_W_k): Linear(in_features=8, out_features=16, bias=True)
        (_W_v): Linear(in_fe

# extract features

In [13]:
# Walk the top-level children of the loaded network, echoing each name, and
# bind the pieces used by the manual forward pass to module_* variables.
for name, module in net._modules.items():
    print(name)

    # Single modules are bound directly under their own variable.
    if name == '_embedding':
        module_embedding = module
    elif name == '_linear':
        module_linear = module
    elif name == '_flatten':
        module_flatten = module
    elif name == '_output':
        module_output = module

    # Only the encoder / decoder stacks have layers to descend into.
    if name not in ('layers_encoding', 'layers_decoding'):
        continue

    for n, m in module._modules.items():
        print(n)
        if name == 'layers_encoding':
            if n == '0':
                module_encoding_0 = m
            elif n == '1':
                module_encoding_1 = m
        else:
            if n == '0':
                module_decoding_0 = m
            elif n == '1':
                module_decoding_1 = m

layers_encoding
0
1
layers_decoding
0
1
_embedding
_linear
_flatten
_output


In [14]:
# Feature extraction: run the network up to (and including) the flatten layer;
# the final `_output` layer is not applied.
def extract_transformer_features(dataloader:DataLoader):
    """Compute the flattened pre-output features for every sample of a loader.

    The forward pass is replayed module by module using the notebook-level
    ``net`` and ``module_*`` variables: embedding, positional encoding, two
    encoder layers, positional encoding, two decoder layers, linear, flatten.

    Parameters
    ----------
    dataloader: DataLoader
        Yields ``(x, y)`` batches; ``y`` is ignored. Rows of the result follow
        the loader's iteration order, so use an unshuffled loader when rows
        must line up with the dataset.

    Returns
    -------
    np.ndarray
        float64 array of shape ``(n_samples_seen, feature_width)``, where
        ``feature_width`` is taken from the flatten output instead of being
        hard-coded (263 for the network in this notebook). An empty ``(0, 0)``
        array is returned when the loader yields no batch.
    """
    # Dropout stays active in training mode even under torch.no_grad(), which
    # would make the extracted features random. Force eval mode and restore
    # the previous mode afterwards.
    was_training = net.training
    net.eval()

    # Run the batches on whatever device the network's weights live on.
    first_param = next(net.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    predictions = None
    idx_prediction = 0
    try:
        with torch.no_grad():
            for x, y in tqdm(dataloader, total=len(dataloader)):
                x = x.to(model_device)
                K = x.shape[1]
                # embedding
                embedding = module_embedding(x)
                # positional embedding for encoding
                pe_params = {'period': net._pe_period} if net._pe_period else {}
                positional_encoding = net._generate_PE(K, net._d_model, **pe_params)
                positional_encoding = positional_encoding.to(embedding.device)
                embedding.add_(positional_encoding)
                # 2 layer encoder
                encoding_0 = module_encoding_0(embedding)
                encoding_1 = module_encoding_1(encoding_0)

                # decoding
                # NOTE(review): decoding_0 is the same tensor object as
                # encoding_1, so the in-place add below also changes the
                # memory handed to both decoder layers. Kept as is because it
                # presumably mirrors Transformer.forward — confirm.
                decoding_0 = encoding_1
                # positional embedding for decoding
                positional_encoding = net._generate_PE(K, net._d_model)
                positional_encoding = positional_encoding.to(decoding_0.device)
                decoding_0.add_(positional_encoding)
                # 2 layer decoder
                decoding_0 = module_decoding_0(decoding_0, encoding_1)
                decoding_1 = module_decoding_1(decoding_0, encoding_1)
                # linear
                linear = module_linear(decoding_1)
                # flatten
                flatten = module_flatten(linear)

                # Explicit host copy: assigning a CUDA tensor into a numpy
                # array would fail.
                batch_features = flatten.cpu().numpy()
                if predictions is None:
                    # Allocate lazily so the width comes from the model output.
                    predictions = np.empty(shape=(len(dataloader.dataset), batch_features.shape[1]))
                predictions[idx_prediction:idx_prediction+x.shape[0]] = batch_features
                idx_prediction += x.shape[0]
    finally:
        net.train(was_training)

    if predictions is None:
        return np.empty(shape=(0, 0))
    # Drop rows that were never written (e.g. a loader that skips samples).
    return predictions[:idx_prediction]

In [15]:
# dataloader_train is built with shuffle=True, so extracting through it would
# store the training features in a random order that no longer matches the
# rows of X_train / y_train. Use an unshuffled loader over the same dataset.
dataloader_train_ordered = DataLoader(cmp_train,
                                      batch_size=BATCH_SIZE,
                                      shuffle=False,
                                      num_workers=NUM_WORKERS
                                     )
predictions_train = extract_transformer_features(dataloader=dataloader_train_ordered)
predictions_test = extract_transformer_features(dataloader=dataloader_test)
print(predictions_train.shape,predictions_test.shape)

100%|██████████| 8/8 [00:12<00:00,  1.56s/it]
100%|██████████| 2/2 [00:02<00:00,  1.34s/it]

(798, 263) (165, 263)





In [16]:
# Save the extracted feature arrays. The context managers close each file, so
# the pickles are fully flushed to disk as soon as the cell finishes (the bare
# open() calls left the handles open).
with open('./results_save/tensforflow_without_validation/features_tf_withoutPE_train.pkl', 'wb') as train_file:
    pickle.dump(predictions_train, train_file)
with open('./results_save/tensforflow_without_validation/features_tf_withoutPE_test.pkl', 'wb') as test_file:
    pickle.dump(predictions_test, test_file)

# Define new structure [not ok]

In [17]:
# class TransformerFlatten(Transformer):
#     def __init__(self, net:Transformer):
#         super().__init__()
#         self.model = nn.Sequential()
#         self.model.add_module(name='linear_1', module=list(net.children())[2]) #Linear(in_features=19, out_features=8, bias=True)
#         self.model.add_module(name='encoder', module=list(net.children())[0]) # encoder
#         self.model.add_module(name='decoder', module=list(net.children())[1]) # decoder
#         self.model.add_module(name='linear_2', module=list(net.children())[3]) # Linear(in_features=8, out_features=1, bias=True)
#         self.model.add_module(name='flatten', module=list(net.children())[4]) # flatten
#     def forward(self, x: torch.Tensor) -> torch.Tensor:
#         return self.model(x)

In [18]:
# new_net = nn.Sequential()
# new_net.add_module(name='linear_1', module=list(net.children())[2]) #Linear(in_features=19, out_features=8, bias=True)
# new_net.add_module(name='encoder', module=list(net.children())[0]) # encoder
# new_net.add_module(name='decoder', module=list(net.children())[1]) # decoder
# new_net.add_module(name='linear_2', module=list(net.children())[3]) # Linear(in_features=8, out_features=1, bias=True)
# new_net.add_module(name='flatten', module=list(net.children())[4]) # flatten
# print(new_net)

In [19]:
# new_net = TransformerFlatten(net)
# print(new_net.model)

In [20]:
# extract some layers before flatten
# new_model = nn.Sequential(list(net.children())[5])
# new_model = nn.Sequential(*list(net.children())[:6])
# print
#0:encoder; 1:decoder; *
#2:Linear(in_features=19, out_features=8, bias=True)
#3:Linear(in_features=8, out_features=1, bias=True)
#4:Flatten(start_dim=1, end_dim=-1)
#5:Linear(in_features=263, out_features=1, bias=True)
#sequence: 2/*0/*1/3/4/


In [21]:
# predict for test

In [22]:
# def predict_new_net(net:nn.Module, dataloader:DataLoader):
#     predictions = np.empty(shape=(len(dataloader.dataset), 1))

#     idx_prediction = 0
#     with torch.no_grad():
#         for x, y in tqdm(dataloader, total=len(dataloader)):
#             netout = net(x.to(device)).cpu().numpy()
#             predictions[idx_prediction:idx_prediction+x.shape[0]] = netout
#             idx_prediction += x.shape[0]
            
#     return predictions

In [23]:
# out = x.to(device)
# out = list(net.children())[2](out)
# out = list(net.children())[0][0](out)
# out = list(net.children())[0][1](out)
# out = list(net.children())[1][0](out)
# out = list(net.children())[1][1](out)
# out = list(net.children())[3](out)
# out = list(net.children())[4](out)

In [24]:
# list(net.children())[0][1]

In [25]:
# # predict for train
# predictions_train = predict_new_net(new_net, dataloader_test)