In [1]:
from train import *

In [2]:
model = transformer2vector()

In [3]:
class testProfileDataset(Dataset):
    def __init__(self, data_path, meta_path, mask_path, feature_path):
        try:
            import pandas as pd
            import numpy as np
            import random
        except:
            raise("pandas or numpy not found")
        try:
            self.data = pd.read_pickle(data_path)
            self.meta = pd.read_pickle(meta_path)
            self.mask = pd.read_pickle(mask_path)
            self.feature = pd.read_pickle(feature_path)
        except:
            raise("load data failed")
        self.seed = 19260817
        random.seed(self.seed)
        only_one_model_batch = []
        for k,v in self.feature.items():
            if sum(v - np.ones(8)<0) < 2:
                only_one_model_batch.append(k)
        select_from_meta = []
        for model, batch in only_one_model_batch:
            s = self.meta[(self.meta.model == model) & (self.meta.batch == batch)]
            select_from_meta.append(s)
        for x in select_from_meta:
            self.meta.drop(x.index, inplace=True)
    def __len__(self):
        return len(self.meta)
    
    def __getitem__(self, idx):
        gpu, model, batch, length, time_a, gpu_mem_a, cpu_mem_a = self.meta.iloc[idx]
        name_path = "{}_{}_{}".format(gpu, model, batch)
        raw_data = self.data[name_path]
        mask_vector = self.mask[name_path]
        kernel = raw_data[:, 0]
        vector = raw_data[:, 1:13]
        vector = vector.astype(np.float32)
        feature_vec = self.feature[(model, batch)]
        feature_vec = feature_vec.astype(np.float32) / np.max(feature_vec)
        return kernel, vector, feature_vec, mask_vector, length, "{}_{}_{}".format(gpu, model, batch)

In [4]:
profile_dataset = testProfileDataset("data.pkl", "meta.pkl", "mask.pkl", "feature.pkl")
dataloader = DataLoader(profile_dataset, batch_size=130, shuffle=True, num_workers=4)

In [5]:
class ServeRandomMaskModule(nn.Module):
    def __init__(self, train_mask_module :RandomMaskModule):
        super(ServeRandomMaskModule, self).__init__()
        self.embedding_layer = train_mask_module.embedding_layer
        self.mlp = train_mask_module.mlp

    def forward(self, number_seq_input, vec_seq_input, mask):
        # extra_mask = torch.rand(number_seq_input.shape).to(self.device_indicator.device)
        # extra_mask = extra_mask < vector_rate
        # amsk is extra_mask or mask
        # mask = extra_mask | mask

        embedded_numbers = self.embedding_layer(number_seq_input)
        masked_embedded_numbers = embedded_numbers * (1 - mask.unsqueeze(-1).float())

        mlp_output = self.mlp(vec_seq_input)
        masked_mlp_output = mlp_output * mask.unsqueeze(-1).float()

        output = masked_embedded_numbers + masked_mlp_output

        return output

In [25]:
model.load_state_dict(torch.load("correct_model_200.pt"))

<All keys matched successfully>

In [15]:
# serve_model = ServeRandomMaskModule(model.mask_module.to('cpu'))

In [16]:
# model.mask_module = serve_model

In [26]:
device = 'cuda'
model = model.to(device)

error_rate_each = []
real_rate = []

for kernel, vector, feature_vec, mask, length, name in dataloader:
    # print(name)
    kernel = kernel.to(device)
    vector = vector.to(device)
    # feature_vec = feature_vec.to(device)
    mask = mask.to(device)
    length = length.to(device)
    o = model(kernel, vector, mask, length)
    o = o.cpu()
    o = o * (feature_vec > 0)
    o_m, _ = torch.max(o, dim=1, keepdim=True)
    o = o / o_m
    o = o.cpu().detach().numpy()
    f = feature_vec.cpu().detach().numpy()
    for i in range(len(o)):
        error_rate_each.append(np.mean(np.abs(o[i] - f[i])))

In [27]:
error = np.array(error_rate_each)
error_non_zero = error[error > 0]
print("error mean: ", np.mean(error_non_zero))
print("error std: ", np.std(error_non_zero))
print("error max: ", np.max(error_non_zero))

error mean:  0.01695101
error std:  0.023317022
error max:  0.1892632
