In [1]:
#!/home/zwl/miniconda3/envs/asr/bin/python3
import torch
from torchvision.models import resnet18
import torch.nn as nn
import torch.optim as optim
from data import Data, MyDataLoader, MyDataset
from trainer import Trainer
from utils import set_seed
from pathlib import Path

import logging
import logging.config
from utils import get_logging_config
import gc
import numpy as np

In [2]:
# logger set
logging.config.dictConfig(get_logging_config(file_name='recogization_test.log'))
logger = logging.getLogger('logger')

set_seed(42)

train_config = {
    'batch_size':128,
    'shuffle':True,
    'drop_last':True,
    'pin_memory':True,
}

test_config = {
    'batch_size':128,
    'shuffle':True,
    'drop_last':True,
    'pin_memory':True,
}

print('Stage1: data load')
data = Data(logger)
x_train, x_test, y_train, y_test = data.read_9_2_data_as_30_phones_by_day()
# 如果只是单纯训练模型，则只要将下面注释即可，如果要未知源识别，则取消注释下面代码
x_train, x_test, x_val, y_train, y_test, y_val = data.recognization_data_process(x_train, x_test, y_train, y_test)
gc.collect()
print('load data successful')
x_train = data.process(x_train)
x_test = data.process(x_test)
x_val = data.process(x_val) #模型训练部分不需要val

Stage1: data load


y_test bincount :[24305 32163 56972 35924 21128 37325 21668 33597 39180 41764 48416 21767
 27981 23206 30245 43291 26684 39425 22082 22419 21028 43255 41950 24148
 23184 53472 31008 23939 46712 32136]
y_train bincount:[24194 32102 56964 35904 21110 37128 21652 33770 39282 41724 48391 21573
 27892 23200 30154 43178 26551 39411 21985 22405 21024 43342 41989 24026
 23117 53214 30826 24090 46866 32149]
y_test bincount :[39180 41764 48416 21767 27981 23206 30245 43291 26684 39425 22082 22419
 21028 43255 41950 24148 23184 53472 31008 23939 46712 32136]
y_train bincount:[39282 41724 48391 21573 27892 23200 30154 43178 26551 39411 21985 22405
 21024 43342 41989 24026 23117 53214 30826 24090 46866 32149]
y_val bincount:[525906 436375]


load data successful


In [4]:
del x_train, y_train

In [5]:
del x_test, y_test

In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = resnet18()
model.conv1 = nn.Conv2d(2, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
model.maxpool = nn.AdaptiveAvgPool2d(output_size=(7, 7))
model.fc = nn.Linear(512, len(np.bincount(y_test)), bias=True)
model.load_state_dict(torch.load('./model/9_2data_read_data_as_30_phones_by_day_resnet_StandarScale.model'))
model = model.to(device)

In [16]:
np.bincount(y_val)

array([525906, 436375])

In [17]:
# 特征提取测试，看神经网络哪一层的输出事比较好的结果
# 1. 直接提取输出层
#type(x_val[0])
unrecog = model(torch.tensor(x_val[0:2, :]).cuda().float()).cpu()
recog = model(torch.tensor(x_val[600000:600000+2, :]).cuda().float()).cpu()

In [18]:
print(unrecog)
print(recog)

tensor([[ -7.3342, -11.0766,  -3.7140,  -9.4895,  -5.6619,  -2.6834,  -3.5643,
          -7.3402,  -3.3576,  -6.2765,  -9.7875,  -9.2831,  -4.5601,  -8.5839,
          -1.7162,  -5.9391,  -6.7807,  -8.0487,  -4.2620,  -9.3888,  -6.3493,
          -9.6943],
        [ -9.5175,  -0.1114,  -9.7081,  -8.6638, -10.1976,  -9.5350, -11.1217,
          -4.1878, -12.0176,  -8.2710,  -4.2189,  -9.6429,  -9.1712,  -0.7747,
         -13.8330,  -8.6009, -11.7026,  -4.6962,  -9.6907, -10.8896,  -4.4016,
          -6.1932]], grad_fn=<CopyBackwards>)
tensor([[-10.5961,  -3.4720, -16.9160, -14.9896,  -8.0082,  -2.8703, -11.1524,
         -12.0226,  -3.9791,  -7.0316, -11.4799,  -9.0377,  -7.7611,  -4.3624,
          -4.1402,  -9.6177,  -6.5700,  -9.9683,  -7.0765, -11.7273,  -2.4924,
          -3.3978],
        [ -7.2578, -11.2299,   3.7462,  -2.5240,  -8.6796, -10.7125,  -8.8843,
          -0.5616, -11.6816,  -8.3758,  -6.0592, -11.2445,  -7.6342,  -7.7002,
         -15.1438,  -6.8952, -13.0014,  -3.37

In [81]:
# 使用下面方法获得中间层特征
def get_activation(name):
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

model.avgpool.register_forward_hook(get_activation(model.avgpool))
tensor = torch.FloatTensor(test).to(device)
model(tensor).argmax(dim=1).cpu().numpy()

# 查看字典，会发现中间层特征已经存储到字典中了

array([19, 19, 19, 19])

In [82]:
for key, value in activation.items():
    activation[key] = value.reshape(-1, 512)

In [83]:
for key, value in activation.items():
    print(value.shape)

torch.Size([4, 512])


In [33]:
np.bincount(y_train)

array([39282, 41724, 48391, 21573, 27892, 23200, 30154, 43178, 26551,
       39411, 21985, 22405, 21024, 43342, 41989, 24026, 23117, 53214,
       30826, 24090, 46866, 32149])

In [36]:
import random
random.sample(range(20), 10)

[15, 2, 16, 14, 17, 4, 8, 13, 9, 0]

In [44]:
train_idx = random.sample(range(len(y_train)), int(len(y_train)*0.3))

In [45]:
np.bincount(y_train[train_idx])

array([11801, 12617, 14580,  6468,  8320,  6969,  9044, 12939,  7916,
       11815,  6462,  6750,  6435, 12998, 12554,  7162,  6864, 15952,
        9359,  7259, 14012,  9640])

In [46]:
train_sample = x_train[train_idx, :]

In [57]:
train_dataset = MyDataset(train_sample, y_train[train_idx])

In [73]:
train_loader = MyDataLoader(train_dataset, shuffle=False, batch_size=128)

In [61]:
print_base = {key:[] for key in range(10)}

In [284]:
def get_activation(name):
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

def regroup(print_base, activation, label):
    for key, value in activation.items():
        activation[key] = value.reshape(-1, 512).cpu()
        for i in range(num_classes):
            idx = (label == i)
            print_base[i].extend(activation[key][idx, :])
    return print_base
        
num_classes = len(np.bincount(y_train))
print_base = {key:[] for key in range(num_classes)}
with torch.no_grad():
    model.eval()
    #model.avgpool.register_forward_hook(get_activation(model.avgpool))
    model.avgpool.register_forward_hook(get_activation(model.fc))
    for feature, label in train_loader:
        activation = {}
        feature = feature.reshape(-1, 2, 64, 64).to(device)
        label = label.to(device)
        model(feature)
        print_base = regroup(print_base, activation, label)

In [290]:
gc.collect()

2818

In [288]:
for key, value in print_base.items():
    #value = torch.cat(value).reshape(-1, 512)
    value = torch.cat(value).reshape(-1, num_classes)
    print(value.shape)
    assert 0
    print_base[key] = torch.sum(value, 0) / value.shape[0] # 对向量求和 并求平均

RuntimeError: shape '[-1, 22]' is invalid for input of size 12084224

In [94]:
np.bincount(y_train[train_idx])

array([11801, 12617, 14580,  6468,  8320,  6969,  9044, 12939,  7916,
       11815,  6462,  6750,  6435, 12998, 12554,  7162,  6864, 15952,
        9359,  7259, 14012,  9640])

In [97]:
val_config = {
    'batch_size':128,
    'shuffle':False,
    'drop_last':False,
    'pin_memory':True,
}
val_dataset = MyDataset(x_val, y_val)
val_loader = MyDataLoader(val_dataset, **val_config)

In [107]:
val_feature = []
val_label = []
with torch.no_grad():
    model.eval()
    model.avgpool.register_forward_hook(get_activation(model.avgpool))
    for feature, label in val_loader:
        activation = {}
        feature = feature.reshape(-1, 2, 64, 64).to(device)
        label = label.to(device)
        model(feature)
        for key, value in activation.items():
            val_feature.append(value.reshape(-1, 512).cpu().numpy())
            val_label.extend(label.cpu().numpy())

In [111]:
val_feature = torch.cat(val_feature)
val_label = np.tensor(val_label).long()
gc.collect()

TypeError: cat(): argument 'tensors' (position 1) must be tuple of Tensors, not numpy.ndarray

In [112]:
val_feature.shape

(962281, 512)

In [113]:
val_label.shape

(962281,)

In [124]:
val_feature = torch.tensor(val_feature).float()
val_label = torch.tensor(val_label).long()

In [137]:
torch.cosine_similarity(t1, t2, dim=1)

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [277]:
from tqdm import tqdm
res = []
for source_tensor in tqdm(val_feature):
    for key, value in print_base.items():
        attention_score = torch.cosine_similarity(source_tensor, value, dim=0)
        res.append(attention_score)

100%|██████████| 962281/962281 [07:35<00:00, 2112.21it/s]


In [278]:
res = np.array(res).reshape(-1, 22)

In [279]:
res.shape

(962281, 22)

In [280]:
max_res = np.max(res, 1)

In [281]:
max_res

array([0.9855907 , 0.99236864, 0.9857584 , ..., 0.9400834 , 0.9842423 ,
       0.96428245], dtype=float32)

In [282]:
result = ((max_res > 0.95).astype(int) == val_label.numpy())

In [150]:
len(res)

22

In [283]:
sum(result) / len(result)

0.4465701806436997