In [2]:
import numpy as np
import os
import random
from torch.utils.data import Dataset, DataLoader
import torch
from torchvision.models import resnet18
import torch.nn as nn
import torch.optim as optim

In [3]:
resnet18()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [92]:
class Data:
    def __init__(self):
        pass
    
    def load(self, path):
        self.data = np.load(path)
        return self.data
        
    @staticmethod
    def process(data):
        data = data / np.expand_dims(np.max(np.abs(data), 1), axis=1)
        data = data.reshape(-1, 2, 64, 64)
        return data

In [93]:
data = Data()

In [94]:
x_train = data.load('../old_data/train/10type_sort_train_data_8192.npy')
x_train = data.process(x_train)
y_train = data.load('../old_data/train/10type_sort_train_label_8192.npy')
x_test = data.load('../old_data/val/10type_sort_eval_data_8192.npy')
x_test = data.process(x_test)
y_test = data.load('../old_data/val/10type_sort_eval_label_8192.npy')

In [103]:
class Train_Test_Split:
    def __init__(self, x_train, x_test, y_train, y_test):
        self.x_train = x_train
        self.x_test = x_test
        self.y_train = y_train
        self.y_test = y_test
        # 按类别分类好手机信号
        self.x_train_data_list = []
        self.y_train_data_list = []
        self.x_test_data_list = []
        self.y_test_data_list = []
        
    def fit_transform(self, mode):
        """
        mode: 数据划分的方式
            1. train 训练模型提取embedding, 就是对普通手机操作，测试集中不能含有未知源
            2. test 验证有源无源识别, 测试集中必须含有已知源和未知源
        """
        # 将手机信号按训练集和测试集的类别分好
        for i in range(10):
            temp0 = self.x_train[self.y_train == i]
            temp1 = self.y_train[self.y_train == i]
            temp2 = self.x_test[self.y_test == i]
            temp3 = self.y_test[self.y_test == i]
            self.x_train_data_list.append(temp0)
            self.y_train_data_list.append(temp1)
            self.x_test_data_list.append(temp2)
            self.y_test_data_list.append(temp3)
        # 因为一共有10类手机，这里按照7类手机为已知源，3类手机为未知源
        # 训练集中只能含有已知源手机，测试集中已知源和未知源手机信号都要含有
        # 因为该数据训练集和测试集的时间差异比较大，所以这里按照下列方法来创建新的训练测试集
        # 1. 新的训练集只来自旧的训练集，新的测试集只来自旧的测试集
        # 2. 从训练集中随机抽取3类手机作为未知源，这三类未知源的信号，可以分到新的测试集中，从而新的训练集只含有已知源信号
        # 3. 新的测试集中含有已知源，也含有未知源
        unknown_ = [0, 1, 2] # 为了保证结果稳定，先固定手机类别
        #unknow_ = random.sample(range(10), 3) # 随机生成3个未知源手机
        known_ = [3, 4, 5, 6, 7, 8, 9]
        self.new_x_train_list = []
        self.new_y_train_list = []
        self.new_x_test_list = []
        self.new_y_test_list = []
        if mode == 'train':
            for i in range(10):
                if i in known_:
                    self.new_x_train_list.append(self.x_train_data_list[i])
                    self.new_y_train_list.append(self.y_train_data_list[i])
                    self.new_x_test_list.append(self.x_test_data_list[i])
                    self.new_y_test_list.append(self.y_test_data_list[i])
        else:
            for i in range(10):
                if i in unknown_:
                    temp0 = np.concatenate([self.x_train_data_list[i], self.x_test_data_list[i]], axis=0)
                    temp1 = np.concatenate([self.y_train_data_list[i], self.y_test_data_list[i]], axis=0)
                    self.new_x_test_list.append(temp0)
                    self.new_y_test_list.append(temp1)
                if i in known_:
                    self.new_x_train_list.append(self.x_train_data_list[i])
                    self.new_y_train_list.append(self.y_train_data_list[i])
        x_train = np.concatenate(self.new_x_train_list, axis=0)
        y_train = np.concatenate(self.new_y_train_list, axis=0)
        x_test = np.concatenate(self.new_x_test_list, axis=0)
        y_test = np.concatenate(self.new_y_test_list, axis=0)
        return x_train, x_test, y_train, y_test

In [104]:
train_test_split = Train_Test_Split(x_train, x_test, y_train, y_test)

In [105]:
x_train, x_test, y_train, y_test = train_test_split.fit_transform('train')

In [106]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((51601, 2, 64, 64), (17167, 2, 64, 64), (51601,), (17167,))

In [107]:
np.bincount(y_train)

array([    0,     0,     0, 17187,  4746,  6337, 10298,  3694,  5058,
        4281])

In [108]:
class MyDataset(Dataset):
    def __init__(self, x, y):
        super().__init__()
        self.x = torch.FloatTensor(x)
        self.y = torch.LongTensor(y)
        
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]
    
    def __len__(self):
        return len(self.y)

class MyDataLoader(DataLoader):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        pass

In [109]:
train_dataset = MyDataset(x_train, y_train)
test_dataset = MyDataset(x_test, y_test)

In [110]:
train_config = {
    'batch_size':128,
    'shuffle':True,
    'drop_last':True,
    'pin_memory':True,
}
test_config = {
    'batch_size':128,
    'shuffle':True,
    'drop_last':True,
    'pin_memory':True,
}
train_loader = MyDataLoader(train_dataset, **train_config)
test_loader = MyDataLoader(test_dataset, **test_config)

In [111]:
model = resnet18()

In [112]:
model.conv1 = nn.Conv2d(2, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
model.maxpool = nn.AdaptiveAvgPool2d(output_size=(7, 7))

In [113]:
model.fc.out_features = 10

In [157]:
class Trainer:
    def __init__(self, **kwargs):
        self.device = kwargs.get("device")
        self.model = kwargs.get("model")
        self.optimizer = kwargs.get("optimizer")
        self.scheduler = kwargs.get("scheduler")
        self.criterion = kwargs.get("criterion")
        self.epochs = kwargs.get("epochs")
        self.train_loader = kwargs.get("train_loader")
        self.test_loader = kwargs.get("test_loader")
        
    def train_step(self):
        self.model.train()
        self.train_loss = 0
        self.train_acc = 0
        for feature, label in self.train_loader:
            feature = feature.to(self.device)
            label = label.to(self.device)
            
            self.optimizer.zero_grad()
            preds = self.model(feature)
            loss = self.criterion(preds, label)
            loss.backward()
            self.optimizer.step()
            self.train_acc += (preds.argmax(1) == label).sum() / len(self.train_loader.dataset)
            self.train_loss += loss.item() / len(self.train_loader)
        return self.train_loss, self.train_acc
    
    def test_step(self):
        self.model.eval()
        self.test_loss = 0
        self.test_acc = 0
        with torch.no_grad():
            for feature, label in self.test_loader:
                feature = feature.to(self.device)
                label = label.to(self.device)
                preds = self.model(feature)
                self.test_loss += self.criterion(preds, label) / len(test_loader)
                self.test_acc += (preds.argmax(1) == label).sum() / len(test_loader.dataset)
        return self.test_loss, self.test_acc
    
    def train(self):
        for epoch in range(epochs):
            train_loss, train_acc = self.train_step()
            test_loss, test_acc = self.test_step()
            self.scheduler.step(test_loss)
            print(f'Epoch:{epoch:2} | Train Loss:{train_loss:6.4f} | Train Acc:{train_acc:6.4f} | Test Loss:{test_loss:6.4f} | Test Acc:{test_acc:6.4f}')

In [176]:
epochs = 100
lr = 1e-4
criterion = nn.CrossEntropyLoss()#weight=class_weights_tensor)
optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-1)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)

config = {
    'device': torch.device('cpu' if torch.cuda.is_available() else 'cuda'),
    'model' : model,
    'optimizer' : optimizer,
    'scheduler' : scheduler,
    'criterion' : criterion,
    'epochs' : epochs,
    'train_loader' : train_loader,
    'test_loader' : test_loader,
}


In [177]:
trainer = Trainer(**config)

In [178]:
trainer.train()

Epoch: 0 | Train Loss:0.2675 | Train Acc:0.9015 | Test Loss:1.8537 | Test Acc:0.5435
Epoch: 1 | Train Loss:0.2368 | Train Acc:0.9128 | Test Loss:1.9807 | Test Acc:0.5467
Epoch: 2 | Train Loss:0.2207 | Train Acc:0.9191 | Test Loss:1.9200 | Test Acc:0.5519
Epoch: 3 | Train Loss:0.2084 | Train Acc:0.9236 | Test Loss:2.0771 | Test Acc:0.5534
Epoch: 4 | Train Loss:0.1976 | Train Acc:0.9264 | Test Loss:2.1773 | Test Acc:0.5468
Epoch: 5 | Train Loss:0.1874 | Train Acc:0.9304 | Test Loss:2.2356 | Test Acc:0.5429
Epoch: 6 | Train Loss:0.1773 | Train Acc:0.9337 | Test Loss:2.2820 | Test Acc:0.5429
Epoch: 7 | Train Loss:0.1598 | Train Acc:0.9398 | Test Loss:2.3406 | Test Acc:0.5468
Epoch: 8 | Train Loss:0.1526 | Train Acc:0.9438 | Test Loss:2.3817 | Test Acc:0.5477
Epoch: 9 | Train Loss:0.1523 | Train Acc:0.9437 | Test Loss:2.3873 | Test Acc:0.5493
Epoch:10 | Train Loss:0.1503 | Train Acc:0.9441 | Test Loss:2.4181 | Test Acc:0.5451
Epoch:11 | Train Loss:0.1496 | Train Acc:0.9454 | Test Loss:2.426

KeyboardInterrupt: 

In [1]:
from pathlib import Path
import numpy as np

In [2]:
root = Path('../data_8.8/')

In [3]:
pth_list = []
for sub0 in sorted(root.iterdir()):
    temp = []
    for sub1 in sub0.iterdir():
        if sub1.name not in ['vivo09', 'vivo15']:
            temp.append(sub1)
    pth_list.append(temp)

In [4]:
pth_list

[[PosixPath('../data_8.8/20210719-20210721/vivo24'),
  PosixPath('../data_8.8/20210719-20210721/vivo13'),
  PosixPath('../data_8.8/20210719-20210721/vivo17')],
 [PosixPath('../data_8.8/20210728-20210803/vivo04'),
  PosixPath('../data_8.8/20210728-20210803/vivo16'),
  PosixPath('../data_8.8/20210728-20210803/vivo25'),
  PosixPath('../data_8.8/20210728-20210803/vivo14'),
  PosixPath('../data_8.8/20210728-20210803/vivo22')]]

In [5]:
train_feature = []
train_label = []
test_feature = []
test_label = []
count = -1
for idx, sub0 in enumerate(pth_list):
    if idx == 0:
        for sub1 in sub0: # sub0是3台手机，sub1是手机采集轮数
            count += 1
            train_feature_temp = []
            train_label_temp = []
            test_feature_temp = []
            test_label_temp = []
            for sub2 in sorted(sub1.iterdir(), key=lambda path:int(path.name)):
                if int(sub2.name) <= 8: # sub2是轮数，如果采样轮数小于等于8, 则作为训练集
                    for dat_file in sub2.iterdir():
                        train_feature_temp.extend(np.fromfile(dat_file, dtype=np.int16)[:60*8192].reshape(-1, 8192))
                else:
                    for dat_file in sub2.iterdir():
                        test_feature_temp.extend(np.fromfile(dat_file, dtype=np.int16)[: 60*8192].reshape(-1, 8192))
            train_feature.extend(np.array(train_feature_temp))
            train_label.extend(np.full((len(train_feature_temp), ), count))
            test_feature.extend(np.array(test_feature_temp))
            test_label.extend(np.full((len(test_feature_temp), ), count))
        pass
    else:
        for sub1 in sub0: # sub0是5台手机，sub1是手机采集轮数
            count += 1
            train_feature_temp = []
            train_label_temp = []
            test_feature_temp = []
            test_label_temp = []
            for sub2 in sorted(sub1.iterdir(), key=lambda path:int(path.name)):
                if 3 <= int(sub2.name) <= 6: # sub2是轮数，如果采样轮数小于等于8, 则作为训练集
                    for dat_file in sub2.iterdir():
                        train_feature_temp.extend(np.fromfile(dat_file, dtype=np.int16)[:60*8192].reshape(-1, 8192))
                elif 7 <= int(sub2.name) <= 8:
                    for dat_file in sub2.iterdir():
                        test_feature_temp.extend(np.fromfile(dat_file, dtype=np.int16)[: 60*8192].reshape(-1, 8192))
            train_feature.extend(np.array(train_feature_temp))
            train_label.extend(np.full((len(train_feature_temp), ), count))
            test_feature.extend(np.array(test_feature_temp))
            test_label.extend(np.full((len(test_feature_temp), ), count))

In [10]:
res = np.fft.fft(train_feature[:10])

In [11]:
real = np.real(res)
imag = np.imag(res)

In [14]:
np.stack([real, imag]).shape

(2, 10, 8192)

In [13]:
len(train_feature), len(test_feature)

(509100, 241680)

In [14]:
np.bincount(train_label), np.bincount(test_label)

(array([ 77940, 115140,  82980,  18900,  60060,  63120,  60300,  30660]),
 array([44520, 40560, 42420, 12480, 25380, 24420, 33000, 18900]))

In [1]:
import numpy as np
import random
import torch
from torchvision.models import resnet18
import torch.nn as nn
import torch.optim as optim
from data import Data, MyDataLoader, MyDataset, Train_Test_Split
from trainer import Trainer

In [2]:
data = Data()

In [3]:
data.process(np.random.randn(2, 8192,)).shape

(2, 1, 64, 128)

In [4]:
x_train, x_test, y_train, y_test = data.preprocess('../data_8.8/')

In [5]:
x_train = data.process(x_train)
x_test = data.process(x_test)

In [6]:
x_train[0].shape

(1, 64, 128)

In [7]:
train_config = {
    'batch_size':128,
    'shuffle':True,
    'drop_last':True,
    'pin_memory':True,
}

test_config = {
    'batch_size':128,
    'shuffle':True,
    'drop_last':True,
    'pin_memory':True,
}
train_dataset = MyDataset(x_train, y_train)
test_dataset = MyDataset(x_test, y_test)

train_loader = MyDataLoader(train_dataset, **train_config)
test_loader = MyDataLoader(test_dataset, **test_config)

model = resnet18()
model.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
model.maxpool = nn.AdaptiveAvgPool2d(output_size=(7, 7))
model.fc.out_features = 8

epochs = 100
lr = 1e-4
criterion = nn.CrossEntropyLoss()#weight=class_weights_tensor)
optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-1)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)

config = {
    'device': torch.device('cpu' if torch.cuda.is_available() else 'cuda'),
    'model' : model,
    'optimizer' : optimizer,
    'scheduler' : scheduler,
    'criterion' : criterion,
    'epochs' : epochs,
    'train_loader' : train_loader,
    'test_loader' : test_loader,
}

trainer = Trainer(**config)
trainer.train()

Epoch: 0 | Train Loss:0.5527 | Train Acc:0.7794 | Test Loss:4.6997 | Test Acc:0.2898
Epoch: 1 | Train Loss:0.3099 | Train Acc:0.8784 | Test Loss:5.4845 | Test Acc:0.3704
Epoch: 2 | Train Loss:0.1984 | Train Acc:0.9253 | Test Loss:4.0530 | Test Acc:0.4574
Epoch: 3 | Train Loss:0.1465 | Train Acc:0.9463 | Test Loss:6.0048 | Test Acc:0.4031


KeyboardInterrupt: 

In [1]:
#!/home/zwl/miniconda3/envs/asr/bin/python3
import numpy as np
import random
import torch
from torchvision.models import resnet18
import torch.nn as nn
import torch.optim as optim
from data import Data, MyDataLoader, MyDataset, Train_Test_Split
from trainer import Trainer
from utils import set_seed

In [2]:
set_seed(42)

train_config = {
    'batch_size':128,
    'shuffle':True,
    'drop_last':True,
    'pin_memory':True,
}

test_config = {
    'batch_size':128,
    'shuffle':True,
    'drop_last':True,
    'pin_memory':True,
}

print('Stage1: data load')
data = Data()
#x_train = data.load('../old_data/train/10type_sort_train_data_8192.npy')
#y_train = data.load('../old_data/train/10type_sort_train_label_8192.npy')
#x_test = data.load('../old_data/val/10type_sort_eval_data_8192.npy')
#y_test = data.load('../old_data/val/10type_sort_eval_label_8192.npy')
x_train, x_test, y_train, y_test = data.preprocess('../data_8.8/')

Stage1: data load


In [4]:
x_train.shape

(509100, 8192)

In [8]:
x_train = data.process(x_train)
x_test = data.process(x_test)

ValueError: operands could not be broadcast together with shapes (254550,2,64,128) (254550,8192) 

In [7]:
x_train.shape

(254550, 2, 64, 128)

In [7]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

AttributeError: 'list' object has no attribute 'shape'

In [8]:
len(x_train), len(x_test), len(y_train), len(y_test)

(254550, 120840, 509100, 241680)

In [9]:
len(np.random.randn(2, 4))

2

In [11]:
np.random.randn(10, ).reshape(10, -1).shape

(10, 1)

In [12]:
data = np.random.randn(128, 8192)

In [13]:
length = len(data)
data = (data - np.mean(data, 1).reshape(length, -1)) / np.std(data, 1).reshape(length, 1)

In [14]:
data.shape

(128, 8192)