In [1]:
import numpy as np
import torch
import os
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader,TensorDataset,random_split
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence

In [2]:
def load_npy_data_with_labels(folder_path, label):
    data_list = []  # 用于存储所有读取的数据
    labels_list = []  # 用于存储所有的标签
    for file in os.listdir(folder_path):
        if file.endswith('.npy'):
            file_path = os.path.join(folder_path, file)
            data = np.load(file_path)
            data_list.append(data)
            labels_list.append(label)
    return data_list, labels_list

# 使用你的文件夹路径替换这里
base_folder_path = 'train_data'  # e.g., 'path_to_your_data/train_data'

# 加载 language_0 的数据并分配标签 0
language_0_folder_path = os.path.join(base_folder_path, 'language_0')
language_0_data, language_0_labels = load_npy_data_with_labels(language_0_folder_path, 0)

# 加载 language_1 的数据并分配标签 1
language_1_folder_path = os.path.join(base_folder_path, 'language_1')
language_1_data, language_1_labels = load_npy_data_with_labels(language_1_folder_path, 1)

# 合并两种语言的数据和标签
train_data_raw = language_0_data + language_1_data
train_labels = language_0_labels + language_1_labels

# 打印出一些信息来确认数据已被加载
print(f"Loaded {len(language_0_data)} samples for language 0 and {len(language_1_data)} samples for language 1.")
train_data_raw[0].shape

Loaded 2000 samples for language 0 and 2000 samples for language 1.


(294, 80)

In [3]:
lengths = [seq.shape[0] for seq in train_data_raw]

# 将序列、长度和标签组合成一个列表
combined = list(zip(train_data_raw, lengths, train_labels))

# 按照序列长度进行排序
combined.sort(key=lambda x: x[1], reverse=True)

# 分离排序后的序列和标签
train_data_sorted, _, train_labels_sorted = zip(*combined)

# 如果需要，将它们转换回列表
train_data_sorted = list(train_data_sorted)
train_labels_sorted = list(train_labels_sorted)
train_data_sorted[0].shape,train_labels_sorted[0]

((734, 80), 1)

In [4]:
train_data_tensor = [torch.tensor(data) for data in train_data_sorted]
train_labels_tensor = torch.tensor(train_labels_sorted)
def create_attention_mask(sequence_padded, lengths):
    # 创建一个与填充序列相同形状的掩码，所有元素先设为 False
    mask = torch.zeros(sequence_padded.shape[:2], dtype=torch.bool)
    for idx, length in enumerate(lengths):
        mask[idx, :length] = True  # 将每个序列的非填充部分设为 True
    return mask
# 计算每个序列的原始长度
lengths = [len(seq) for seq in train_data_tensor]
#  使用 pad_sequence 填充序列
padded_sequences = pad_sequence(train_data_tensor, batch_first=True)
# 创建 TensorDataset 和 DataLoader
attention_masks = create_attention_mask(padded_sequences, lengths)
train_dataset = TensorDataset(padded_sequences, train_labels_tensor,attention_masks)
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size

# 随机分割成训练集和验证集
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader=DataLoader(val_dataset,batch_size=32,shuffle=True)
train_dataset[1][2].shape

torch.Size([734])

In [9]:
class CustomTransformerEncoderLayer(nn.Module):
    def __init__(self, hidden_size, nhead, dropout):
        super(CustomTransformerEncoderLayer, self).__init__()
        self.self_attn = nn.MultiheadAttention(hidden_size, nhead, dropout=dropout)
        self.linear1 = nn.Linear(hidden_size, 2048)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(2048,hidden_size)
        self.norm1 = nn.LayerNorm(2048)
        self.norm2 = nn.LayerNorm(hidden_size)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, src, src_mask,is_causal=False):
        # 自注意力层
        src2 = self.self_attn(src, src, src, attn_mask=src_mask)[0]
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        # 前馈网络
        src2 = self.linear2(self.dropout(F.relu(self.linear1(src))))
        src = src + self.dropout2(src2)
        src = self.norm2(src)
        return src

In [12]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-torch.log(torch.tensor(10000.0)) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(1), :]
        return x

class TransformerClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, nhead, num_layers, dropout=0.5):
        super(TransformerClassifier, self).__init__()
        self.model_type = 'Transformer'
        self.pos_encoder = PositionalEncoding(input_size)
        self.input_fc = nn.Linear(input_size, hidden_size)
        self.encoder_layer = CustomTransformerEncoderLayer(hidden_size, nhead=nhead,dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(hidden_size, num_classes)
        self.init_weights()
        self.sigmoid = nn.Sigmoid()

    def init_weights(self):
        initrange = 0.1
        self.fc.bias.data.zero_()
        self.fc.weight.data.uniform_(-initrange, initrange)

    def forward(self, src, src_mask,is_causal=False):
        src = self.pos_encoder(src)
        output = self.transformer_encoder(self.input_fc(src), src_mask)
        output = self.fc(output.mean(dim=1))
        return output
    

In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


In [14]:
model = TransformerClassifier(input_size=80, hidden_size=128, num_classes=1, nhead=4, num_layers=2).to(device)
print(model)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 训练循环
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for sequences, labels,masks in train_loader:
        sequences, labels = sequences.to(device), labels.to(device)
        labels=labels.float()
        
        # 前向传播
        output = model(sequences, masks)  # 假设没有使用 src_mask
        loss = criterion(output.squeeze(), labels).sum()
        # 反向传播和优化
        model.train()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print()
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for sequences, labels, masks in val_loader:
            sequences, labels, masks = sequences.to(device), labels.to(device),masks.to(device)
            labels = labels.float()
            outputs = model(sequences, masks)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print('Accuracy of the model on the validation data: %d %%' % accuracy)


TransformerClassifier(
  (pos_encoder): PositionalEncoding()
  (input_fc): Linear(in_features=80, out_features=128, bias=True)
  (encoder_layer): CustomTransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
    )
    (linear1): Linear(in_features=128, out_features=2048, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
    (linear2): Linear(in_features=2048, out_features=128, bias=True)
    (norm1): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.5, inplace=False)
    (dropout2): Dropout(p=0.5, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-1): 2 x CustomTransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )




TypeError: forward() got an unexpected keyword argument 'src_key_padding_mask'