In [6]:
from math import sqrt
import torch

from torch.autograd import Variable
'''
这段代码导入了 torch.autograd 模块中的 Variable 类，用于创建可以自动求导的张量。在 PyTorch 中，Variable 已经被整合到张量（Tensor）类中，所以在较新版本的 PyTorch 中，通常不再需要单独导入 Variable 类。您可以直接使用张量进行自动求导操作。
'''

from torch import nn
from torch.nn import functional as F
import numpy

处理位置信息
- 先直接用卷积

In [7]:
import torch
import torch.nn as nn

class LocationLayer(nn.Module):
    """Extract location features from attention weights with one 1-D convolution.

    Args:
        attention_n_filters: number of output channels of the convolution.
        attention_kernel_size: width of the convolution kernel.
        attention_dim: accepted for signature compatibility; this layer does
            not use it.
    """

    def __init__(self, attention_n_filters, attention_kernel_size, attention_dim):
        super().__init__()
        # Half-width padding: with an odd kernel the time length is unchanged.
        self.location_conv = nn.Conv1d(
            in_channels=2,
            out_channels=attention_n_filters,
            kernel_size=attention_kernel_size,
            stride=1,
            padding=(attention_kernel_size - 1) // 2,
            dilation=1,
            bias=False,
        )

    def forward(self, attention_weights_cat):
        """Convolve along time, then move the channel axis to the end.

        Args:
            attention_weights_cat: tensor of shape (batch, 2, time); the
                convolution is declared with 2 input channels.

        Returns:
            Tensor of shape (batch, time_out, attention_n_filters).
        """
        return self.location_conv(attention_weights_cat).transpose(1, 2)

attention
- 先用linear

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class AttentionLayer(nn.Module):
    """Location-sensitive additive attention.

    The energy of every memory step is
    ``value(tanh(query_proj + location_proj + processed_memory))`` and the
    attention weights are the softmax of those energies over time.

    Args:
        attention_rnn_dim: feature size of the query (attention RNN state).
        embedding_dim: input size of the ``key`` and ``record`` projections.
        attention_dim: size of the shared space in which query, location
            features and processed memory are summed.
        attention_location_n_filters: output channels of the location
            convolution.
        attention_location_kernel_size: kernel width of the location
            convolution. Padding is ``(kernel_size - 1) // 2``, so only an odd
            width keeps the time length equal to that of the memory.
    """

    def __init__(self, attention_rnn_dim, embedding_dim, attention_dim,
                 attention_location_n_filters, attention_location_kernel_size):
        super(AttentionLayer, self).__init__()

        # Projections used to compute the attention energies.
        self.query = nn.Linear(attention_rnn_dim, attention_dim, bias=False)
        # `key` maps embedding_dim -> attention_dim. forward() does not call
        # it; callers can use it to build `processed_memory`.
        self.key = nn.Linear(embedding_dim, attention_dim)
        self.value = nn.Linear(attention_dim, 1, bias=False)
        # `record` is not used anywhere in this class; kept so the attribute
        # stays available to existing code.
        self.record = nn.Linear(embedding_dim, attention_dim, bias=False)

        # Location features: convolve the two stacked attention-weight
        # channels along time ...
        self.location_conv = nn.Conv1d(2, attention_location_n_filters,
                                       kernel_size=attention_location_kernel_size,
                                       padding=(attention_location_kernel_size - 1) // 2,
                                       bias=False)
        # ... then project the filter axis to attention_dim so the result can
        # be summed with the query and memory projections.
        self.location_dense = nn.Linear(attention_location_n_filters,
                                        attention_dim, bias=False)

    def attention_energy(self, query, processed_memory, attention_weights_cat):
        """Compute un-normalised attention energies.

        Args:
            query: (batch, attention_rnn_dim).
            processed_memory: (batch, time, attention_dim).
            attention_weights_cat: (batch, 2, time).

        Returns:
            Tensor of shape (batch, time).
        """
        processed_query = self.query(query.unsqueeze(1))             # (B, 1, D)
        location_features = self.location_conv(attention_weights_cat)  # (B, F, T)
        # Conv1d puts channels before time; the other two terms are
        # time-major, so swap the axes before projecting F -> D.
        processed_attention_weights = self.location_dense(
            location_features.transpose(1, 2))                       # (B, T, D)
        energies = self.value(torch.tanh(
            processed_query + processed_attention_weights + processed_memory))
        return energies.squeeze(-1)

    def forward(self, attention_hidden_state, memory, processed_memory,
                attention_weights_cat):
        """Return the attention context and the attention weights.

        Args:
            attention_hidden_state: query, (batch, attention_rnn_dim).
            memory: (batch, time, features); the context is a weighted sum of
                its time steps.
            processed_memory: (batch, time, attention_dim).
            attention_weights_cat: (batch, 2, time).

        Returns:
            Tuple ``(attention_context, attention_weights)`` with shapes
            (batch, features) and (batch, time).
        """
        energy = self.attention_energy(
            attention_hidden_state, processed_memory, attention_weights_cat)

        # NOTE(review): no padding mask is applied, so padded memory steps
        # also receive attention weight.
        attention_weights = F.softmax(energy, dim=1)
        attention_context = torch.bmm(attention_weights.unsqueeze(1), memory)
        attention_context = attention_context.squeeze(1)
        return attention_context, attention_weights

encoder
- conv
- bilstm



In [None]:
class Encoder(nn.Module):
    """Convolution stack followed by a single-layer bidirectional LSTM.

    Args:
        encoder_n_convolutions: number of Conv1d -> BatchNorm1d -> ReLU ->
            Dropout(0.5) blocks.
        encoder_embedding_dim: feature size of the input. Every convolution
            keeps this channel count and the LSTM uses
            ``encoder_embedding_dim // 2`` units per direction.
        encoder_kernel_size: kernel width of every convolution. Padding is
            ``(kernel_size - 1) // 2``, so an odd width keeps the time length
            while an even width drops one frame per layer.
    """

    def __init__(self, encoder_n_convolutions, encoder_embedding_dim, encoder_kernel_size):
        super(Encoder, self).__init__()

        # Convolution blocks; the number of blocks is a constructor argument.
        self.convolutions = nn.ModuleList([
            nn.Sequential(
                nn.Conv1d(encoder_embedding_dim, encoder_embedding_dim,
                          kernel_size=encoder_kernel_size, stride=1,
                          padding=(encoder_kernel_size - 1) // 2),
                nn.BatchNorm1d(encoder_embedding_dim),
                nn.ReLU(),
                nn.Dropout(0.5)
            )
            for _ in range(encoder_n_convolutions)
        ])

        # Bidirectional LSTM: both directions together give
        # 2 * (encoder_embedding_dim // 2) output features.
        self.lstm = nn.LSTM(encoder_embedding_dim, encoder_embedding_dim // 2, 1,
                            batch_first=True, bidirectional=True)

    def _apply_convolutions(self, x):
        """Run the conv stack on a (batch, time, channels) tensor."""
        # Conv1d expects (batch, channels, time).
        x = x.transpose(1, 2)
        for conv in self.convolutions:
            x = conv(x)
        return x.transpose(1, 2)

    def _encode(self, x, input_lengths):
        """Shared implementation behind forward() and inference()."""
        x = self._apply_convolutions(x)
        self.lstm.flatten_parameters()

        if input_lengths is None:
            outputs, _ = self.lstm(x)
            return outputs

        total_length = x.size(1)
        # pack_padded_sequence needs the lengths on the CPU. The clamp keeps
        # them valid when an even kernel size has shortened the sequence.
        lengths = torch.as_tensor(input_lengths, dtype=torch.long).cpu()
        lengths = lengths.clamp(max=total_length)

        # Packing stops the LSTM at each sequence's true end, so padded frames
        # neither feed the backward direction nor produce outputs.
        packed = nn.utils.rnn.pack_padded_sequence(
            x, lengths, batch_first=True, enforce_sorted=False)
        packed_outputs, _ = self.lstm(packed)
        # total_length keeps the padded output the same length as the input
        # batch even when the longest sequence is shorter than the padding.
        outputs, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outputs, batch_first=True, total_length=total_length)
        return outputs

    def forward(self, x, input_lengths=None):
        """Encode a padded batch.

        Args:
            x: tensor of shape (batch, time, encoder_embedding_dim).
            input_lengths: true length of every sequence (tensor or list of
                positive ints). ``None`` treats every sequence as full length.

        Returns:
            Tensor of shape (batch, time_out, 2 * (encoder_embedding_dim // 2));
            positions at or beyond a sequence's length are zero.
        """
        return self._encode(x, input_lengths)

    def inference(self, x):
        """Encode a batch with no padding information (all frames are used).

        Args:
            x: tensor of shape (batch, time, encoder_embedding_dim).

        Returns:
            Tensor of shape (batch, time_out, 2 * (encoder_embedding_dim // 2)).
        """
        return self._encode(x, None)

实例化 encoder

In [None]:
# Create an encoder instance and run one forward pass as a smoke test.

# TODO: these hyper-parameters are placeholders, not tuned values.
# The kernel size is odd on purpose: Encoder pads each convolution with
# (kernel_size - 1) / 2 truncated to an int, so an even size (previously 100)
# drops one frame per convolution layer.
encoder_n_convolutions, encoder_embedding_dim, encoder_kernel_size = 3, 100, 5

encoder = Encoder(encoder_n_convolutions, encoder_embedding_dim, encoder_kernel_size)

# Placeholder batch so this cell runs on a fresh kernel: 2 sequences padded to
# 20 frames, laid out as (batch, time, encoder_embedding_dim).
# TODO: replace with a real batch once data loading exists.
input_lengths = torch.tensor([20, 15])
input_data = torch.randn(2, 20, encoder_embedding_dim)

# Feed the batch through the encoder to obtain the encoder outputs.
encoder_outputs = encoder(input_data, input_lengths)

decoder
- 参数还没有搞好！

In [None]:
class Decoder(nn.Module):
    """LSTM decoder that attends over the encoder outputs at every step.

    Args:
        encoder_embedding_dim: feature size of the encoder outputs.
        decoder_embedding_dim: feature size of the decoder inputs.
        hidden_dim: hidden size of the decoder LSTM.
        output_dim: feature size of the decoder output.
    """

    def __init__(self, encoder_embedding_dim, decoder_embedding_dim, hidden_dim, output_dim):
        super(Decoder, self).__init__()

        self.hidden_dim = hidden_dim

        # LSTM layer
        self.lstm = nn.LSTM(decoder_embedding_dim, hidden_dim, batch_first=True)

        # Output projection. forward() feeds it the LSTM output concatenated
        # with the attention context, so its input size is the sum of both.
        self.fc = nn.Linear(hidden_dim + encoder_embedding_dim, output_dim)

        # Attention scorer: one scalar per (encoder step, decoder step) pair.
        self.attention = nn.Linear(encoder_embedding_dim + hidden_dim, 1)

    def forward(self, encoder_outputs, decoder_inputs, hidden):
        """Decode a whole input sequence in one pass.

        Args:
            encoder_outputs: (batch, enc_time, encoder_embedding_dim).
            decoder_inputs: (batch, dec_time, decoder_embedding_dim).
            hidden: ``(h_0, c_0)`` as returned by ``init_hidden``, or ``None``
                to let the LSTM start from zeros.

        Returns:
            Tuple ``(output, hidden)``: ``output`` has shape
            (batch, dec_time, output_dim) and ``hidden`` is the final
            ``(h_n, c_n)`` of the LSTM.
        """
        # LSTM layer
        decoder_outputs, hidden = self.lstm(decoder_inputs, hidden)

        # Pair every decoder step with every encoder step so the two
        # sequences may have different lengths.
        dec_steps = decoder_outputs.size(1)
        enc_steps = encoder_outputs.size(1)
        enc_expanded = encoder_outputs.unsqueeze(1).expand(-1, dec_steps, -1, -1)
        dec_expanded = decoder_outputs.unsqueeze(2).expand(-1, -1, enc_steps, -1)

        # NOTE(review): the score is linear in the concatenation, so the
        # decoder term is constant across encoder steps and cancels in the
        # softmax. Insert a hidden layer with a non-linearity if the weights
        # should depend on the decoder state.
        attention_scores = self.attention(
            torch.cat((enc_expanded, dec_expanded), dim=3)).squeeze(-1)
        # Normalise over the encoder time axis: (batch, dec_time, enc_time).
        attention_weights = F.softmax(attention_scores, dim=2)
        # Weighted sum of encoder outputs for every decoder step.
        context_vector = torch.bmm(attention_weights, encoder_outputs)

        # Linear layer
        output = self.fc(torch.cat((decoder_outputs, context_vector), dim=2))

        return output, hidden

    def init_hidden(self, batch_size):
        """Return zero ``(h_0, c_0)``, each of shape (1, batch_size, hidden_dim).

        The tensors are created on the same device and with the same dtype as
        the LSTM weights, so they remain usable after ``.to(...)``/``.double()``.
        """
        reference = self.lstm.weight_hh_l0
        shape = (1, batch_size, self.hidden_dim)
        return reference.new_zeros(shape), reference.new_zeros(shape)

还在更新中