In [None]:
import wshrRelabelLight as WRL
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Folder locations
# download_save_path = 'E:/Dataset/wind_shear/Data_Download'
exception_save_path = '../Dataset/Exception_Data'

# Collect the names of all sub-folders.
# os.listdir() returns entries in arbitrary, platform-dependent order, so the
# listing is sorted to make the positional selections below reproducible.
# download_folder_names = [item for item in os.listdir(download_save_path) if os.path.isdir(os.path.join(download_save_path, item))]
exception_folder_names = sorted(
    item for item in os.listdir(exception_save_path)
    if os.path.isdir(os.path.join(exception_save_path, item))
)
instruction_folder_names = ["@Instructions"]

# Build the full path of every sub-folder
# download_folder_paths = [os.path.join(download_save_path, item) for item in download_folder_names]
exception_folder_paths = [os.path.join(exception_save_path, item) for item in exception_folder_names]

# Names of the variables to read.
# Author's note: the CTSO simulator outputs TIME, ALT, HDOT, VT, ALPHA, GAMMA,
# PITCH, GREF, WXDT, WZ, VDOT, ALRT. There is no raw data for WXDT and VDOT,
# and the meaning of GREF is unknown.
variable_list = ['ALT', 'ALTR', "TAS", 'GS', 'AOA1', 'AOA2', 'PTCH', 'WS', "WD", 'SAT', 'TAT', 'PI', 'PT']

# Build the training and test sets (one recording each, picked by position
# in the sorted listings).
train_folder_path = exception_folder_paths[3]
train_mat_name = sorted(os.listdir(train_folder_path))[2]
train_X, train_Y = WRL.dataConstruct(train_folder_path, train_mat_name, variable_list, normalized=False)

test_folder_path = exception_folder_paths[1]
test_mat_name = sorted(os.listdir(test_folder_path))[0]
test_X, test_Y = WRL.dataConstruct(test_folder_path, test_mat_name, variable_list, normalized=False)

# Row indices where the first label column equals 0.
train_wshr_warn_idx_list = np.where(train_Y[:, 0] == 0)[0]
test_wshr_warn_idx_list = np.where(test_Y[:, 0] == 0)[0]

# Compute the flight-path angle: GAMMA = PTCH - mean(AOA1, AOA2), row by row.
# The previous expression `np.mean(X[:, 4:5])` selected only AOA1 and reduced
# it to ONE scalar for the whole recording, so every row was shifted by the
# same constant instead of by its own angle of attack.
# NOTE(review): assumes WRL.dataConstruct returns columns in `variable_list`
# order (the original hard-coded indices 4 and 6 relied on the same thing).
pitch_col = variable_list.index('PTCH')
aoa_cols = [variable_list.index('AOA1'), variable_list.index('AOA2')]
variable_list.append('GAMMA')
train_Gamma_X = train_X[:, pitch_col] - np.mean(train_X[:, aoa_cols], axis=1)
test_Gamma_X = test_X[:, pitch_col] - np.mean(test_X[:, aoa_cols], axis=1)

# Append GAMMA as an extra feature column
train_X = np.hstack((train_X, train_Gamma_X.reshape(-1, 1)))
test_X = np.hstack((test_X, test_Gamma_X.reshape(-1, 1)))

# # Keep only take-off data with altitude between 50 and 2000 ft
# train_X = train_X[np.where((train_X[:int(train_X.shape[0]/2), 0] >= 50) & (train_X[:int(train_X.shape[0]/2), 0] <= 2000))[0]]
# test_X = test_X[np.where((test_X[:int(test_X.shape[0]/2), 0] >= 50) & (test_X[:int(test_X.shape[0]/2), 0] <= 2000))[0]]
# train_X = train_X[:5000, :]
print(train_X.shape, test_X.shape)

# Standardise: fit on the training set only, then apply the same statistics
# to the test set. The unscaled test data is kept in `test_X_origin`.
s_scaler = StandardScaler()
train_X = s_scaler.fit_transform(train_X)
train_mean, train_std = s_scaler.mean_, s_scaler.scale_
test_X_origin = test_X
test_X = s_scaler.transform(test_X)

In [None]:
# logSparse transformer 实现
'''
revised based on https://zhuanlan.zhihu.com/p/391337035
'''
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Pick the compute device; when CUDA is present, also make CUDA float tensors
# the default type for newly created tensors.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if device == 'cuda':
    torch.set_default_tensor_type(torch.cuda.FloatTensor)

# Self Attention Class
class SelfAttentionConv(nn.Module):
    """Multi-head self-attention with convolutional query/key projections.

    Queries and keys are produced by 1-D convolutions over the time axis
    (the input is left-padded with ``kernel_size - 1`` zeros so each output
    step only sees the current and earlier steps); values use a
    ``kernel_size=1`` convolution.

    Args:
        k: number of input channels (size of the last input dimension).
        headers: number of attention heads.
        kernel_size: width of the query/key convolutions.
        mask_next: if True, positions after the query step are masked out.
        mask_diag: if True (and ``mask_next`` is True), the query step itself
            is masked as well. NOTE: the first time step then has no
            unmasked key, so its softmax row is NaN.
    """

    def __init__(self, k, headers=8, kernel_size=5, mask_next=True, mask_diag=False):
        super().__init__()

        self.k, self.headers, self.kernel_size = k, headers, kernel_size
        self.mask_next = mask_next
        self.mask_diag = mask_diag

        h = headers  # number of attention heads

        # Left-only zero padding keeps the query/key convolutions causal.
        padding = (kernel_size - 1)
        self.padding_opertor = nn.ConstantPad1d((padding, 0), 0)

        # kernel_size == 1 reduces to the point-wise projections of a plain
        # transformer; kernel_size > 1 lets queries/keys see local context.
        self.toqueries = nn.Conv1d(k, k * h, kernel_size, padding=0, bias=True)
        self.tokeys = nn.Conv1d(k, k * h, kernel_size, padding=0, bias=True)
        self.tovalues = nn.Conv1d(k, k * h, kernel_size=1, padding=0, bias=False)  # point-wise only

        # Merges the concatenated heads back to k channels.
        self.unifyheads = nn.Linear(k * h, k)

    def forward(self, x):
        """Apply attention to ``x`` of shape (batch, time, k); returns the same shape."""
        b, t, k = x.size()

        assert self.k == k, 'Number of time series ' + str(k) + ' didn t much the number of k ' + str(
            self.k) + ' in the initiaalization of the attention layer.'
        h = self.headers

        # Conv1d expects channels first: (b, t, k) -> (b, k, t).
        x = x.transpose(1, 2)
        x_padded = self.padding_opertor(x)

        def split_heads(projected):
            # (b, k*h, t) -> (b, k, h, t) -> (b, h, t, k) -> (b*h, t, k).
            # The previous code transposed once more to (b, t, h, k) before
            # flattening to (b*h, t, k), which interleaved heads with time
            # steps and made the causal mask act on the wrong positions.
            return projected.reshape(b, k, h, t).permute(0, 2, 3, 1).reshape(b * h, t, k)

        # Scale queries and keys by k**0.25 each (k**0.5 on their product).
        scale = k ** (.25)
        queries = split_heads(self.toqueries(x_padded)) / scale
        keys = split_heads(self.tokeys(x_padded)) / scale
        values = split_heads(self.tovalues(x))

        # Raw attention scores: (b*h, t, t), rows = query step, cols = key step.
        weights = torch.bmm(queries, keys.transpose(1, 2))

        # Mask the upper triangle (optionally including the diagonal).
        if self.mask_next:
            offset = 0 if self.mask_diag else 1
            indices = torch.triu_indices(t, t, offset=offset, device=weights.device)
            weights[:, indices[0], indices[1]] = float('-inf')

        # `F` was never imported in this file; torch.softmax is equivalent.
        weights = torch.softmax(weights, dim=2)

        # Weighted sum of values, then concatenate heads: (b, t, k*h).
        output = torch.bmm(weights, values)
        output = output.view(b, h, t, k)
        output = output.transpose(1, 2).contiguous().view(b, t, k * h)

        return self.unifyheads(output)  # shape (b, t, k)


# Conv Transformer block
class ConvTransformerBLock(nn.Module):
    """One transformer block: conv self-attention and feed-forward, each with residual + LayerNorm.

    Args:
        k: channel count of the input and output.
        headers: number of attention heads.
        kernel_size, mask_next, mask_diag: forwarded to ``SelfAttentionConv``.
        dropout_proba: dropout probability applied after the attention residual.
    """

    def __init__(self, k, headers, kernel_size=5, mask_next=True, mask_diag=False, dropout_proba=0.2):
        super().__init__()

        # Self attention
        self.attention = SelfAttentionConv(k, headers, kernel_size, mask_next, mask_diag)

        # Layer norms after the attention and feed-forward sub-layers
        self.norm1 = nn.LayerNorm(k)
        self.norm2 = nn.LayerNorm(k)

        # Position-wise feed-forward network (k -> 4k -> k)
        self.feedforward = nn.Sequential(
            nn.Linear(k, 4 * k),
            nn.ReLU(),
            nn.Linear(4 * k, k)
        )

        self.dropout = nn.Dropout(p=dropout_proba)
        # Not used by forward(); kept so the attribute remains available.
        self.activation = nn.ReLU()

    def forward(self, x, train=False):
        """Run the block on ``x``; the output has the same shape as the input.

        Dropout is applied when the module is in training mode
        (``self.training``) or when ``train=True`` is passed. Previously only
        the ``train`` argument was honoured, and ``nn.Sequential`` never
        passes it, so dropout was silently disabled in stacked models.
        ``nn.Dropout`` itself is still a no-op after ``.eval()``.
        """
        # Self attention + residual
        x = self.attention(x) + x

        if train or self.training:
            x = self.dropout(x)

        x = self.norm1(x)

        # Feed-forward network + residual
        x = self.feedforward(x) + x

        return self.norm2(x)

# Forecasting Conv Transformer:
class ForcastConvTransformer(nn.Module):
    """Stack of ``ConvTransformerBLock`` layers with two per-step output heads.

    Args:
        k: number of channels per time step (author's note: k=1 when there
            are no covariates).
        headers: number of attention heads per block.
        depth: number of stacked blocks.
        seq_length: exact window length the model accepts; also the size of
            the learned position-embedding table.
        kernel_size, mask_next, mask_diag, dropout_proba: forwarded to every
            ``ConvTransformerBLock``.
        num_tokens: if truthy, size of an additional per-series token
            embedding table; ``forward`` must then be given ``tokens``.
    """

    def __init__(self, k, headers, depth, seq_length, kernel_size=5, mask_next=True, mask_diag=False, dropout_proba=0.2,
                 num_tokens=None):
        super().__init__()
        # Optional per-series token embedding: (num_tokens, k)
        self.tokens_in_count = False
        if num_tokens:
            self.tokens_in_count = True
            self.token_embedding = nn.Embedding(num_tokens, k)

        # Learned position embedding: (seq_length, k)
        self.position_embedding = nn.Embedding(seq_length, k)

        self.k = k
        self.seq_length = seq_length  # window size; windows are cut during data preparation

        # Author's note: the "log sparse" strategy is realised by stacking
        # several ConvTransformer blocks.
        self.TransformerBlocks = nn.Sequential(
            *[ConvTransformerBLock(k, headers, kernel_size, mask_next, mask_diag, dropout_proba)
              for _ in range(depth)]
        )

        # Per-step output heads: k -> 1 each.
        self.topreSigma = nn.Linear(k, 1)
        self.tomu = nn.Linear(k, 1)
        self.plus = nn.Softplus()

    def forward(self, x, tokens=None):
        """Return ``(mu, sigma)``, each of shape (batch, time, 1).

        ``x`` must have shape (batch, seq_length, k). ``tokens`` must be
        given exactly when the model was built with ``num_tokens``; its first
        dimension must equal the batch size. ``sigma`` is passed through
        Softplus.
        """
        b, t, k = x.size()

        # The input must match the channel count and window length given at construction.
        assert k == self.k, 'The k :' + str(
            self.k) + ' number of timeseries given in the initialization is different than what given in the x :' + str(
            k)
        assert t == self.seq_length, 'The lenght of the timeseries given t ' + str(
            t) + ' miss much with the lenght sequence given in the Tranformers initialisation self.seq_length: ' + str(
            self.seq_length)

        # Position indices are created on the input's device so the embedding
        # lookup does not depend on the global default tensor device.
        pos = torch.arange(t, device=x.device)
        self.pos_emb = self.position_embedding(pos).expand(b, t, k)

        # Tokens must be supplied if and only if a token table exists.
        assert self.tokens_in_count == (not (tokens is None)), 'self.tokens_in_count = ' + str(
            self.tokens_in_count) + ' should be equal to (not (tokens is None)) = ' + str((not (tokens is None)))

        if tokens is not None:
            # One token per batch element.
            assert tokens.size(0) == b
            self.tok_emb = self.token_embedding(tokens)
            # Broadcast the token embedding over time: -> (b, t, k)
            self.tok_emb = self.tok_emb.expand(t, b, k).transpose(0, 1)
            x = self.pos_emb + self.tok_emb + x
        else:
            x = self.pos_emb + x

        # Transformer stack, then the two output heads.
        x = self.TransformerBlocks(x)
        mu = self.tomu(x)
        presigma = self.topreSigma(x)
        sigma = self.plus(presigma)

        return mu, sigma

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# 构建Transformer模型
class TransformerModel(nn.Module):
    """Linear embedding -> positional encoding -> Transformer encoder -> linear head.

    Inputs are batch-first, ``(batch, seq, input_dim)``; the output is
    ``(batch, seq, output_dim)``.

    Args:
        input_dim: number of features per time step.
        output_dim: number of outputs per time step.
        seq_len: maximum sequence length (size of the positional table).
        num_layers: number of encoder layers.
        d_model: model width.
        num_heads: number of attention heads.
        d_ff: hidden width of each encoder layer's feed-forward network.
        dropout: dropout probability.
    """

    def __init__(self, input_dim, output_dim, seq_len, num_layers, d_model, num_heads, d_ff, dropout):
        super(TransformerModel, self).__init__()

        self.embedding = nn.Linear(input_dim, d_model)
        self.positional_encoding = PositionalEncoding(d_model, dropout, seq_len)
        # batch_first=True is required: PositionalEncoding indexes the
        # sequence on dim 1, while the encoder layer's default layout is
        # (seq, batch, feature). Without it, attention ran across the batch
        # axis instead of across time.
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model, num_heads, d_ff, dropout, batch_first=True),
            num_layers
        )
        self.decoder = nn.Linear(d_model, output_dim)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq, input_dim) to (batch, seq, output_dim)."""
        x = self.embedding(x)            # linear projection to d_model
        x = self.positional_encoding(x)  # add positional information
        x = self.transformer_encoder(x)  # Transformer encoder stack
        x = self.decoder(x)              # project to the target dimension
        return x

# 位置编码
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a batch-first input, then apply dropout.

    Args:
        d_model: size of the last input dimension (odd values are supported).
        dropout: dropout probability applied to the sum.
        max_len: number of positions precomputed in the table.
    """

    def __init__(self, d_model, dropout, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer odd column than even columns, so
        # the frequency vector is trimmed; for even d_model the slice is a no-op.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        pe = pe.unsqueeze(0)  # (1, max_len, d_model), broadcast over the batch
        # Registered as a buffer: follows .to()/.cuda() and is saved in the
        # state dict, but is not a trainable parameter.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``dropout(x + pe)`` for ``x`` of shape (batch, seq, d_model), seq <= max_len."""
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)

# Hyper-parameters
input_dim = train_X.shape[1]   # input features per time step
output_dim = train_X.shape[1]  # output features per time step
seq_len = 10                   # window length (time steps)
num_layers = 4                 # number of Transformer encoder layers
d_model = 64                   # model width
num_heads = 4                  # attention heads
d_ff = 128                     # hidden width of the feed-forward network
dropout = 0.1                  # dropout probability

np.random.seed(0)
# x_train = np.random.rand(100, seq_len, input_dim).astype(np.float32)
# y_train = np.random.rand(100, output_dim).astype(np.float32)

# Cut the 2-D training matrix (samples, features) into overlapping windows.
# The previous code fed the 2-D float64 matrix straight into the model and
# referenced an undefined `y_train`, so this cell could not run.
# NOTE(review): the target is ASSUMED to be the sample that immediately
# follows each window (one-step-ahead forecasting), matching the
# (N, output_dim) shape of the commented-out template above. Confirm this is
# the intended task.
num_windows = train_X.shape[0] - seq_len
if num_windows <= 0:
    raise ValueError('train_X needs more than seq_len rows to build training windows')
window_idx = np.arange(num_windows)[:, None] + np.arange(seq_len)[None, :]
x_train = train_X[window_idx].astype(np.float32)  # (num_windows, seq_len, input_dim)
y_train = train_X[seq_len:].astype(np.float32)    # (num_windows, output_dim)

# Build the model first so the data can be placed on the same device as its parameters.
model = TransformerModel(input_dim, output_dim, seq_len, num_layers, d_model, num_heads, d_ff, dropout)
model_device = next(model.parameters()).device

# Convert to tensors
x_train_tensor = torch.from_numpy(x_train).to(model_device)
y_train_tensor = torch.from_numpy(y_train).to(model_device)

# Loss and optimiser
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop (full batch: every window is processed in each step).
epochs = 10
model.train()
for epoch in range(epochs):
    optimizer.zero_grad()
    output = model(x_train_tensor)  # (num_windows, seq_len, output_dim)
    # Compare the prediction at the last time step with the next sample.
    loss = criterion(output[:, -1, :], y_train_tensor)
    loss.backward()
    optimizer.step()
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, loss.item()))

# Smoke-test inference on a random window; eval() disables dropout and
# no_grad() skips building the autograd graph.
model.eval()
test_input = torch.from_numpy(np.random.rand(1, seq_len, input_dim).astype(np.float32)).to(model_device)
with torch.no_grad():
    predicted_output = model(test_input)
print('Predicted Output:', predicted_output)
