In [14]:
import lightning as L 
from torch import nn,optim
import torch.nn.functional as F
import torch
import torchmetrics
import random
import glob
import math
from einops import rearrange
import numpy as np

In [15]:

class MTCNPatchEmbed(nn.Module):
    '''
    Patching and embedding done in one step by a strided 1D convolution.

    Each variable of the series is embedded on its own, using the same
    convolution weights for every variable.

    Input:  [B, L, M]
    Output: [B, M, D, N], N being the number of convolution output steps
            (L // S when L >= S and P >= S, because the series is
            right-padded by P - S before the convolution).

    Args:
        P: patch length, used as the convolution kernel size.
        S: patch stride, used as the convolution stride.
        D: embedding width, used as the number of output channels.
        M: number of variables; accepted but not used by this layer.
    '''
    def __init__(self,
                P=8,
                S=4,
                D=32,
                M = 6,
                ):
        super().__init__()
        self.P, self.S = P, S
        # A single input channel: variables are folded into the batch axis
        # in forward(), so the same kernel is applied to each of them.
        self.conv = nn.Conv1d(1, D, kernel_size=P, stride=S)

    def forward(self, inputs):
        batch_size = inputs.shape[0]
        # [B, L, M] -> [B*M, L] -> [B*M, 1, L]
        per_variable = rearrange(inputs, 'b l m -> (b m) l').unsqueeze(1)
        # Repeat the last time step P - S times on the right:
        # [B*M, 1, L] -> [B*M, 1, L+P-S]
        padded = F.pad(per_variable, pad=(0, self.P - self.S), mode='replicate')
        patches = self.conv(padded)  # [B*M, D, N]
        # Restore the variable axis: [B*M, D, N] -> [B, M, D, N]
        return rearrange(patches, '(b m) d n -> b m d n', b=batch_size)
    
    

In [16]:

# ====================Norm====================
class MixerNormLayer(nn.Module):
    '''
    Batch norm or layer norm over the feature axis D.

    Input:  (B, M, N, D)
    Output: (B, M, N, D)

    When norm_type contains "batch" (case-insensitive), B and M are merged and
    D is moved to the channel position so BatchNorm1d sees (B*M, D, N); the
    original layout is restored afterwards. Any other norm_type applies
    LayerNorm to the last axis.

    Args:
        norm_type: string selecting the normalisation; "batch" anywhere in it
            picks BatchNorm1d, anything else picks LayerNorm.
        d_model: size of the feature axis D.
    '''
    def __init__(self, norm_type,d_model):
        super().__init__()

        self.norm_type = norm_type
        use_batch_norm = "batch" in norm_type.lower()
        # BatchNorm1d expects (N, C) or (N, C, L); LayerNorm works on the last axis.
        self.norm = nn.BatchNorm1d(d_model) if use_batch_norm else nn.LayerNorm(d_model)

    def forward(self, inputs):
        # Layer norm needs no reshaping, so handle it first.
        if "batch" not in self.norm_type.lower():
            return self.norm(inputs)

        batch_size = inputs.shape[0]
        # (B, M, N, D) -> (B*M, D, N): channel axis second, as BatchNorm1d requires.
        channels_first = rearrange(inputs, "b m n d -> (b m) d n")
        normalised = self.norm(channels_first)
        # (B*M, D, N) -> (B, M, N, D)
        return rearrange(normalised, "(b m) d n -> b m n d", b=batch_size)

In [17]:
# IC LOSS
class ICLoss(nn.Module):
    '''
    Correlation-style loss: 1 minus the cosine similarity between the centred,
    column-averaged prediction and the centred target, taken along the batch axis.

    Args:
        gamma: weight of an optional penalty term; the penalty is only added
            when gamma > 0.
    '''
    def __init__(self, gamma=0):
        super().__init__()
        self.gamma = gamma

    def forward(self, y_pred, y_true):
        '''
        Args:
            y_pred: 2-D tensor; its columns are averaged into a single column.
            y_true: tensor compared against that column along dim 0
                (callers in this file pass shape (B, 1)).

        Returns:
            Scalar tensor holding the loss.
        '''
        # Collapse all prediction columns into one combined signal, shape (B, 1).
        combined = y_pred.mean(dim=1, keepdim=True)
        pred_centred = combined - combined.mean(dim=0, keepdim=True)
        true_centred = y_true - y_true.mean(dim=0, keepdim=True)

        # Cosine similarity of centred vectors along the batch axis.
        similarity = F.cosine_similarity(pred_centred, true_centred, dim=0)
        loss = 1 - similarity.mean()

        if self.gamma > 0:
            # NOTE(review): pred_centred has a single column here, so this Gram
            # matrix is 1x1 and the penalty is the reciprocal of its sum of
            # squares. Confirm whether the un-averaged y_pred was intended.
            gram = pred_centred.T @ pred_centred
            loss = loss + self.gamma * torch.trace(torch.linalg.inv(gram))
        return loss

In [18]:
class MtcnTSMixer(nn.Module):
    '''
    Temporal mixing with a depthwise convolution followed by batch norm.

    Every one of the M*D channels gets its own kernel (groups = M*D), so
    information is mixed only along the patch axis N.

    inputs: [B, M, D, N]
    output: [B, M*D, N]

    Args:
        M: number of variables.
        D: channels per variable.
        kernel_size: width of the depthwise kernel along N.
    '''
    def __init__(self, M, D, kernel_size):
        super().__init__()
        channels = M * D
        self.dw_conv = nn.Conv1d(
            channels,
            channels,
            kernel_size=kernel_size,
            groups=channels,   # one filter per channel
            padding='same',    # output keeps length N
        )
        self.bn = nn.BatchNorm1d(channels)

    def forward(self,inputs):
        # Merge variable and feature axes: [B, M, D, N] -> [B, M*D, N]
        merged = rearrange(inputs, 'b m d n -> b (m d) n')
        # Depthwise conv then batch norm; the shape stays [B, M*D, N].
        return self.bn(self.dw_conv(merged))

class MtcnFeatureMixer(nn.Module):
    '''
    Feature mixing: two grouped pointwise convolutions with a GELU in between.

    With groups = M, each variable's D channels interact only with each other.

    inputs: [B, M*D, N]
    output: [B, M, D, N]

    Args:
        M: number of variables (also the number of convolution groups).
        D: channels per variable.
        r: expansion ratio of the hidden width (hidden channels = r*M*D).
    '''
    def __init__(self, M, D, r):
        super().__init__()
        self.D = D
        width = M * D
        hidden = r * M * D
        # Expand, then project back; kernel size 1 leaves the N axis untouched.
        self.pw_con1 = nn.Conv1d(width, hidden, kernel_size=1, groups=M)
        self.pw_con2 = nn.Conv1d(hidden, width, kernel_size=1, groups=M)

    def forward(self,x):
        # x: [B, M*D, N]
        hidden = F.gelu(self.pw_con1(x))   # [B, r*M*D, N]
        projected = self.pw_con2(hidden)   # [B, M*D, N]
        # Split the channel axis back into variables and features.
        return rearrange(projected, 'b (m d) n -> b m d n', d=self.D)  # [B, M, D, N]
    
    
class MtcnChannelMixer(nn.Module):
    '''
    Cross-variable mixing: two grouped pointwise convolutions with a GELU in between.

    Channels are regrouped feature-major and groups = D, so the M variables
    interact with each other within each feature index.

    inputs:  [B, M, D, N]
    output:  [B, M, D, N]

    Args:
        M: number of variables.
        D: channels per variable (also the number of convolution groups).
        r: expansion ratio of the hidden width (hidden channels = r*M*D).
    '''
    def __init__(self, M, D, r):
        super().__init__()
        self.D = D
        width = M * D
        hidden = r * M * D
        # Expand, then project back; kernel size 1 leaves the N axis untouched.
        self.pw_con3 = nn.Conv1d(width, hidden, kernel_size=1, groups=D)
        self.pw_con4 = nn.Conv1d(hidden, width, kernel_size=1, groups=D)

    def forward(self,x):
        # x: [B, M, D, N]
        # Put the feature axis first so each group of M consecutive channels
        # holds one feature across all variables: [B, M, D, N] -> [B, D*M, N]
        feature_major = rearrange(x, 'b m d n -> b (d m) n')

        hidden = F.gelu(self.pw_con3(feature_major))  # [B, r*D*M, N]
        projected = self.pw_con4(hidden)              # [B, D*M, N]

        # Undo the regrouping: [B, D*M, N] -> [B, M, D, N]
        return rearrange(projected, 'b (d m) n -> b m d n', d=self.D)


class ModernTCNBlock(nn.Module):
    '''
    One residual block: temporal mixer -> feature mixer -> optional channel
    mixer, with the block input added back to the result.

    inputs: [B, M, D, N]
    output: [B, M, D, N]

    Args:
        M: number of variables.
        D: channels per variable.
        kernel_size: kernel width of the temporal depthwise convolution.
        expansion_factor: hidden-width ratio passed to both mixers as r.
        N: number of patches; accepted but not used by this block.
        channel_mixer: when truthy, a MtcnChannelMixer is built and applied.
    '''
    def __init__(self, M, D, kernel_size, expansion_factor = 1,N = None,channel_mixer = True):
        super().__init__()
        self.ts_mixer = MtcnTSMixer(M, D, kernel_size)
        self.feature_mixer = MtcnFeatureMixer(M, D, r=expansion_factor)
        # The attribute holds the mixer module when enabled, otherwise the
        # falsy flag that was passed in; forward() relies on its truthiness.
        self.channel_mixer = (
            MtcnChannelMixer(M, D, r=expansion_factor) if channel_mixer else channel_mixer
        )

    def forward(self, x_emb):
        # x_emb: [B, M, D, N]
        mixed = self.ts_mixer(x_emb)        # [B, M*D, N]
        mixed = self.feature_mixer(mixed)   # [B, M, D, N]
        if self.channel_mixer:
            mixed = self.channel_mixer(mixed)
        # Residual connection; the result is [B, M, D, N].
        return x_emb + mixed
    
class TcnFactorNetV3(L.LightningModule):
    '''
    Patch-embedding + ModernTCN-block encoder with a linear factor head,
    trained with ICLoss.

    Dimension glossary:
    B : batch size
    M : number of variables in the multivariate series
    L : length of the look-back window
    T : length of the forecast horizon
    N : number of patches after patching
    D : number of channels per variable
    P : kernel size of the embedding layer
    S : stride of the embedding layer

    Args:
        M, L, D, P, S: see the glossary; S defaults to P // 2 when None.
        T: accepted but not used by this class.
        kernel_size: kernel width of the temporal convolution in each block.
        expansion_factor: hidden-width ratio of the mixers in each block.
        num_layers: number of stacked ModernTCNBlock layers.
        label_idx: column of batch[1] used as the training/validation label.
        factormean: accepted but not used by this class.
        num_MLP: accepted but not used by this class.
        gamma: penalty weight forwarded to ICLoss.
        channel_mixer: whether each block includes the cross-variable mixer.
    '''
    def __init__(self,
                 M = 6,
                 L = 30,
                 T = 1,
                 D = 16,
                 P = 8,
                 kernel_size = 10,
                 expansion_factor = 1,
                 num_layers = 1,
                 label_idx = 1,
                 factormean = True,
                 S = None,
                 num_MLP = 1,
                 gamma = 0,
                 channel_mixer = True,
                 ):
        super().__init__()


        if S is None:
            S = int(P // 2)
            print(f"设定S为P的一半: .{S}")

        # Number of patches; must match what MTCNPatchEmbed produces for a
        # window of length L, since ts_linear below is sized from it.
        N = L // S
        self.embed_layer = MTCNPatchEmbed(P, S, D,M=M)


        layers = [ModernTCNBlock(M=M,
                                 D=D,
                                 kernel_size=kernel_size,
                                 N =N,
                                 expansion_factor=expansion_factor,
                                 channel_mixer=channel_mixer
                                 ) for _ in range(num_layers)
                  ]
        self.tcn_encoder = nn.Sequential(*layers)

        # Per-variable head: flattens (D, N) and projects it down to D.
        self.ts_linear = nn.Linear(D*N,D)

        # Task-specific setting: which label column to train against.
        self.label_idx = label_idx

        # Factor head: maps the M*D encoder features to 50 outputs.
        self.predict_layer = nn.Sequential(
            nn.Linear(M*D, 50)
        )
        self.loss_fn = ICLoss(gamma)


    def forward(self,inputs):
        '''Encode a window batch [B, L, M] into features of shape [B, M*D].'''
        x_emb = self.embed_layer(inputs)  # [B, L, M] -> [B, M, D, N]
        x_out = self.tcn_encoder(x_emb)  # [B, M, D, N]
        x_out = rearrange(x_out, 'b m d n -> b m (d n)')

        x_out = F.gelu(self.ts_linear(x_out))  # [B, M, D]
        x_out = rearrange(x_out, 'b m d -> b (m d)') # [B, M*D]
        return x_out

    def training_step(self, batch, batch_idx):
        '''Compute and log the IC loss for one training batch.'''
        # squeeze(0) drops a leading axis of size 1; presumably the loader
        # yields one pre-batched group per step — confirm against the dataset.
        inputs = batch[0].squeeze(0)

        y_pred = self.forward(inputs)
        y_pred = self.predict_layer(y_pred)

        labels  = batch[1].squeeze(0)[:,self.label_idx].reshape(-1,1)
        loss = self.loss_fn(y_pred,labels)
        self.log("train_loss",loss, on_epoch=True,prog_bar=True)
        return loss

    def predict_step(self, batch, batch_idx):
        '''
        Return a NumPy array with the factor-head outputs followed by one extra
        column taken from column 0 of batch[1].
        '''
        inputs = batch[0].squeeze(0)
        ids = batch[1].squeeze(0)[:,0]
        # Encode first, then apply the factor head. The head was previously
        # called before y_pred existed, which raised UnboundLocalError.
        y_pred = self.forward(inputs)
        y_pred = self.predict_layer(y_pred)
        return torch.cat((y_pred,ids.unsqueeze(1)),1).cpu().numpy()

    def validation_step(self,batch,batch_idx):
        '''Compute and log the IC loss for one validation batch.'''
        inputs = batch[0].squeeze(0)
        y_pred = self.forward(inputs)
        y_pred = self.predict_layer(y_pred)
        labels  = batch[1].squeeze(0)[:,self.label_idx].reshape(-1,1)
        loss = self.loss_fn(y_pred,labels)
        self.log("val_loss",loss,prog_bar=True)
        return loss

    def configure_optimizers(self):
        '''Use Adam over all parameters with a fixed learning rate of 1e-3.'''
        optimizer = optim.Adam(self.parameters(), lr=1e-3)
        return optimizer