In [1]:
import torch
import torch.nn as nn
from src.model.modernTCN import ModernTCNEmbed
from src import Layer
# Toy dimensions used only to shape-check the modules defined in this notebook.
batch_size, seq_len, nvars = 2, 30, 6
patch_len, stride = 3, 2
mask_ratio = 0.4
d_model, predict_steps = 16, 1

# Expected number of patches for these settings: (30 - 3) // 2 + 2 = 15.
# NOTE(review): the "+ 2" presumably accounts for end-padding inside the patch
# embedding -- confirm against ModernTCNEmbed.
num_patch = 2 + (max(seq_len, patch_len) - patch_len) // stride # 15

# Random stand-ins for a feature batch and its labels.
x = torch.rand(batch_size, seq_len, nvars)
y = torch.rand(batch_size, predict_steps)
x.shape, y.shape

ImportError: cannot import name 'basic' from partially initialized module 'src.model' (most likely due to a circular import) (d:\Coding\learndl\learndl\src\model\__init__.py)

In [None]:
# Patch-embed the toy batch with an embedding shared across variables and
# display the resulting shape (the recorded run shows torch.Size([2, 6, 15, 16])).
embed = ModernTCNEmbed(
    nvars,
    d_model,
    patch_len=patch_len,
    stride=stride,
    shared=True,
)
embed(x).shape

torch.Size([2, 6, 15, 16])

In [None]:
class MtcnTSMixer(nn.Module):
    '''
    in : [bs x nvars x d_model x num_patch]
    out: [bs x nvars * d_model x num_patch]
    '''
    def __init__(self, nvars, d_model, kernel_size = 3):
        super().__init__()
        self.nvars   = nvars
        self.d_model = d_model
        self.dw_conv = nn.Conv1d(
            in_channels     = nvars * d_model, 
            out_channels    = nvars * d_model, 
            kernel_size     = kernel_size,
            groups          = nvars * d_model,
            padding         = 'same'
        )
        self.bn = nn.BatchNorm1d(nvars * d_model)
        
    def forward(self , x):
        '''
        in : [bs x nvars x d_model x num_patch]
        out: [bs x nvars * d_model x num_patch]
        '''
        bs = x.shape[0]
        x = x.reshape(bs,self.nvars*self.d_model,-1)    # [bs x nvars * d_model x num_patch]
        x = self.dw_conv(x)                             # [bs x nvars * d_model x num_patch]
        x = self.bn(x)                                  # [bs x nvars * d_model x num_patch]
        #x = x.reshape(bs,self.nvars,self.d_model,-1)    # [bs x nvars x d_model x num_patch]
        return x

class MtcnFeatureMixer(nn.Module):
    '''
    Feature mixer: two grouped pointwise (kernel size 1) convolutions with an
    activation in between, using `nvars` groups.

    Note that `kernel_size` is not a convolution kernel size here: it is the
    multiplier applied to the hidden channel count between the two convolutions.
    `activation` is resolved through Layer.Act.get_activation_fn.

    in : [bs x nvars * d_model x num_patch]
    out: [bs x nvars x d_model x num_patch]
    '''
    def __init__(self, nvars, d_model, kernel_size = 1 , activation = 'gelu'):
        super().__init__()
        self.nvars   = nvars
        self.d_model = d_model
        self.act     = Layer.Act.get_activation_fn(activation)

        base_width   = nvars * d_model
        hidden_width = kernel_size * base_width
        # expand then project back; both convolutions are pointwise and grouped by variable
        self.pw_con_up   = nn.Conv1d(base_width, hidden_width, 1, groups = nvars)
        self.pw_con_down = nn.Conv1d(hidden_width, base_width, 1, groups = nvars)

    def forward(self,x):
        '''
        in : [bs x nvars * d_model x num_patch]
        out: [bs x nvars x d_model x num_patch]
        '''
        hidden = self.act(self.pw_con_up(x))    # [bs x kernel_size * nvars * d_model x num_patch]
        mixed  = self.pw_con_down(hidden)       # [bs x nvars * d_model x num_patch]
        # split the flat channel axis back into (nvars, d_model)
        return mixed.reshape(x.shape[0], self.nvars, self.d_model, -1)
    
    
class MtcnChannelMixer(nn.Module):
    '''
    Channel (cross-variable) mixer: the input is rearranged so that the flat
    channel axis is d_model-major, then two grouped pointwise (kernel size 1)
    convolutions with `d_model` groups are applied, so each group holds the
    `nvars` channels that share one embedding index.

    Note that `kernel_size` is not a convolution kernel size here: it is the
    multiplier applied to the hidden channel count between the two convolutions.
    `activation` is resolved through Layer.Act.get_activation_fn.

    in : [bs x nvars x d_model x num_patch]
    out: [bs x nvars x d_model x num_patch]
    '''
    def __init__(self, nvars, d_model, kernel_size = 1 , activation = 'gelu'):
        super().__init__()
        self.nvars   = nvars
        self.d_model = d_model
        self.act     = Layer.Act.get_activation_fn(activation)
        self.pw_con_up = nn.Conv1d(
            in_channels     = nvars * d_model, 
            out_channels    = kernel_size * nvars * d_model, 
            kernel_size     = 1,
            groups          = d_model
        )
        self.pw_con_down = nn.Conv1d(
            in_channels     = kernel_size * nvars * d_model, 
            out_channels    = nvars * d_model, 
            kernel_size     = 1,
            groups          = d_model
        )
        
    def forward(self,x):
        '''
        in : [bs x nvars x d_model x num_patch]
        out: [bs x nvars x d_model x num_patch]
        ''' 
        bs = x.shape[0]
        x = x.permute(0,2,1,3)                                      # [bs x d_model x nvars x num_patch]
        x = x.reshape(bs,self.d_model*self.nvars,-1)                # [bs x d_model * nvars x num_patch]
        x = self.pw_con_up(x)                                       # [bs x kernel_size * d_model * nvars x num_patch]
        x = self.act(x)                                             # [bs x kernel_size * d_model * nvars x num_patch]
        x = self.pw_con_down(x)                                     # [bs x d_model * nvars x num_patch]
        # The flat channel axis is d_model-major at this point, so it must be
        # split as (d_model, nvars). Splitting it as (nvars, d_model) would
        # scramble channels and make the permute below return
        # [bs x d_model x nvars x num_patch] instead of the documented layout.
        x = x.reshape(bs,self.d_model,self.nvars,-1)                # [bs x d_model x nvars x num_patch]
        x = x.permute(0,2,1,3)                                      # [bs x nvars x d_model x num_patch]
        return x  

class ModernTCNBlock(nn.Module):
    '''
    Residual block chaining a time mixer, a feature mixer and an optional
    channel mixer. When `channel_mixer` is False an empty nn.Sequential
    (a pass-through) takes the channel mixer's place.

    in : [bs x nvars x num_patch x d_model]
    out: [bs x nvars x num_patch x d_model]
    '''
    def __init__(self, nvars, d_model, kernel_size, expansion_factor = 1,channel_mixer = True):
        super().__init__()
        self.ts_mixer = MtcnTSMixer(nvars, d_model, kernel_size)
        self.feature_mixer = MtcnFeatureMixer(nvars, d_model, expansion_factor)
        if channel_mixer:
            self.channel_mixer = MtcnChannelMixer(nvars, d_model, expansion_factor)
        else:
            self.channel_mixer = nn.Sequential()
    
    def forward(self, x):
        '''
        in : [bs x nvars x num_patch x d_model]
        out: [bs x nvars x num_patch x d_model]
        '''
        hidden = x.permute(0,1,3,2)             # [bs x nvars x d_model x num_patch]
        # ts_mixer      -> [bs x nvars * d_model x num_patch]  (flattened channels)
        # feature_mixer -> [bs x nvars x d_model x num_patch]
        # channel_mixer -> [bs x nvars x d_model x num_patch]
        for stage in (self.ts_mixer, self.feature_mixer, self.channel_mixer):
            hidden = stage(hidden)
        hidden = hidden.permute(0,1,3,2)        # [bs x nvars x num_patch x d_model]

        # residual connection around the three mixers
        return x + hidden

In [None]:
# 1,train,valid loader shuffle
# 2,tar extract and record all dates
# 3,separate model config yaml, and use train yaml to load them
# 4,

In [None]:
class ModernTCN(nn.Module):
    '''
    ModernTCN encoder: patch embedding -> stack of ModernTCNBlock -> per-variable
    linear projection of the flattened patches.

    in : [bs x seq_len x nvars]   (layout of the sample batch fed to ModernTCNEmbed
                                   elsewhere in this notebook -- TODO confirm)
    out: [bs x nvars * d_model]

    Args:
        nvars            : number of input variables
        seq_len          : input sequence length
        d_model          : embedding width per variable
        patch_len        : patch length passed to ModernTCNEmbed
        stride           : patch stride; None falls back to patch_len // 2
        kernel_size      : depthwise kernel size of each block's time mixer
        expansion_factor : hidden-width multiplier of each block's pointwise mixers
        num_layers       : number of stacked ModernTCNBlock
        label_idx        : stored on the module; not used by forward
        channel_mixer    : whether each block includes the cross-variable mixer
        shared_embedding : forwarded to ModernTCNEmbed as `shared`
        factormean, num_MLP, gamma, predict_steps, shared_head :
            accepted for interface compatibility; not used by this implementation
    '''
    def __init__(self,
                 nvars :int ,
                 seq_len :int,
                 d_model = 16,
                 patch_len = 8,
                 stride = 4 ,
                 kernel_size = 3 ,
                 expansion_factor = 1,
                 num_layers = 1,
                 label_idx = 1,
                 factormean = True,
                 num_MLP = 1,
                 gamma = 0,
                 predict_steps:int = 1,
                 channel_mixer = True,
                 shared_embedding=True, shared_head = False, 
                 ):
        super().__init__()

        if stride is None: stride = patch_len // 2
        # Patch count must match what the embedding actually emits, otherwise
        # ts_linear gets the wrong in_features. This is the formula used in this
        # notebook's setup cell, which agrees with the recorded embed output
        # (seq_len=30, patch_len=3, stride=2 -> 15 patches).
        # NOTE(review): confirm against ModernTCNEmbed's padding behaviour.
        num_patch = (max(seq_len, patch_len) - patch_len) // stride + 2
        self.embed = ModernTCNEmbed(nvars , d_model , patch_len=patch_len , stride=stride, shared=shared_embedding)

        # ModernTCNBlock does not take a num_patch argument, so none is passed.
        layers = [ModernTCNBlock(nvars=nvars,
                                 d_model=d_model,
                                 kernel_size=kernel_size,
                                 expansion_factor=expansion_factor,
                                 channel_mixer=channel_mixer
                                 ) for _ in range(num_layers)
                  ]
        self.tcn_encoder = nn.Sequential(*layers)

        # predict head: collapse all patches of one variable into d_model features
        self.ts_linear = nn.Linear(d_model*num_patch,d_model)   
        
        # task specific setting
        self.label_idx = label_idx
        
        # predict layer
        # NOTE(review): defined but never applied in forward -- confirm whether
        # it should be.
        self.predict_layer = nn.Sequential(
            nn.Linear(nvars*d_model, 50)
        )
    
    
    def forward(self,inputs):
        '''
        in : [bs x seq_len x nvars]  (TODO confirm against ModernTCNEmbed)
        out: [bs x nvars * d_model]
        '''
        x_emb = self.embed(inputs)              # [bs x nvars x num_patch x d_model]
        x_out = self.tcn_encoder(x_emb)         # [bs x nvars x num_patch x d_model]
        # merge the last two axes (plain torch; einops is not imported here)
        x_out = x_out.flatten(start_dim=2)      # [bs x nvars x num_patch * d_model]
        
        x_out = nn.functional.gelu(self.ts_linear(x_out))  # [bs x nvars x d_model]
        x_out = x_out.flatten(start_dim=1)      # [bs x nvars * d_model]
        return x_out
    