In [1]:
from IPython.core.interactiveshell import InteractiveShell
import ipynbname
InteractiveShell.ast_node_interactivity = "all"
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import torch.optim as optim
import torch.utils.data
from torch.nn import functional as F
import time
import torch
import random

from sklearn.preprocessing import LabelEncoder
from sktime.datasets import load_from_tsfile_to_dataframe

In [2]:
# Hyper-parameters and runtime settings shared by every later cell through the
# module-level `config` namespace and the `device` variable.
from argparse import Namespace
import os
# Makes CUDA kernel launches synchronous so device-side errors surface at the failing call.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
config = Namespace()

config.batch_size = 2
# NOTE(review): padding_idx and defaut_max_size are not read anywhere in the visible cells.
config.padding_idx = 99999
config.defaut_max_size = 500
# config.defaut_max_size_hash = 500

# Placeholder; overwritten with the series length taken from X_train.shape in a later cell.
config.ts_max_length = None

# Read by TimeLangNet_Encoder: reserve one extra id per feature for padding / for a prompt token.
config.with_feature_padding = True
config.with_feature_prompt = True 

# Placeholder; overwritten with the channel count taken from X_train.shape in a later cell.
config.ts_feature_size = None
# Passed as `ts_max_idxs` to the encoder; None makes the encoder derive the vocabulary sizes
# itself and store them in `config.ts_max_idxs` (note the different attribute name).
config.ts_max_idx = None

config.hidden_dim = 64
config.with_prompt = True 
# Transformer dimensions handed to MyModel_FinLangNet when the model is built.
config.TimeLangNet_embedding_dim = 64
config.TimeLangNet_layers = 2
config.TimeLangNet_heads  = 2
config.TimeLangNet_d_k = 4
config.TimeLangNet_d_v = 4
# Placeholder; overwritten with max(Y_train) + 1 in a later cell.
config.num_class = None

# Optimisation settings: number of epochs, AdamW learning rate and weight decay.
config.epoch = 30
# config.lr = 5e-4
config.lr = 0.001
config.l2_weight = 0.01

# NOTE(review): ALPHA / BETA / GAMMA are not read anywhere in the visible cells.
config.ALPHA = 0.4
config.BETA = 0.6
config.GAMMA = 1


# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


[34m[1mwandb[0m: Currently logged in as: [33mleiyu0210[0m ([33mtemporal_name[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
# Root folder holding one sub-folder per dataset (<name>/<name>_TRAIN.ts and <name>_TEST.ts).
path="Multivariate_ts/" #datasets path 
# MSTC_Data.csv has no header row; each row becomes a one-element list (see the printed output).
flist = pd.read_csv("MSTC_Data.csv", header=None)
flist = flist.to_numpy().tolist()

print(flist)

def readucr(filename):
    """Load a ``.ts`` file through sktime's ``load_from_tsfile_to_dataframe``.

    The loader's result is returned untouched; the callers in this notebook
    unpack it as ``(x, y)``.
    """
    return load_from_tsfile_to_dataframe(filename)
# Datasets to run, in the same one-name-per-inner-list layout as `flist`.
dataset_names = [['EthanolConcentration']]

def preprocess_data(x, y):
    """Convert a nested time-series frame and its labels into dense float32 arrays.

    Args:
        x: object exposing ``to_numpy()`` that yields a 2-D ``(samples, channels)``
            array whose cells each hold one 1-D series. Every series is taken to
            have the length of the very first cell.
        y: class labels. They are encoded to integer ids by a ``LabelEncoder``
            fitted on this ``y`` only.

    Returns:
        Tuple ``(x_float, y_float)``: a float32 array of shape
        ``(samples, channels, length)`` and the encoded labels cast to float32.
    """
    cells = x.to_numpy()
    label_ids = LabelEncoder().fit_transform(y.copy())

    n_channels = cells.shape[1]
    series_len = len(cells[0][0])

    # Lay the channels of one sample end to end, then fold them back into
    # a (channels, length) matrix.
    dense = np.array([
        np.concatenate(sample, axis=0).reshape(n_channels, series_len)
        for sample in cells
    ])

    return np.float32(dense), np.float32(label_ids)

# Empty score table, one row per entry of `dataset_names`, pre-filled with 0.
# NOTE(review): nothing in the visible cells writes into `results`.
results = pd.DataFrame(index = dataset_names,
                       columns = ["accuracy_mean",
                                  "accuracy_standard_deviation",
                                  "time_training_seconds",
                                  "time_test_seconds"],
                       data = 0)
results.index.name = "dataset"

print(f"RUNNING".center(80, "="))

# Each pass overwrites X_train / Y_train / X_test / Y_test, so after the loop they
# hold the arrays of the last dataset in `dataset_names`.
for dataset_name in dataset_names:
    print(f"{dataset_name}".center(80, "-"))

    # -- read data -------------------------------------------------------------

    print(f"Loading data".ljust(80 - 5, "."), end = "", flush = True)
    # Labels are encoded separately for each split inside preprocess_data.
    x_train, y_train = readucr(path + dataset_name[0] + '/' + dataset_name[0] + '_TRAIN.ts')
    X_train, Y_train = preprocess_data(x_train, y_train)

    x_test, y_test = readucr(path + dataset_name[0] + '/' + dataset_name[0] + '_TEST.ts')
    X_test, Y_test = preprocess_data(x_test, y_test)
    
    print("Done.")

[['ArticularyWordRecognition'], ['AtrialFibrillation'], ['BasicMotions'], ['Cricket'], ['DuckDuckGeese'], ['EigenWorms'], ['Epilepsy'], ['EthanolConcentration'], ['ERing'], ['FaceDetection'], ['FingerMovements'], ['HandMovementDirection'], ['Handwriting'], ['Heartbeat'], ['Libras'], ['LSST'], ['MotorImagery'], ['NATOPS'], ['PenDigits'], ['PEMS-SF'], ['PhonemeSpectra'], ['RacketSports'], ['SelfRegulationSCP1'], ['SelfRegulationSCP2'], ['StandWalkJump'], ['UWaveGestureLibrary']]
----------------------------['EthanolConcentration']----------------------------
Loading data...............................................................Done.


In [4]:
# Sanity check: array shapes of both splits and the largest encoded label in each.
X_train.shape, Y_train.shape, X_test.shape, Y_test.shape, max(Y_train), max(Y_test)

((261, 3, 1751), (261,), (263, 3, 1751), (263,), 3.0, 3.0)

In [5]:
# Fill the config placeholders from the training data: channel count and series length
# come from X_train's shape, the class count from the largest encoded label.
_, config.ts_feature_size, config.ts_max_length = X_train.shape
config.num_class = int(max(Y_train) + 1)
config.ts_feature_size, config.ts_max_length, config.num_class

(3, 1751, 4)

In [6]:
class TimeLangNet_Encoder:
    """Discretise a multivariate time series into per-feature integer token ids.

    Every feature (channel) owns a set of bin edges. ``np.digitize`` maps a value
    to an id in ``0 .. len(edges)``. Two special ids sit at the top of each
    feature's vocabulary of size ``max_idx``:

    * ``max_idx - 1`` pads sequences shorter than ``max_length`` (appended at the end);
    * ``max_idx - 2`` is a prompt token put in front of every sequence when
      ``config.with_feature_prompt`` is truthy.

    The class relies on the notebook-level ``config`` namespace: it reads
    ``config.with_feature_padding`` / ``config.with_feature_prompt`` and, when the
    vocabulary sizes are derived here, publishes them as ``config.ts_max_idxs``.

    Args:
        bins: ``'auto'`` (NumPy's automatic histogram edges per feature), an int
            (that many equal-width bins between each feature's min and max), a
            sequence of edges shared by all features, or a dict mapping a feature
            name to its edges.
    """

    def __init__(self, bins='auto'):
        self.bins = bins
        self.bin_edges_ = None
        # When falsy, ``transform`` produces nothing (returns None).
        self.train_type = 1

    def fit(self, X, feature_names):
        """Compute and store one array of bin edges per feature in ``bin_edges_``.

        Args:
            X: array of shape ``(batch, feature_dim, length)``.
            feature_names: one name per feature, in channel order. Only used when
                ``bins`` is a dict.

        Raises:
            ValueError: if ``bins`` has an unsupported type, if ``bins`` is a dict
                and ``feature_names`` does not name every feature, or if a named
                feature has no entry in ``bins``.
        """
        batch_size, feature_dim, feature_length = X.shape

        if isinstance(self.bins, dict):
            if feature_names is None or len(feature_names) != feature_dim:
                raise ValueError(
                    "feature_names must contain one name for each of the %d features "
                    "when bins is a dict." % feature_dim
                )

        bin_edges = []
        for i in range(feature_dim):
            feature_data = X[:, i, :].flatten()

            if isinstance(self.bins, str) and self.bins == 'auto':
                bins = np.histogram_bin_edges(feature_data, bins='auto')
            elif isinstance(self.bins, int):
                bins = np.linspace(feature_data.min(), feature_data.max(), self.bins + 1)
            elif isinstance(self.bins, (np.ndarray, list, tuple)):
                bins = np.array(self.bins)
            elif isinstance(self.bins, dict):
                # Exactly one set of edges per feature, looked up by that feature's name.
                feature = feature_names[i]
                if feature not in self.bins:
                    raise ValueError("Bin edges are not provided for feature '%s'." % feature)
                bins = np.asarray(self.bins[feature])
            else:
                raise ValueError("Invalid bin definition.")

            bin_edges.append(bins)

        # Assigned only after every feature succeeded, so a failed fit leaves no partial state.
        self.bin_edges_ = bin_edges

    def process_add_prompt(self, input_list, k, ts_max_idxs):
        """Bring each token sequence to length ``k`` and optionally prepend the prompt id.

        Sequences are paired position-wise with ``ts_max_idxs``: the i-th sequence
        uses the i-th vocabulary size. Short sequences are right-padded with
        ``max_idx - 1``; long ones keep their last ``k`` tokens. When
        ``config.with_feature_prompt`` is truthy, ``max_idx - 2`` is put in front,
        giving ``k + 1`` tokens. The input lists are left untouched.

        Returns:
            A list with one new token list per ``(sequence, max_idx)`` pair.
        """
        result = []
        for sub_lst, max_idx in zip(input_list, ts_max_idxs):
            tokens = list(sub_lst)
            if len(tokens) < k:
                tokens = tokens + [max_idx - 1] * (k - len(tokens))
            else:
                # Explicit start index so that k == 0 yields an empty sequence
                # (``tokens[-0:]`` would return the whole list).
                tokens = tokens[len(tokens) - k:]

            if config.with_feature_prompt:
                tokens = [max_idx - 2] + tokens
            result.append(tokens)
        return result

    def transform(self, X, ts_max_idxs, max_length):
        """Map ``X`` to token ids using the fitted bin edges.

        Args:
            X: array of shape ``(batch, feature_dim, length)``.
            ts_max_idxs: vocabulary size per feature. When None it is derived as
                ``len(edges) + 1`` plus one for padding and one for the prompt
                token (each only if the matching ``config`` flag is set) and
                stored in ``config.ts_max_idxs``.
            max_length: target sequence length before the prompt token is added;
                None means "the length of ``X``".

        Returns:
            ``(tokens, lengths)`` where ``tokens`` is an int64 array of shape
            ``(batch, feature_dim, max_length [+ 1 with prompt])`` and ``lengths``
            is a list holding the input series length once per sample.
            Returns None when ``self.train_type`` is falsy.

        Raises:
            ValueError: if ``fit`` has not been called or the number of features
                differs from the fitted one.
        """
        if self.bin_edges_ is None:
            raise ValueError("The fit method should be called before transform.")

        batch_size, feature_dim, length = X.shape
        if feature_dim != len(self.bin_edges_):
            raise ValueError(
                "X has %d features but the encoder was fitted on %d."
                % (feature_dim, len(self.bin_edges_))
            )

        if ts_max_idxs is None:
            ts_max_idxs = [len(bins) + 1 for bins in self.bin_edges_]
            if config.with_feature_padding:
                ts_max_idxs = [max_idx + 1 for max_idx in ts_max_idxs]
            if config.with_feature_prompt:
                ts_max_idxs = [max_idx + 1 for max_idx in ts_max_idxs]

            config.ts_max_idxs = ts_max_idxs

        if max_length is None:
            max_length = length

        if self.train_type:
            width = max_length + (1 if config.with_feature_prompt else 0)
            # Explicit output shape keeps the batch and feature axes even when either is 1.
            binned_features_array = np.empty((batch_size, feature_dim, width), dtype=np.int64)

            for i in range(feature_dim):
                token_ids = np.digitize(X[:, i, :], self.bin_edges_[i], right=True)
                # Every sample of feature i must use feature i's own vocabulary size.
                rows = self.process_add_prompt(
                    token_ids.tolist(), max_length, [ts_max_idxs[i]] * batch_size
                )
                binned_features_array[:, i, :] = np.asarray(rows, dtype=np.int64).reshape(
                    batch_size, width
                )

            batch_real_lengths = [length] * batch_size
            return binned_features_array, batch_real_lengths

    def fit_transform(self, X, feature_names=None, ts_max_idxs=None, max_length=None):
        """Fit the bin edges on ``X`` and immediately encode the same ``X``.

        Use plain ``transform`` for data that must share the edges of an
        earlier fit, since this method recomputes them.
        """
        self.fit(X, feature_names)
        return self.transform(X, ts_max_idxs, max_length)
    

In [7]:
# Bin edges are learned from the training split only. The test split is encoded with
# those same edges (`transform`, not `fit_transform`), so a token id means the same value
# range in both splits and no test-set statistics leak into the preprocessing.
# NOTE: this cell overwrites X_train / X_test with their encoded versions, so it must run
# exactly once after the loading cell.
encoder_auto = TimeLangNet_Encoder(bins=42)
X_train, X_trian_length = encoder_auto.fit_transform(X_train, ts_max_idxs=config.ts_max_idx, max_length=config.ts_max_length)
print(config.ts_max_idxs)
X_test, X_test_length = encoder_auto.transform(X_test, ts_max_idxs=config.ts_max_idxs, max_length=config.ts_max_length)
print(config.ts_max_idxs)

[46, 46, 46]
[46, 46, 46]


In [8]:
import torch
from torch.utils.data import DataLoader, TensorDataset

def create_dataloader(X, Y, lengths, batch_size=16, shuffle=True):
    """Wrap features, labels and sequence lengths in a ``DataLoader``.

    Args:
        X: feature array; stored as a float tensor.
        Y: labels; stored as int64.
        lengths: per-sample sequence lengths; stored as int64.
        batch_size: number of samples per batch.
        shuffle: whether the loader reshuffles the samples on every pass.

    Returns:
        A ``DataLoader`` whose batches are ``(features, labels, lengths)`` triples.
    """
    tensors = (
        torch.Tensor(X),
        torch.Tensor(Y).long(),
        torch.Tensor(lengths).long(),
    )
    return DataLoader(TensorDataset(*tensors), batch_size=batch_size, shuffle=shuffle)

# The training loader shuffles (create_dataloader's default); the test loader keeps file order.
train_dataloader = create_dataloader(X_train, Y_train, X_trian_length, batch_size=config.batch_size)
test_dataloader = create_dataloader(X_test, Y_test, X_test_length, batch_size=config.batch_size, shuffle=False)

    
# Peek at the first two test batches to check shapes, token ids, lengths and labels.
# NOTE(review): `cnt` is not used in this cell.
cnt = 0
for step, data in enumerate(test_dataloader):
    if step > 1: break 
    print(step)
    features , label, length= data
    print(features.shape)
    print(features)
    print(length)
    print('label:',label) 
    print("\n")     

0
torch.Size([2, 3, 1752])
tensor([[[44.,  2.,  2.,  ...,  2.,  2.,  2.],
         [44.,  2.,  2.,  ...,  2.,  2.,  2.],
         [44.,  2.,  2.,  ...,  2.,  2.,  2.]],

        [[44.,  2.,  2.,  ...,  2.,  2.,  2.],
         [44.,  2.,  2.,  ...,  2.,  2.,  2.],
         [44.,  2.,  2.,  ...,  2.,  2.,  2.]]])
tensor([1751, 1751])
label: tensor([0, 0])


1
torch.Size([2, 3, 1752])
tensor([[[44.,  2.,  2.,  ...,  2.,  2.,  2.],
         [44.,  2.,  2.,  ...,  2.,  2.,  2.],
         [44.,  2.,  2.,  ...,  2.,  2.,  2.]],

        [[44.,  2.,  2.,  ...,  2.,  2.,  2.],
         [44.,  2.,  2.,  ...,  2.,  2.,  2.],
         [44.,  2.,  2.,  ...,  2.,  2.,  2.]]])
tensor([1751, 1751])
label: tensor([0, 0])




In [9]:
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torch.nn.utils import weight_norm
import math
from math import sqrt
import numpy as np
from random import * 
from torch.autograd import Variable
# from transformers import AutoTokenizer, AutoModel

################### Utils     #########################
class TriangularCausalMask():
    """Boolean mask of shape ``[B, 1, L, L]`` that is True strictly above the diagonal.

    True entries mark (query, key) pairs where the key lies after the query.
    """

    def __init__(self, B, L, device="cpu"):
        with torch.no_grad():
            all_pairs = torch.ones([B, 1, L, L], dtype=torch.bool)
            self._mask = all_pairs.triu(diagonal=1).to(device)

    @property
    def mask(self):
        """The stored mask tensor."""
        return self._mask

    
#####################   Embedding layers ###########################

class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a ``[batch, seq_len, d_model]`` input.

    Args:
        max_len: number of positions that are precomputed; inputs must not be longer.
        d_model: embedding width (even or odd).
        dropout: dropout probability applied after the encoding is added.
    """

    def __init__(self, max_len, d_model, dropout = 0.05):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        # Compute the positional encodings once in log space.
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) *
                             -(math.log(10000.0) / d_model))
        angles = position * div_term  # [max_len, ceil(d_model / 2)]

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one cosine column fewer than sine columns.
        pe[:, 1::2] = torch.cos(angles)[:, : d_model // 2]

        # Registered as a buffer: follows .to(device) and is saved, but is never trained.
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return ``dropout(x + pe[:, :seq_len])``."""
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)

    
class Fusion(nn.Module):
    """Attention-style fusion of several embeddings of identical shape.

    Each embedding is scored by a two-layer network (linear -> tanh -> linear);
    the scores are soft-maxed across the embeddings, passed through dropout, and
    used as weights for a weighted sum.
    """

    def __init__(self, input_size, out=1, dropout=0.2):
        super().__init__()
        self.linear1 = nn.Linear(input_size, input_size)
        self.linear2 = nn.Linear(input_size, out)
        self.dropout = nn.Dropout(dropout)
        self.init_weights()

    def init_weights(self):
        """Re-initialise both linear weights with Xavier-normal values."""
        for layer in (self.linear1, self.linear2):
            init.xavier_normal_(layer.weight)

    def forward(self, input_embeddings):
        """Fuse a sequence of equally shaped tensors into one tensor of that shape."""
        stacked = torch.stack(input_embeddings)
        hidden = torch.tanh(self.linear1(stacked))
        # Softmax over dim 0, i.e. across the stacked embeddings.
        weights = self.dropout(F.softmax(self.linear2(hidden), dim=0))
        return (weights * stacked).sum(dim=0)

class FeatureEmbedding(nn.Module):
    """Embed per-feature token ids and merge them into one vector per time step.

    Each feature has its own ``nn.Embedding`` whose last id
    (``feature_max_idxs[i] - 1``) is the padding index. ``forward`` merges the
    features as a learned weighted sum, optionally prepends a learnable prompt
    vector, optionally adds positional encodings, and applies LayerNorm.

    Args:
        feature_size: number of features (channels).
        feature_max_idxs: vocabulary size of each feature.
        d_model: embedding width.
        max_len: number of positions precomputed by the positional encoding
            (only relevant when ``with_pos`` is True).
        with_pos: add sinusoidal positional encodings.
        with_prompt: prepend one learnable prompt vector to every sequence.
    """

    def __init__(self,
                 feature_size,
                 feature_max_idxs,
                 d_model,
                 max_len=512,
                 with_pos = False,
                 with_prompt = True):
        super(FeatureEmbedding, self).__init__()

        self.feature_size = feature_size
        self.feature_max_idxs = feature_max_idxs
        self.d_model = d_model
        self.with_pos = with_pos
        self.with_prompt = with_prompt

        self.embedding_layers = nn.ModuleList([
            nn.Embedding(num_embeddings=self.feature_max_idxs[i],
                         embedding_dim=self.d_model,
                         padding_idx=self.feature_max_idxs[i] - 1)
            for i in range(feature_size)
        ])
        if self.with_pos:
            self.pos_embedding = PositionalEncoding(max_len, d_model)  # position embedding
        self.prompt_token_embedding = nn.Parameter(torch.zeros(1, 1, self.d_model))
        # NOTE(review): the per-feature weights start at zero, so the merged embedding is
        # all zeros until the first optimizer step; confirm this initialisation is intended.
        self.feature_weight = nn.Parameter(torch.zeros(self.feature_size))
        # Only referenced by a commented-out line in get_feature_embeding_with_sum.
        self.fusion_layer = Fusion(input_size = self.d_model)
        self.norm = nn.LayerNorm(d_model)

    def get_feature_embeding_with_sum(self, x):
        """Unweighted sum of the per-feature embeddings; returns ``[bsz, seq_len, d_model]``."""
        features_embeddings = []
        for i in range(self.feature_size):
            embedded_feature = self.embedding_layers[i](x[:, i, :].long()) # [bsz, max_len] -> [bsz, max_len, hidden_dim]
            features_embeddings.append(embedded_feature)
        features_embeddings = torch.stack(features_embeddings)
        features_embedding = torch.sum(features_embeddings, dim=0)  #len*emb_dim
#         features_embedding = self.fusion_layer(features_embeddings)

        return features_embedding

    def get_feature_embedding_with_add(self, x):
        """Sum of the per-feature embeddings, each scaled by its ``feature_weight`` entry.

        Args:
            x: token ids of shape ``[bsz, feature_size, seq_len]`` (cast to long here).

        Returns:
            Tensor of shape ``[bsz, seq_len, d_model]`` on the module's own device.
        """
        bsz, fsz, seq_len = x.size()

        # Allocate next to the parameters instead of relying on a notebook-level `device`.
        features_embedding = torch.zeros(bsz, seq_len, self.d_model,
                                         device=self.feature_weight.device,
                                         dtype=self.feature_weight.dtype)

        for i in range(self.feature_size):
            embedded_feature = self.embedding_layers[i](x[:, i, :].long()) # [bsz, max_len] -> [bsz, max_len, hidden_dim]
            features_embedding = features_embedding + self.feature_weight[i] * embedded_feature

        return features_embedding

    def forward(self, x):
        """Embed ``x`` of shape ``[bsz, feature_size, max_len]``.

        Returns:
            Layer-normalised tensor of shape ``[bsz, max_len (+1 with prompt), d_model]``.
        """
        x_embedding = self.get_feature_embedding_with_add(x)

        if self.with_prompt:
            prompt_token = self.prompt_token_embedding.expand(x.size(0), -1, -1)
            x_embedding = torch.cat([prompt_token, x_embedding], dim=1) # [bsz, max_len + 1, dim]

        if self.with_pos:
            x_embedding = self.pos_embedding(x_embedding)

        return self.norm(x_embedding)

###################### main model #####################################
    
class Encoder(nn.Module):
    """Stack of attention layers, optionally interleaved with conv layers.

    Args:
        attn_layers: layers called as ``layer(x, attn_mask=..., tau=..., delta=...)``
            and returning ``(x, attn)``.
        conv_layers: optional layers applied after each paired attention layer;
            the last attention layer then runs on its own, without ``attn_mask``.
        norm_layer: optional module applied to the final output.
    """

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super().__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = None if conv_layers is None else nn.ModuleList(conv_layers)
        self.norm = norm_layer

    def forward(self, x, attn_mask=None, tau=None, delta=None):
        """Run ``x`` ([B, L, D]) through the stack; returns ``(x, list_of_attns)``."""
        collected = []

        if self.conv_layers is None:
            for layer in self.attn_layers:
                x, weights = layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
                collected.append(weights)
        else:
            paired = zip(self.attn_layers, self.conv_layers)
            for idx, (attn_layer, conv_layer) in enumerate(paired):
                # delta is only forwarded to the first layer.
                layer_delta = delta if idx == 0 else None
                x, weights = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=layer_delta)
                x = conv_layer(x)
                collected.append(weights)
            x, weights = self.attn_layers[-1](x, tau=tau, delta=None)
            collected.append(weights)

        if self.norm is not None:
            x = self.norm(x)

        return x, collected

class EncoderLayer(nn.Module):
    """Self-attention followed by a position-wise feed-forward net, both with residuals.

    The feed-forward part is two kernel-size-1 ``Conv1d`` layers
    (``d_model -> d_ff -> d_model``); LayerNorm follows each residual sum.

    Args:
        attention: module called as ``attention(q, k, v, attn_mask=..., tau=..., delta=...)``
            and returning ``(output, attn)``.
        d_model: model width.
        d_ff: hidden width of the feed-forward part; ``4 * d_model`` when falsy.
        dropout: dropout probability used after attention and inside the feed-forward part.
        activation: ``"relu"`` selects ReLU; anything else selects GELU.
    """

    def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
        super().__init__()
        if not d_ff:
            d_ff = 4 * d_model
        self.attention = attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, attn_mask=None, tau=None, delta=None):
        """Return ``(output, attn)`` for an input ``x`` of shape [B, L, D]."""
        attended, attn = self.attention(
            x, x, x,
            attn_mask=attn_mask,
            tau=tau, delta=delta
        )
        residual = self.norm1(x + self.dropout(attended))

        # Conv1d expects channels on axis 1, hence the transposes around the two convolutions.
        hidden = self.conv1(residual.transpose(-1, 1))
        hidden = self.dropout(self.activation(hidden))
        hidden = self.dropout(self.conv2(hidden).transpose(-1, 1))

        return self.norm2(residual + hidden), attn
    

class AttentionLayer(nn.Module):
    """Multi-head wrapper: project q/k/v, split into heads, run the inner attention, merge.

    Args:
        attention: module called as ``attention(q, k, v, attn_mask, tau=..., delta=...)``
            on ``[B, L, H, E]``-shaped tensors and returning ``(output, attn)``.
        d_model: model width.
        n_heads: number of heads.
        d_keys: per-head width of queries and keys; ``d_model // n_heads`` when falsy.
        d_values: per-head width of values; ``d_model // n_heads`` when falsy.
    """

    def __init__(self, attention, d_model, n_heads, d_keys=None,
                 d_values=None):
        super().__init__()

        default_head_dim = d_model // n_heads
        d_keys = d_keys or default_head_dim
        d_values = d_values or default_head_dim

        self.inner_attention = attention
        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        """Return ``(output [B, L, d_model], attn)``."""
        batch, q_len = queries.shape[0], queries.shape[1]
        kv_len = keys.shape[1]
        heads = self.n_heads

        q = self.query_projection(queries).view(batch, q_len, heads, -1)
        k = self.key_projection(keys).view(batch, kv_len, heads, -1)
        v = self.value_projection(values).view(batch, kv_len, heads, -1)

        context, attn = self.inner_attention(q, k, v, attn_mask, tau=tau, delta=delta)
        # Merge the heads back into a single feature axis.
        merged = context.view(batch, q_len, -1)

        return self.out_projection(merged), attn

class FullAttention(nn.Module):
    """Scaled dot-product attention over per-head tensors.

    Args:
        n_heads: number of heads (stored; the head count actually used comes from the inputs).
        scale: multiplier applied to the raw scores. When None the scores are
            divided by ``sqrt(E) + 1e-6`` where ``E`` is the per-head query width.
        attention_dropout: dropout probability applied to the attention weights.
        output_attention: return the attention weights as second result (else None).
        future_mask_flag: mask out keys that lie after the query (causal attention).
            Requires queries and keys of equal length.
    """

    def __init__(self,
                 n_heads=8,
                 scale=None,
                 attention_dropout=0.1,
                 output_attention=False,
                 future_mask_flag = False,
                 ):
        super(FullAttention, self).__init__()

        self.scale = scale
        self.future_mask_flag = future_mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        """Attend from ``queries`` [B, L, H, E] over ``keys`` [B, S, H, E] / ``values`` [B, S, H, D].

        Args:
            attn_mask: optional boolean tensor broadcastable to ``[B, H, L, S]``;
                True entries are excluded from attention.
            tau, delta: accepted for interface compatibility and not used here.

        Returns:
            ``(output [B, L, H, D], attention [B, H, L, S] or None)``.
        """
        B, L, H, E = queries.shape
        _, S, _, D = values.shape

        scores = torch.einsum("blhe,bshe->bhls", queries, keys)
        if self.scale is None:
            scores = scores / (sqrt(E) + 1e-6)
        else:
            scores = scores * self.scale

        if self.future_mask_flag:
            # [B, 1, L, L] boolean mask, broadcast over the head axis.
            causal_mask = TriangularCausalMask(B, L, device=queries.device).mask
            scores.masked_fill_(causal_mask, -1e10)
        if attn_mask is not None:
            scores.masked_fill_(attn_mask, -1e10)

        A = self.dropout(torch.softmax(scores, dim=-1))
        V = torch.einsum("bhls,bshd->blhd", A, values)

        if self.output_attention:
            return V.contiguous(), A
        else:
            return V.contiguous(), None

class TimeLangNet(nn.Module):
    """Transformer-encoder classifier over discretised multivariate time series.

    ``forward`` pipeline: token ids -> ``FeatureEmbedding`` -> ``n_layers`` encoder
    layers with full self-attention (padding positions masked as keys) -> GELU
    and dropout -> mean over the sequence axis -> ``out_linear`` -> ``fc``.

    Args:
        feature_size: number of features (channels).
        feature_max_idxs: vocabulary size of each feature; ``feature_max_idxs[i] - 1``
            is treated as that feature's padding id.
        num_class: number of output classes.
        max_len: forwarded to ``FeatureEmbedding``.
        n_layers: number of encoder layers.
        n_heads: number of attention heads.
        d_model: embedding / model width.
        d_ff: hidden width of the feed-forward part of each encoder layer.
        d_k: per-head width of queries and keys.
        d_v: per-head width of values.
        output_dim: width of the pooled representation fed to the classifier head.
        with_prompt: prepend a learnable prompt vector in the embedding layer
            (the padding mask is widened by one column to match).
        future_mask_flag: use causal self-attention in every encoder layer.
    """

    def __init__(self,
                 feature_size,
                 feature_max_idxs,
                 num_class,
                 max_len = 512,
                 n_layers = 6,  # number of Encoder of Encoder Layer
                 n_heads = 12,  # number of heads in Multi-Head Attention
                 d_model = 768, # Embedding Size
                 d_ff = 3072,   # 4*d_model, FeedForward dimension
                 d_k = 64,      # dimension of K(=Q), V
                 d_v = 64,       # dimension of K(=Q), V
                 output_dim = 256,
                 with_prompt = True,
                 future_mask_flag = False,
                 ):
        super(TimeLangNet, self).__init__()

        # hypers for the model
        self.feature_size = feature_size
        self.feature_max_idxs = feature_max_idxs
        self.max_len = max_len
        self.n_layers = n_layers
        self.n_heads = n_heads
        self.d_model = d_model
        self.d_ff = d_ff
        self.d_k = d_k
        self.d_v = d_v
        self.hidden_dim = 128
        self.output_dim = output_dim
        self.num_classes = num_class

        # flags
        self.future_mask_flag = future_mask_flag
        self.with_prompt = with_prompt

        # modules for the model
        self.embedding = FeatureEmbedding(feature_size = feature_size,
                                          feature_max_idxs = feature_max_idxs,
                                          d_model = d_model,
                                          max_len = max_len,
                                          with_prompt = with_prompt)

        self.dropout = nn.Dropout(0.1)
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AttentionLayer(
                        FullAttention(n_heads = self.n_heads,
                                      output_attention = True,
                                      future_mask_flag = self.future_mask_flag,
                                      ),
                        d_model = self.d_model,
                        n_heads = self.n_heads,
                        d_keys = self.d_k,
                        d_values = self.d_v,
                    ),
                    d_model = self.d_model,
                    d_ff = self.d_ff
                ) for _ in range(self.n_layers)
            ],
            norm_layer=torch.nn.LayerNorm(self.d_model)
        )
        self.out_linear = nn.Linear(self.d_model, output_dim)

        self.act = F.gelu
        self.fc = nn.Linear(output_dim, num_class)

    def forward(self, input_features, valid_len):
        """Classify a batch of token-id sequences.

        Args:
            input_features: token ids of shape ``[bsz, feature_size, max_len]``.
            valid_len: accepted for interface compatibility; not used.

        Returns:
            Raw class logits of shape ``[bsz, num_class]`` (no softmax applied).
        """
        padding_mask = self.get_padding_mask(input_features) # [bsz, n_heads, L, L]
        embedded_features = self.embedding(input_features)
        transformer_output, _ = self.encoder(embedded_features, attn_mask=padding_mask) # BS*length*hidden_dim
        transformer_output = self.act(transformer_output)
        transformer_output = self.dropout(transformer_output)
        # The mean runs over every position, padded ones and the prompt included.
        output_represenetations = self.out_linear(transformer_output.mean(1))

        output = self.fc(output_represenetations)

        return output

    def get_padding_mask(self, input_features):
        """Build the key-padding mask for self-attention.

        A time step counts as padding when the ids of all features add up to the
        sum of the per-feature padding ids (``feature_max_idxs[i] - 1``). With
        ``with_prompt`` a never-masked column is put in front for the prompt position.

        Returns:
            Boolean tensor of shape ``[bsz, n_heads, L, L]`` (``L = max_len``, plus one
            with prompt) on the device of ``input_features``; True marks keys to ignore.
            It is a broadcast view and must not be written to in place.
        """
        bsz, fsz, max_len = input_features.size()

        pad_id_sum = sum(self.feature_max_idxs) - fsz
        padding_mask = input_features.sum(1) == pad_id_sum # [bsz, max_len]
        if self.with_prompt:
            prompt_column = torch.zeros(bsz, 1, dtype=torch.bool, device=input_features.device)
            padding_mask = torch.cat([prompt_column, padding_mask], dim=1) # [bsz, max_len + 1]

        seq_len = padding_mask.size(1)
        # The same key mask applies to every head and every query position.
        return padding_mask[:, None, None, :].expand(bsz, self.n_heads, seq_len, seq_len)
        

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data.dataset import TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc, accuracy_score, recall_score, f1_score, precision_recall_curve, average_precision_score
import numpy as np
import random
import torch.nn.utils.rnn as rnn_utils
from torch.nn import Linear, ReLU, Sigmoid, Module, BCELoss
from torch.optim.lr_scheduler import ReduceLROnPlateau,CosineAnnealingLR,StepLR
import torch.nn.functional as F



class MyModel_FinLangNet(nn.Module):
    """Thin wrapper that builds a ``TimeLangNet`` and forwards to it.

    Args:
        ts_feature_size: number of time-series features (channels).
        ts_max_idxs: vocabulary size of each feature.
        num_class: number of output classes.
        d_model: embedding / model width.
        n_layers: number of encoder layers.
        n_heads: number of attention heads.
        d_ff: hidden width of the feed-forward part.
        d_k: per-head width of queries and keys.
        d_v: per-head width of values.
        hidden_dim: stored on the instance; not used by the wrapped network.
    """

    def __init__(self,
                 ts_feature_size,
                 ts_max_idxs,
                 num_class,
                 d_model=64,
                 n_layers=4,
                 n_heads=8,
                 d_ff=64*4,
                 d_k=8,
                 d_v=8,
                 hidden_dim=64,
                ):
        super(MyModel_FinLangNet, self).__init__()

        # hypers
        self.ts_len = ts_feature_size

        self.d_model = d_model
        self.embedding_dim = d_model
        self.n_layers = n_layers
        self.n_heads = n_heads
        self.d_ff = d_ff
        self.d_k = d_k
        self.d_v = d_v
        self.hidden_dim = hidden_dim
        self.ts_max_idxs = ts_max_idxs
        self.num_class = num_class
        # hyper functions for the model
        self.act = F.gelu
        self.sigmoid = nn.Sigmoid()  # sigmoid activation (not applied in forward)
        # Time-series branch: the output width of the pooled representation is fixed at 256.
        self.TimeLangNet = TimeLangNet(self.ts_len,
                 feature_max_idxs = ts_max_idxs,
                 num_class = self.num_class ,
                 n_layers =  self.n_layers  ,  # number of Encoder of Encoder Layer
                 n_heads  =  self.n_heads   ,  # number of heads in Multi-Head Attention
                 d_model  =  self.d_model   , # Embedding Size
                 d_ff     =  self.d_ff      ,   # 4*d_model, FeedForward dimension
                 d_k      =  self.d_k       ,      # dimension of K(=Q), V
                 d_v      =  self.d_v       ,      # dimension of K(=Q), V
                 output_dim = 256,
        )

    def forward(self,
                ts_feature,
                len_ts,
                ):
        """Return the wrapped network's output for ``ts_feature`` ([batch, features, length])."""
        # Time-series feature processing
        ts_feature_input =  ts_feature #BS*dim*length
        ts_out = self.TimeLangNet(ts_feature_input, len_ts)

        return ts_out

In [12]:
# Create the model instance from the shared config and move it to the selected device.
model = MyModel_FinLangNet(
    ts_feature_size = config.ts_feature_size,
    ts_max_idxs = config.ts_max_idxs,
    num_class = config.num_class,
    d_model = config.TimeLangNet_embedding_dim,
    n_layers = config.TimeLangNet_layers,
    n_heads = config.TimeLangNet_heads,
    d_ff = config.TimeLangNet_embedding_dim * 4,
    d_k = config.TimeLangNet_d_k,
    d_v = config.TimeLangNet_d_v,
    hidden_dim= 64, 
).to(device)

#  --optimizer RAdam
# optimizer =torch.optim.AdamW(model.parameters(), lr=0.001,)
# AdamW with decoupled weight decay; learning rate and decay strength come from config.
optimizer = torch.optim.AdamW(model.parameters(), 
                              lr=config.lr, 
                              betas=(0.9, 0.999),
                              eps=1e-08, 
                              weight_decay=config.l2_weight)

# scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)

# optimizer = torch.optim.RAdam(model.parameters(), lr=config.lr)

# Multiplies the learning rate by 0.2 after every 3 scheduler steps.
# NOTE(review): if the scheduler is stepped once per epoch for config.epoch = 30 epochs,
# the rate shrinks by 0.2**10 overall — confirm this decay is intended.
scheduler = StepLR(optimizer, step_size = 3, gamma=0.2)


print(model)

def get_parameter_number(model):
    """Count the parameters of ``model``.

    Returns:
        ``{'Total': <all elements>, 'Trainable': <elements with requires_grad>}``.
    """
    total_num = 0
    trainable_num = 0
    for param in model.parameters():
        size = param.numel()
        total_num += size
        if param.requires_grad:
            trainable_num += size
    return {'Total': total_num, 'Trainable': trainable_num}
print(get_parameter_number(model))


MyModel_FinLangNet(
  (sigmoid): Sigmoid()
  (TimeLangNet): TimeLangNet(
    (embedding): FeatureEmbedding(
      (embedding_layers): ModuleList(
        (0): Embedding(46, 64, padding_idx=45)
        (1): Embedding(46, 64, padding_idx=45)
        (2): Embedding(46, 64, padding_idx=45)
      )
      (fusion_layer): Fusion(
        (linear1): Linear(in_features=64, out_features=64, bias=True)
        (linear2): Linear(in_features=64, out_features=1, bias=True)
        (dropout): Dropout(p=0.2, inplace=False)
      )
      (norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    )
    (dropout): Dropout(p=0.1, inplace=False)
    (encoder): Encoder(
      (attn_layers): ModuleList(
        (0): EncoderLayer(
          (attention): AttentionLayer(
            (inner_attention): FullAttention(
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (query_projection): Linear(in_features=64, out_features=8, bias=True)
            (key_projection): Linear(in_f

In [None]:
# Bookkeeping for model selection.
# NOTE(review): early_stop_patience, best_val_loss and regression_criterion are not used
# in the visible part of this cell — confirm against the rest of the training loop.
early_stop_patience = 10 
best_val_loss = float('inf')
best_epoch = 0
import math
import os
import numpy
import time, json, datetime 
from sklearn.metrics import roc_curve, auc, accuracy_score, recall_score, f1_score, precision_recall_curve, average_precision_score
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, classification_report

# Classification loss applied to the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()

regression_criterion = torch.nn.MSELoss()

def write_log(w):
    """Print ``w`` prefixed with the current wall-clock time as ``HH:MM:SS : w``."""
    stamp = datetime.datetime.now().strftime('%H:%M:%S')
    print(f"{stamp} : {w}")
    
# NOTE(review): best_epoch was already set to 0 earlier in this cell; best_ks is not used in
# the visible part of the loop.
best_ks = 0.0
best_epoch = 0
# Number of passes over the training loader.
num_epochs = config.epoch

def cal_accuracy(y_pred, y_true):
    """Return the fraction of positions where ``y_pred`` equals ``y_true``.

    Both inputs are converted with ``np.asarray`` first, so plain Python
    lists are compared element-wise (``list == list`` would otherwise collapse
    to a single bool and yield 0.0 or 1.0).

    Args:
        y_pred: array-like of predicted labels.
        y_true: array-like of reference labels, same shape as ``y_pred``.

    Returns:
        The mean of the element-wise equality mask (a NumPy float).

    Raises:
        ValueError: if the two inputs have different shapes; comparing them
            anyway would either broadcast (e.g. (n,) vs (n, 1)) or degenerate
            to a scalar, both of which give a misleading accuracy.
    """
    pred = np.asarray(y_pred)
    true = np.asarray(y_true)
    if pred.shape != true.shape:
        raise ValueError(
            "y_pred and y_true must have the same shape, got {} and {}".format(
                pred.shape, true.shape
            )
        )
    return np.mean(pred == true)

# Train for `num_epochs` epochs; after each epoch step the LR scheduler and evaluate on
# `test_dataloader`, printing accuracy / macro precision / recall / F1 and a class report.
#
# NOTE(review): the output recorded under this cell starts at lr ~1.28e-08 and shows the
# same validation metrics every epoch, which suggests `optimizer` / `scheduler` carried
# state over from earlier runs of this cell. Re-create the model, optimizer and scheduler
# before re-running — TODO confirm.
# NOTE(review): the "validation" pass iterates `test_dataloader`; confirm that no model
# selection is based on these numbers if that loader holds the held-out test split.
for epoch in range(num_epochs):
    model.train()
    # Per-epoch loss history. It has to be created once per epoch: re-creating it inside
    # the batch loop would discard every batch but the last.
    train_loss = []
    train_cnt = 0
    print("Current lr : {}".format(optimizer.state_dict()['param_groups'][0]['lr']))
    write_log('Epoch: {}'.format(epoch + 1))

    for step, (features, labels, lengths) in enumerate(train_dataloader):
        ts_feature = features.to(device)
        label = labels.long().to(device)  # CrossEntropyLoss expects integer class targets
        len_ts = lengths  # passed to the model as-is (not moved to `device`)

        optimizer.zero_grad()
        outputs = model(ts_feature, len_ts)
        loss = criterion(outputs, label)
        train_loss.append(loss.item())
        loss.backward()
        optimizer.step()

        if step % 100 == 0:
            msg = f"""Epoch [{epoch+1}/{num_epochs}],
            Step [{step}],
            Loss: {loss.item():.4f} """
            write_log(msg)
        train_cnt += 1

    # The scheduler is advanced once per epoch, after all optimizer steps of that epoch.
    scheduler.step()
    if train_loss:
        write_log(f'Epoch [{epoch+1}/{num_epochs}] mean train loss: {sum(train_loss) / len(train_loss):.4f}')

    model.eval()
    val_loss = []
    all_preds = []
    all_labels = []
    val_cnt = 0
    with torch.no_grad():
        for val_step, (features, labels, lengths) in enumerate(test_dataloader):
            ts_feature = features.to(device)
            label = labels.long().to(device)
            len_ts = lengths

            outputs = model(ts_feature, len_ts)
            loss = criterion(outputs, label)
            # Store a Python float rather than the tensor so the list is cheap to keep
            # and can be averaged directly.
            val_loss.append(loss.item())

            all_preds.append(outputs.detach())
            all_labels.append(label)

            if val_step % 100 == 0:
                msg = f'Valid runing,Epoch [{epoch+1}/{num_epochs}],Loss: {loss.item():.4f}'
                write_log(msg)

            val_cnt += 1

    if not all_preds:
        # torch.cat raises on an empty list; nothing to score for this epoch.
        write_log(f'Epoch [{epoch+1}/{num_epochs}] no validation batches, metrics skipped')
        continue

    write_log(f'Epoch [{epoch+1}/{num_epochs}] mean valid loss: {sum(val_loss) / len(val_loss):.4f}')

    all_preds = torch.cat(all_preds, dim=0)
    all_labels = torch.cat(all_labels, dim=0)
    # (total_samples, num_classes) estimated class probabilities; `dim` is given explicitly
    # because the implicit-dim form of softmax is deprecated.
    probs = torch.nn.functional.softmax(all_preds, dim=1)
    all_preds = torch.argmax(probs, dim=1).cpu().numpy()  # (total_samples,) predicted class index
    all_labels = all_labels.flatten().cpu().numpy()

    accuracy = accuracy_score(all_labels, all_preds)
    conf_matrix = confusion_matrix(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    class_report = classification_report(all_labels, all_preds)

    print(f'Accuracy: {accuracy}')
    print(f'Precision: {precision}')
    print(f'Recall: {recall}')
    print(f'F1 Score: {f1}')
    print('Classification Report\n', class_report)



Current lr : 1.2800000000000007e-08
00:40:03 : Epoch: 1
00:40:03 : Epoch [1/50],
            Step [0],
            Loss: 1.2186 
00:40:09 : Epoch [1/50],
            Step [100],
            Loss: 1.2570 
00:40:12 : Valid runing,Epoch [1/50],Loss: 0.8314
00:40:12 : Valid runing,Epoch [1/50],Loss: 1.5547
Accuracy: 0.3231939163498099
Precision: 0.32210453480945284
Recall: 0.322027972027972
F1 Score: 0.27256158429489313
Classification Report
               precision    recall  f1-score   support

           0       0.34      0.64      0.45        66
           1       0.57      0.18      0.28        66
           2       0.27      0.45      0.34        66
           3       0.10      0.02      0.03        65

    accuracy                           0.32       263
   macro avg       0.32      0.32      0.27       263
weighted avg       0.32      0.32      0.27       263

Current lr : 2.5600000000000015e-09
00:40:13 : Epoch: 2
00:40:13 : Epoch [2/50],
            Step [0],
            Loss: 0

Accuracy: 0.3231939163498099
Precision: 0.32210453480945284
Recall: 0.322027972027972
F1 Score: 0.27256158429489313
Classification Report
               precision    recall  f1-score   support

           0       0.34      0.64      0.45        66
           1       0.57      0.18      0.28        66
           2       0.27      0.45      0.34        66
           3       0.10      0.02      0.03        65

    accuracy                           0.32       263
   macro avg       0.32      0.32      0.27       263
weighted avg       0.32      0.32      0.27       263

Current lr : 2.0480000000000016e-11
00:41:40 : Epoch: 11
00:41:40 : Epoch [11/50],
            Step [0],
            Loss: 1.0376 
00:41:46 : Epoch [11/50],
            Step [100],
            Loss: 0.8978 
00:41:49 : Valid runing,Epoch [11/50],Loss: 0.8314
00:41:49 : Valid runing,Epoch [11/50],Loss: 1.5547
Accuracy: 0.3231939163498099
Precision: 0.32210453480945284
Recall: 0.322027972027972
F1 Score: 0.27256158429489313
C

00:43:12 : Epoch [20/50],
            Step [100],
            Loss: 1.1550 
00:43:14 : Valid runing,Epoch [20/50],Loss: 0.8314
00:43:16 : Valid runing,Epoch [20/50],Loss: 1.5547
Accuracy: 0.3231939163498099
Precision: 0.32210453480945284
Recall: 0.322027972027972
F1 Score: 0.27256158429489313
Classification Report
               precision    recall  f1-score   support

           0       0.34      0.64      0.45        66
           1       0.57      0.18      0.28        66
           2       0.27      0.45      0.34        66
           3       0.10      0.02      0.03        65

    accuracy                           0.32       263
   macro avg       0.32      0.32      0.27       263
weighted avg       0.32      0.32      0.27       263

Current lr : 1.6384000000000016e-13
00:43:16 : Epoch: 21
00:43:16 : Epoch [21/50],
            Step [0],
            Loss: 1.4295 
00:43:22 : Epoch [21/50],
            Step [100],
            Loss: 0.9931 
00:43:23 : Valid runing,Epoch [21/50],Los

00:44:43 : Epoch [30/50],
            Step [0],
            Loss: 0.9548 
00:44:48 : Epoch [30/50],
            Step [100],
            Loss: 1.1728 
00:44:51 : Valid runing,Epoch [30/50],Loss: 0.8314
00:44:52 : Valid runing,Epoch [30/50],Loss: 1.5547
Accuracy: 0.3231939163498099
Precision: 0.32210453480945284
Recall: 0.322027972027972
F1 Score: 0.27256158429489313
Classification Report
               precision    recall  f1-score   support

           0       0.34      0.64      0.45        66
           1       0.57      0.18      0.28        66
           2       0.27      0.45      0.34        66
           3       0.10      0.02      0.03        65

    accuracy                           0.32       263
   macro avg       0.32      0.32      0.27       263
weighted avg       0.32      0.32      0.27       263

Current lr : 1.3107200000000015e-15
00:44:52 : Epoch: 31
00:44:53 : Epoch [31/50],
            Step [0],
            Loss: 1.0286 
00:44:59 : Epoch [31/50],
            Step 

Accuracy: 0.3231939163498099
Precision: 0.32210453480945284
Recall: 0.322027972027972
F1 Score: 0.27256158429489313
Classification Report
               precision    recall  f1-score   support

           0       0.34      0.64      0.45        66
           1       0.57      0.18      0.28        66
           2       0.27      0.45      0.34        66
           3       0.10      0.02      0.03        65

    accuracy                           0.32       263
   macro avg       0.32      0.32      0.27       263
weighted avg       0.32      0.32      0.27       263

Current lr : 1.0485760000000013e-17
00:46:21 : Epoch: 40
00:46:21 : Epoch [40/50],
            Step [0],
            Loss: 1.1492 
00:46:28 : Epoch [40/50],
            Step [100],
            Loss: 1.4639 
00:46:29 : Valid runing,Epoch [40/50],Loss: 0.8314
00:46:30 : Valid runing,Epoch [40/50],Loss: 1.5547
Accuracy: 0.3231939163498099
Precision: 0.32210453480945284
Recall: 0.322027972027972
F1 Score: 0.27256158429489313
C