In [1]:
import os
# Restrict CUDA to GPU 0; set before torch is imported so the restriction is in place when CUDA initialises
os.environ['CUDA_VISIBLE_DEVICES'] = '0'


import torch
import torch.nn as nn
import wandb
import random
import argparse
import numpy as np
from tqdm import tqdm
from transformers import BertModel, AutoModel
from transformers import AdamW

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  from .autonotebook import tqdm as notebook_tqdm


# 数据预处理函数

In [2]:
from torch.utils.data import Dataset
import json

class BAE2025Dataset(Dataset):
    """Multi-label sentence-pair dataset read from a JSON file.

    The file must contain a JSON array of objects. Each object is expected to
    carry a ``conversation_history`` and a ``response`` entry plus one entry per
    name in ``label_types``. A sample is kept only if every one of its labels is
    a key of ``labels``; otherwise the whole sample is skipped.

    Each retained sample has the form
    ``((conversation_history, response), [label_id, ...])`` with one integer id
    per entry of ``label_types``, in the same order.

    Args:
        data_path: Path to the JSON file.
        label_types: Names of the label fields to extract. ``None`` selects
            ``DEFAULT_LABEL_TYPES``.
        labels: Mapping from label string to integer id. ``None`` selects
            ``DEFAULT_LABELS``.

    Raises:
        KeyError: If an item lacks ``conversation_history`` or ``response``.
    """

    # Defaults live on the class and are copied per instance, so mutating one
    # dataset's ``label_types`` / ``labels`` can never leak into another one
    # (the previous list/dict default arguments were shared by all instances).
    DEFAULT_LABEL_TYPES = ("Mistake_Location", "Providing_Guidance", "Actionability")
    DEFAULT_LABELS = {
        "Yes": 0,
        "To some extent": 1,
        "No": 2,
    }

    def __init__(self, data_path, label_types=None, labels=None):
        self.data_path = data_path
        self.label_types = list(
            self.DEFAULT_LABEL_TYPES if label_types is None else label_types
        )
        self.labels = dict(self.DEFAULT_LABELS if labels is None else labels)
        self._get_data()

    def _get_data(self):
        """Load ``self.data_path`` and populate ``self.data`` with the valid samples."""
        with open(self.data_path, 'r', encoding='utf-8') as f:
            records = json.load(f)

        self.data = []
        for item in records:
            sent1 = item['conversation_history']
            sent2 = item['response']

            raw_labels = [item.get(label_type) for label_type in self.label_types]
            # Skip the sample as soon as any label is missing or unknown.
            if any(raw not in self.labels for raw in raw_labels):
                continue

            label_values = [self.labels[raw] for raw in raw_labels]
            self.data.append(((sent1, sent2), label_values))

    def __len__(self):
        """Return the number of retained samples."""
        return len(self.data)

    def get_labels(self):
        """Return this dataset's label-string -> id mapping."""
        return self.labels

    def __getitem__(self, idx):
        """Return ``((conversation_history, response), label_ids)`` for ``idx``."""
        return self.data[idx]

# 数据加载函数

In [3]:
import torch
from torch.utils.data import DataLoader
from transformers import AutoTokenizer
from transformers import AutoConfig
from transformers import DebertaV2Tokenizer

class BAE2025DataLoader:
    """Batching wrapper that tokenises sentence pairs on the fly.

    Wraps a ``torch.utils.data.DataLoader`` whose ``collate_fn`` encodes each
    ``(sentence_a, sentence_b)`` pair with a Hugging Face tokenizer and moves the
    resulting tensors to ``device``.

    Args:
        dataset: Indexable dataset yielding ``((sent_a, sent_b), labels)`` items.
        batch_size: Number of samples per batch.
        max_length: Length every encoded pair is padded / truncated to.
        shuffle: Forwarded to ``DataLoader``.
        drop_last: Forwarded to ``DataLoader``.
        device: Target device; ``None`` selects CUDA when available, else CPU.
        tokenizer_name: Name or path passed to ``AutoTokenizer.from_pretrained``.
    """

    def __init__(
        self,
        dataset,
        batch_size=16,
        max_length=512,
        shuffle=True,
        drop_last=True,
        device=None,
        tokenizer_name='/mnt/cfs/huangzhiwei/BAE2025/models/deberta-v3-base'
    ):
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        # Truncate from the left so that, for over-long inputs, the beginning is
        # dropped and the end of the text is preserved.
        tokenizer.truncation_side = 'left'
        self.tokenizer = tokenizer
        print("当前使用的 tokenizer 类型：", type(self.tokenizer))

        self.dataset = dataset
        self.batch_size = batch_size
        self.max_length = max_length
        self.shuffle = shuffle
        self.drop_last = drop_last

        fallback_device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(fallback_device) if device is None else device

        self.loader = DataLoader(
            dataset=self.dataset,
            batch_size=self.batch_size,
            collate_fn=self.collate_fn,
            shuffle=self.shuffle,
            drop_last=self.drop_last
        )

    def collate_fn(self, data):
        """Encode a list of ``((sent_a, sent_b), labels)`` samples into tensors.

        Returns:
            Tuple ``(input_ids, attention_mask, labels)``, all on ``self.device``.
            ``labels`` is a ``torch.long`` tensor built from the per-sample label
            lists, so it is two-dimensional when each sample has several labels.
        """
        text_pairs = [(sample[0][0], sample[0][1]) for sample in data]
        label_rows = [sample[1] for sample in data]

        encoded = self.tokenizer.batch_encode_plus(
            batch_text_or_text_pairs=text_pairs,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
            return_length=True
        )

        target = self.device
        input_ids = encoded['input_ids'].to(target)
        attention_mask = encoded['attention_mask'].to(target)
        labels = torch.tensor(label_rows, dtype=torch.long).to(target)

        return input_ids, attention_mask, labels

    def __iter__(self):
        """Yield collated batches from the wrapped ``DataLoader``."""
        yield from self.loader

    def __len__(self):
        """Return the number of batches per epoch."""
        return len(self.loader)



# 模型代码

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModel


class ExpertLSTM(nn.Module):
    """Unidirectional LSTM expert that reduces a sequence to one vector.

    Args:
        input_size: Feature size of each time step.
        hidden_size: Size of the LSTM hidden state (and of the output vector).
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout passed to ``nn.LSTM``; forced to 0 for a single layer.
    """

    def __init__(self, input_size, hidden_size, num_layers=1, dropout=0.1):
        super().__init__()
        # Only hand dropout to nn.LSTM when there is more than one layer.
        stacked_dropout = dropout if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=stacked_dropout,
        )

    def forward(self, x):
        """Map ``x`` of shape [batch_size, seq_len, input_size] to [batch_size, hidden_size]."""
        _, (final_hidden, _) = self.lstm(x)
        # final_hidden stacks one state per layer; keep the top layer's state.
        return final_hidden[-1]


class ExpertBiLSTM(nn.Module):
    """Bidirectional LSTM expert that reduces a sequence to one vector.

    Each direction uses ``hidden_size // 2`` units so that the concatenated
    forward/backward state has exactly ``hidden_size`` features.

    Args:
        input_size: Feature size of each time step.
        hidden_size: Size of the output vector; must be a positive even number.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout passed to ``nn.LSTM``; forced to 0 for a single layer.

    Raises:
        ValueError: If ``hidden_size`` is not a positive even integer. An odd
            value would silently produce ``hidden_size - 1`` output features.
    """

    def __init__(self, input_size, hidden_size, num_layers=1, dropout=0.1):
        super().__init__()
        if hidden_size <= 0 or hidden_size % 2 != 0:
            raise ValueError(
                f'ExpertBiLSTM requires a positive even hidden_size, got {hidden_size}'
            )
        self.bilstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size // 2,  # half per direction
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
            dropout=dropout if num_layers > 1 else 0,
        )

    def forward(self, x):
        """Map ``x`` of shape [batch_size, seq_len, input_size] to [batch_size, hidden_size]."""
        _, (hidden, _) = self.bilstm(x)
        # hidden: [num_layers * num_directions, batch_size, hidden_size // 2].
        # The last two entries are the top layer's forward and backward states.
        hidden_forward = hidden[-2]
        hidden_backward = hidden[-1]
        return torch.cat([hidden_forward, hidden_backward], dim=1)


class ExpertRNN(nn.Module):
    """Vanilla RNN expert that reduces a sequence to one vector.

    Args:
        input_size: Feature size of each time step.
        hidden_size: Size of the RNN hidden state (and of the output vector).
        num_layers: Number of stacked RNN layers.
        dropout: Dropout passed to ``nn.RNN``; forced to 0 for a single layer.
    """

    def __init__(self, input_size, hidden_size, num_layers=1, dropout=0.1):
        super().__init__()
        # Only hand dropout to nn.RNN when there is more than one layer.
        stacked_dropout = dropout if num_layers > 1 else 0
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=stacked_dropout,
        )

    def forward(self, x):
        """Map ``x`` of shape [batch_size, seq_len, input_size] to [batch_size, hidden_size]."""
        final_hidden = self.rnn(x)[1]
        # One state per layer; keep the top layer's state.
        return final_hidden[-1]


class ExpertGRU(nn.Module):
    """GRU expert that reduces a sequence to one vector.

    Args:
        input_size: Feature size of each time step.
        hidden_size: Size of the GRU hidden state (and of the output vector).
        num_layers: Number of stacked GRU layers.
        dropout: Dropout passed to ``nn.GRU``; forced to 0 for a single layer.
    """

    def __init__(self, input_size, hidden_size, num_layers=1, dropout=0.1):
        super().__init__()
        # Only hand dropout to nn.GRU when there is more than one layer.
        stacked_dropout = dropout if num_layers > 1 else 0
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=stacked_dropout,
        )

    def forward(self, x):
        """Map ``x`` of shape [batch_size, seq_len, input_size] to [batch_size, hidden_size]."""
        final_hidden = self.gru(x)[1]
        # One state per layer; keep the top layer's state.
        return final_hidden[-1]


class ExpertLinear(nn.Module):
    """Feed-forward expert applied to the mean of the sequence.

    Args:
        input_size: Feature size of each time step.
        hidden_size: Size of the output vector; the inner layer is twice as wide.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        inner_width = hidden_size * 2
        self.linear = nn.Sequential(
            nn.Linear(input_size, inner_width),
            nn.LayerNorm(inner_width),
            nn.GELU(),
            nn.Linear(inner_width, hidden_size)
        )

    def forward(self, x):
        """Map ``x`` of shape [batch_size, seq_len, input_size] to [batch_size, hidden_size]."""
        # Collapse the sequence axis by averaging over all positions.
        sequence_mean = x.mean(dim=1)
        return self.linear(sequence_mean)


class BertClassificationHead(nn.Module):
    """Classification head applied to the first token of the encoder output.

    Pipeline: dropout -> dense -> tanh -> dropout -> output projection.

    Args:
        hidden_size: Feature size of the encoder output.
        num_classes: Number of output logits.
        dropout_prob: Dropout probability used before both linear layers.
    """

    def __init__(self, hidden_size=1024, num_classes=3, dropout_prob=0.3):
        super().__init__()
        self.dense = nn.Linear(hidden_size, hidden_size)
        self.dropout = nn.Dropout(dropout_prob)
        self.out_proj = nn.Linear(hidden_size, num_classes)

    def forward(self, features):
        """Map ``features`` ([batch, seq_len, hidden]) to logits ([batch, num_classes])."""
        first_token = features[:, 0, :]  # representation of the first ([CLS]) token
        projected = torch.tanh(self.dense(self.dropout(first_token)))
        return self.out_proj(self.dropout(projected))


class MoERouter(nn.Module):
    """Expert router: produces one weight per expert for every sample.

    Args:
        input_size: Feature size of the routing input.
        num_experts: Number of experts to distribute weight over.
    """

    def __init__(self, input_size, num_experts):
        super().__init__()
        self.router = nn.Linear(input_size, num_experts)

    def forward(self, x):
        """Map ``x`` ([batch_size, input_size]) to weights ([batch_size, num_experts]).

        The softmax over the last axis makes each row sum to 1.
        """
        return F.softmax(self.router(x), dim=-1)


class DeBERTaMoEClassifier(nn.Module):
    """Multi-task classifier: pretrained encoder + shared experts + per-task heads.

    For every task the model concatenates the logits of a task-specific
    ``BertClassificationHead`` with the logits obtained from each shared expert
    (LSTM, BiLSTM, RNN, GRU, linear) and feeds them to a small task-specific
    fusion network.

    Args:
        pretrained_model_name: Name or path passed to ``AutoModel.from_pretrained``.
        num_classes: Number of classes per task.
        freeze_pooler: Accepted for interface compatibility; it is not read
            anywhere in this class.
        expert_hidden_size: Output width of every expert.
        dropout: Dropout used in the heads, recurrent experts and fusion layers.
        num_rnn_layers: Number of layers of the recurrent experts.
        num_tasks: Number of tasks (one head / fusion layer / loss weight each).
    """

    def __init__(
        self,
        pretrained_model_name,
        num_classes=3,
        freeze_pooler=0,
        expert_hidden_size=256,
        dropout=0.3,
        num_rnn_layers=1,
        num_tasks=4
    ):
        super().__init__()

        self.num_tasks = num_tasks

        # Pretrained encoder, loaded through AutoModel.
        self.bert = AutoModel.from_pretrained(pretrained_model_name)
        self.bert_hidden_size = self.bert.config.hidden_size

        # One independent classification head per task.
        self.task_classifiers = nn.ModuleList([
            BertClassificationHead(
                hidden_size=self.bert_hidden_size,
                num_classes=num_classes,
                dropout_prob=dropout
            ) for _ in range(num_tasks)
        ])

        # Experts shared by all tasks. Construction order is kept stable so that
        # parameter initialisation under a fixed seed does not change.
        recurrent_kwargs = dict(
            input_size=self.bert_hidden_size,
            hidden_size=expert_hidden_size,
            num_layers=num_rnn_layers,
            dropout=dropout
        )
        self.experts = nn.ModuleDict({
            'lstm': ExpertLSTM(**recurrent_kwargs),
            'bilstm': ExpertBiLSTM(**recurrent_kwargs),
            'rnn': ExpertRNN(**recurrent_kwargs),
            'gru': ExpertGRU(**recurrent_kwargs),
            'linear': ExpertLinear(
                input_size=self.bert_hidden_size,
                hidden_size=expert_hidden_size
            ),
        })

        # Router fed with the first-token ([CLS]) embedding.
        self.router = MoERouter(self.bert_hidden_size, len(self.experts))

        # Per task: one projection from each expert's output to class logits.
        self.expert_outputs = nn.ModuleList([
            nn.ModuleDict({
                expert_name: nn.Linear(expert_hidden_size, num_classes)
                for expert_name in self.experts.keys()
            }) for _ in range(num_tasks)
        ])

        # Per task: fusion network over the concatenated logits
        # (task head + every expert).
        combined_dim = num_classes * (1 + len(self.experts))
        self.final_classifiers = nn.ModuleList([
            nn.Sequential(
                nn.Linear(combined_dim, combined_dim // 2),
                nn.LayerNorm(combined_dim // 2),
                nn.Dropout(dropout),
                nn.ReLU(),
                nn.Linear(combined_dim // 2, num_classes)
            ) for _ in range(num_tasks)
        ])

        # Learnable (pre-softmax) loss weights, one per task. The task at index 2
        # starts with a larger weight; with the three-task setup used by the
        # training code in this notebook that index is Track 4 (Actionability).
        self.loss_weights = nn.Parameter(torch.ones(num_tasks))
        with torch.no_grad():
            # Guard the index so that models with fewer than three tasks can
            # still be constructed instead of failing with an IndexError.
            if num_tasks > 2:
                self.loss_weights[2] = 1.5

    def forward(self, input_ids, attention_mask):
        """Run the encoder and produce per-task logits.

        Args:
            input_ids: Token ids, [batch_size, seq_len].
            attention_mask: Attention mask, [batch_size, seq_len].

        Returns:
            dict with
              * ``'logits'``: list of ``num_tasks`` tensors [batch_size, num_classes];
              * ``'loss_weights'``: softmax-normalised loss weights [num_tasks];
              * ``'routing_weights'``: router output [batch_size, num_experts].
        """
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        hidden_states = outputs.last_hidden_state  # [batch_size, seq_len, hidden_size]

        # NOTE(review): the routing weights are computed but not applied to the
        # expert logits below, so the router currently receives no gradient.
        # They are returned for inspection only; confirm whether gating was
        # intended before wiring them in (doing so changes model behaviour).
        cls_embedding = hidden_states[:, 0]  # [batch_size, hidden_size]
        routing_weights = self.router(cls_embedding)  # [batch_size, num_experts]

        # Experts are shared across tasks and depend only on hidden_states, so
        # run each one once instead of once per task. With the single-layer
        # default (no expert dropout) the results are identical to recomputing.
        expert_features = {
            expert_name: expert(hidden_states)  # [batch_size, expert_hidden_size]
            for expert_name, expert in self.experts.items()
        }

        final_logits_list = []
        for task_idx in range(self.num_tasks):
            # Logits of the task's own classification head come first.
            per_source_logits = [self.task_classifiers[task_idx](hidden_states)]

            # Followed by one logit block per expert, in expert order.
            task_projections = self.expert_outputs[task_idx]
            for expert_name, expert_hidden in expert_features.items():
                per_source_logits.append(task_projections[expert_name](expert_hidden))

            # [batch_size, (1 + num_experts) * num_classes]
            combined_logits = torch.cat(per_source_logits, dim=1)
            final_logits_list.append(self.final_classifiers[task_idx](combined_logits))

        return {
            'logits': final_logits_list,
            'loss_weights': self.get_normalized_loss_weights(),
            'routing_weights': routing_weights,
        }

    def get_normalized_loss_weights(self):
        """Return the loss weights after softmax normalisation (they sum to 1)."""
        return F.softmax(self.loss_weights, dim=0)

# 训练参数设置

In [5]:
import os
import wandb
import random
import argparse
from tqdm import tqdm

import torch
import torch.nn as nn
import numpy as np
from transformers import AdamW
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

# 如果在Jupyter Notebook中运行，可以使用这个自定义参数函数替代argparser
def get_default_configs():
    """Build the default configuration object used when running inside Jupyter.

    This replaces command-line parsing, which fails under a notebook kernel.
    The returned object exposes every setting as a plain attribute. ``device``
    is taken from the module-level ``device`` variable.

    Alternative backbones previously tried for ``model_name``:
    bge-large-en-v1.5, ModernBERT-large, xlm-roberta-large, bge-base-en-v1.5,
    bert-base-uncased, roberta-base.
    """
    class Args:
        def __init__(self):
            defaults = {
                'model_name': '/mnt/cfs/huangzhiwei/BAE2025/models/deberta-v3-base',
                'num_classes': 3,
                'dropout': 0.25,
                'freeze_pooler': 8,
                'batch_size': 16,
                'max_length': 512,
                'lr': 2e-5,
                'epochs': 50,
                'device': device,
                'name': None,
                'seed': 42,
                'data_path': '../data_new/train.json',
                'val_data_path': '../data_new/val.json',
                'checkpoint_dir': 'checkpoints_track4',
                'patience': 8,
                'expert_hidden_size': 512,
                'num_rnn_layers': 1,
                'warmup_ratio': 0.1,
                'num_tasks': 3,  # number of tasks trained jointly
                'exp_name': 'BAE2025_track4_bert',
            }
            for option, value in defaults.items():
                setattr(self, option, value)

    return Args()


# 训练函数

In [6]:
def _save_confusion_matrix(true_labels, pred_labels, class_names, title, path):
    """Draw a confusion-matrix heatmap over all classes and save it to ``path``."""
    from sklearn.metrics import confusion_matrix
    import matplotlib.pyplot as plt
    import seaborn as sns

    # Fix the label set so the matrix always has one row/column per class,
    # even when a class is absent from this epoch's labels or predictions.
    cm_full = confusion_matrix(
        true_labels,
        pred_labels,
        labels=list(range(len(class_names)))
    )
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm_full, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title(title)
    plt.savefig(path)
    plt.close()  # release the figure; many are created over a run


def train(configs):
    """Train and validate the multi-task ``DeBERTaMoEClassifier``.

    Per epoch this runs one pass over the training set, evaluates on the
    validation set, prints loss / accuracy / macro-F1 per task, saves confusion
    matrices and the current normalised loss weights, and checkpoints the model
    whenever the validation macro-F1 of ``track4`` improves. Training stops
    early after ``configs.patience`` epochs without improvement.

    Args:
        configs: Object with the attributes ``seed``, ``checkpoint_dir``,
            ``exp_name``, ``data_path``, ``val_data_path``, ``batch_size``,
            ``max_length``, ``device``, ``model_name``, ``num_classes``,
            ``freeze_pooler``, ``num_rnn_layers``, ``expert_hidden_size``,
            ``dropout``, ``num_tasks``, ``lr``, ``epochs``, ``patience`` and,
            optionally, ``warmup_ratio`` (defaults to 0.1).

    Raises:
        ValueError: If ``configs.num_tasks`` does not match the tasks handled
            here, or if either data loader yields no batches.

    Side effects:
        Writes ``best_model_f1.pt``, ``loss_weights_epoch_<n>.npy`` and
        confusion-matrix PNGs under ``<checkpoint_dir>/<exp_name>``.
    """
    from sklearn.metrics import f1_score
    from transformers import get_cosine_schedule_with_warmup

    # Tasks, in the column order of the label tensor.
    task_names = ["track2", "track3", "track4"]
    class_names = ['Yes', 'To some extent', 'No']

    # Per-task reporting, plotting and model selection below index by
    # task_names, so a mismatch would otherwise fail late with an IndexError.
    if configs.num_tasks != len(task_names):
        raise ValueError(
            f'configs.num_tasks={configs.num_tasks} does not match the '
            f'{len(task_names)} tasks handled by train(): {task_names}'
        )

    # Seed every RNG in use.
    random.seed(configs.seed)
    np.random.seed(configs.seed)
    torch.manual_seed(configs.seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Checkpoint directory plus one train/val plot directory per task.
    checkpoint_dir = os.path.join(configs.checkpoint_dir, configs.exp_name)
    os.makedirs(checkpoint_dir, exist_ok=True)

    plot_dirs = {}
    for task_name in task_names:
        plot_dirs[task_name] = {}
        for split in ('train', 'val'):
            split_dir = os.path.join(checkpoint_dir, 'plots', task_name, split)
            os.makedirs(split_dir, exist_ok=True)
            plot_dirs[task_name][split] = split_dir

    # Datasets and loaders.
    train_dataset = BAE2025Dataset(configs.data_path)
    val_dataset = BAE2025Dataset(configs.val_data_path)

    train_dataloader = BAE2025DataLoader(
        dataset=train_dataset,
        batch_size=configs.batch_size,
        max_length=configs.max_length,
        shuffle=True,
        drop_last=True,
        device=configs.device,
        tokenizer_name=configs.model_name
    )

    val_dataloader = BAE2025DataLoader(
        dataset=val_dataset,
        batch_size=configs.batch_size,
        max_length=configs.max_length,
        shuffle=False,
        drop_last=False,
        device=configs.device,
        tokenizer_name=configs.model_name
    )

    # The averages below divide by the number of batches; with drop_last=True a
    # training set smaller than one batch yields zero batches.
    if len(train_dataloader) == 0:
        raise ValueError(
            f'No training batches: {len(train_dataset)} samples with '
            f'batch_size={configs.batch_size} and drop_last=True'
        )
    if len(val_dataloader) == 0:
        raise ValueError('No validation batches: the validation dataset is empty')

    # Multi-task model.
    model = DeBERTaMoEClassifier(
        pretrained_model_name=configs.model_name,
        num_classes=configs.num_classes,
        freeze_pooler=configs.freeze_pooler,
        num_rnn_layers=configs.num_rnn_layers,
        expert_hidden_size=configs.expert_hidden_size,
        dropout=configs.dropout,
        num_tasks=configs.num_tasks
    ).to(configs.device)

    # The same cross-entropy criterion is used for every task.
    criterion = nn.CrossEntropyLoss()

    optimizer = AdamW(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=configs.lr
    )

    # Warmup followed by cosine decay, stepped once per batch.
    total_steps = len(train_dataloader) * configs.epochs
    warmup_ratio = getattr(configs, 'warmup_ratio', 0.1)
    warmup_steps = int(warmup_ratio * total_steps)

    scheduler = get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=warmup_steps,
        num_training_steps=total_steps
    )

    # Model selection / early stopping state.
    best_val_acc = 0.0
    best_val_f1 = 0.0
    patience_counter = 0
    track4_idx = task_names.index("track4")

    for epoch in range(configs.epochs):
        # ------------------------------ training ------------------------------
        model.train()
        train_loss = 0.0
        train_task_losses = [0.0] * configs.num_tasks
        train_task_accs = [0.0] * configs.num_tasks
        train_task_preds = [[] for _ in range(configs.num_tasks)]
        train_task_labels = [[] for _ in range(configs.num_tasks)]

        with tqdm(
            train_dataloader,
            total=len(train_dataloader),
            desc=f'Epoch {epoch + 1}/{configs.epochs}',
            unit='batch',
            ncols=120
        ) as pbar:
            for input_ids, attention_mask, labels in pbar:
                optimizer.zero_grad()

                outputs = model(input_ids, attention_mask)
                logits_list = outputs['logits']
                loss_weights = outputs['loss_weights']

                batch_losses = []
                for task_idx in range(configs.num_tasks):
                    task_labels = labels[:, task_idx].long()
                    task_loss = criterion(logits_list[task_idx], task_labels)
                    batch_losses.append(task_loss)
                    train_task_losses[task_idx] += task_loss.item()

                    task_preds = logits_list[task_idx].argmax(dim=1)
                    task_acc = (task_preds == task_labels).float().mean()
                    train_task_accs[task_idx] += task_acc.item()

                    train_task_preds[task_idx].extend(task_preds.cpu().numpy())
                    train_task_labels[task_idx].extend(task_labels.cpu().numpy())

                # Weighted sum of the task losses.
                weighted_losses = [loss * weight for loss, weight in zip(batch_losses, loss_weights)]
                final_loss = sum(weighted_losses)

                final_loss.backward()
                optimizer.step()
                scheduler.step()

                train_loss += final_loss.item()

                curr_lr = scheduler.get_last_lr()[0]
                curr_weights = loss_weights.detach().cpu().numpy()
                weights_str = " ".join([f"w{i+1}:{w:.2f}" for i, w in enumerate(curr_weights)])

                pbar.set_postfix(
                    loss=f'{final_loss.item():.3f}',
                    lr=f'{curr_lr:.6f}',
                    weights=weights_str
                )

        # Batch-averaged metrics (all training batches have equal size because
        # the training loader drops the last incomplete batch).
        num_train_batches = len(train_dataloader)
        train_loss = train_loss / num_train_batches
        train_task_losses = [loss / num_train_batches for loss in train_task_losses]
        train_task_accs = [acc / num_train_batches for acc in train_task_accs]

        train_task_f1s = [
            f1_score(labels, preds, average='macro')
            for labels, preds in zip(train_task_labels, train_task_preds)
        ]

        print(f'Training Loss: {train_loss:.4f}')
        for task_idx, task_name in enumerate(task_names):
            print(f'Task {task_name} - Loss: {train_task_losses[task_idx]:.4f}, '
                  f'Acc: {train_task_accs[task_idx]:.4f}, '
                  f'F1: {train_task_f1s[task_idx]:.4f}')

        print(f'Current Loss Weights: {loss_weights.detach().cpu().numpy()}')

        for task_idx, task_name in enumerate(task_names):
            _save_confusion_matrix(
                train_task_labels[task_idx],
                train_task_preds[task_idx],
                class_names,
                f'Train: {task_name} Confusion Matrix\n'
                f'Acc: {train_task_accs[task_idx]:.4f}, '
                f'F1: {train_task_f1s[task_idx]:.4f}',
                os.path.join(plot_dirs[task_name]['train'], f'cm_full_epoch_{epoch+1}.png')
            )

        # ----------------------------- validation -----------------------------
        model.eval()
        val_loss = 0.0
        val_task_losses = [0.0] * configs.num_tasks
        val_task_corrects = [0.0] * configs.num_tasks
        val_task_preds = [[] for _ in range(configs.num_tasks)]
        val_task_labels = [[] for _ in range(configs.num_tasks)]

        with torch.no_grad():
            for input_ids, attention_mask, labels in val_dataloader:
                outputs = model(input_ids, attention_mask)
                logits_list = outputs['logits']
                loss_weights = outputs['loss_weights']

                val_batch_losses = []
                for task_idx in range(configs.num_tasks):
                    task_labels = labels[:, task_idx].long()
                    task_logits = logits_list[task_idx]

                    task_loss = criterion(task_logits, task_labels)
                    # Kept for the weighted total below so each loss is only
                    # computed once per batch.
                    val_batch_losses.append(task_loss)
                    val_task_losses[task_idx] += task_loss.item()

                    task_preds = task_logits.argmax(dim=1)
                    task_corrects = (task_preds == task_labels).float().sum()
                    val_task_corrects[task_idx] += task_corrects.item()

                    val_task_preds[task_idx].extend(task_preds.cpu().numpy())
                    val_task_labels[task_idx].extend(task_labels.cpu().numpy())

                weighted_losses = [loss * weight for loss, weight in zip(val_batch_losses, loss_weights)]
                val_loss += sum(weighted_losses).item()

        num_val_batches = len(val_dataloader)
        val_loss = val_loss / num_val_batches
        val_task_losses = [loss / num_val_batches for loss in val_task_losses]
        # Accuracy is sample-exact here because the last validation batch may
        # be smaller than the others.
        val_task_accs = [correct / len(val_dataset) for correct in val_task_corrects]

        val_task_f1s = [
            f1_score(labels, preds, average='macro')
            for labels, preds in zip(val_task_labels, val_task_preds)
        ]

        print(f'Validation Loss: {val_loss:.4f}')
        for task_idx, task_name in enumerate(task_names):
            print(f'Task {task_name} - Loss: {val_task_losses[task_idx]:.4f}, '
                  f'Acc: {val_task_accs[task_idx]:.4f}, '
                  f'F1: {val_task_f1s[task_idx]:.4f}')

        for task_idx, task_name in enumerate(task_names):
            _save_confusion_matrix(
                val_task_labels[task_idx],
                val_task_preds[task_idx],
                class_names,
                f'Val: {task_name} Confusion Matrix\n'
                f'Acc: {val_task_accs[task_idx]:.4f}, '
                f'F1: {val_task_f1s[task_idx]:.4f}',
                os.path.join(plot_dirs[task_name]['val'], f'cm_full_epoch_{epoch+1}.png')
            )

        # Persist this epoch's normalised loss weights. Done before the early
        # stopping check so the final epoch is recorded as well.
        np.save(
            os.path.join(checkpoint_dir, f'loss_weights_epoch_{epoch+1}.npy'),
            loss_weights.detach().cpu().numpy()
        )

        # Model selection is driven by the validation macro-F1 of Track 4.
        if val_task_f1s[track4_idx] > best_val_f1:
            best_val_f1 = val_task_f1s[track4_idx]
            best_val_acc = val_task_accs[track4_idx]

            torch.save(model.state_dict(), os.path.join(checkpoint_dir, 'best_model_f1.pt'))
            print(f'New best model saved with Track 4 F1: {best_val_f1:.4f}, Acc: {best_val_acc:.4f}')

            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= configs.patience:
                print(f'Early stopping triggered after {epoch+1} epochs.')
                break
        
# Entry point: pick the configuration source depending on the runtime
# environment (Jupyter kernel vs. command line), then start training.
if __name__ == '__main__':
    # Look both helpers up under private names. Rebinding the global
    # ``get_ipython`` here could shadow IPython's builtin with None, and
    # ``argparser`` may not be defined at all when running as a notebook.
    _ipython_getter = globals().get('get_ipython', None)
    _cli_parser = globals().get('argparser', None)

    try:
        _in_jupyter = bool(_ipython_getter) and 'IPKernelApp' in _ipython_getter().config
    except Exception:
        # Any failure while probing the shell means "not a Jupyter kernel".
        # Only Exception is caught so that KeyboardInterrupt / SystemExit
        # still propagate.
        _in_jupyter = False

    if _in_jupyter:
        print("Running in Jupyter environment, using default configs")
        configs = get_default_configs()
    elif callable(_cli_parser):
        # Command-line run: parse the arguments.
        configs = _cli_parser()
    else:
        # No command-line parser is available; fall back to the defaults
        # instead of failing with a NameError.
        print("argparser() is not defined, using default configs")
        configs = get_default_configs()

    # Derive the experiment name from the main hyper-parameters unless an
    # explicit name was supplied. Note that this replaces any exp_name already
    # present on configs whenever configs.name is None.
    if configs.name is None:
        configs.exp_name = \
            f'{os.path.basename(configs.model_name)}' + \
            f'{"_fp" if configs.freeze_pooler else ""}' + \
            f'_b{configs.batch_size}_e{configs.epochs}' + \
            f'_len{configs.max_length}_lr{configs.lr}'
    else:
        configs.exp_name = configs.name

    # Default the device when the configuration leaves it unset.
    if configs.device is None:
        configs.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu'
        )

    train(configs)

Running in Jupyter environment, using default configs




当前使用的 tokenizer 类型： <class 'transformers.models.deberta_v2.tokenization_deberta_v2_fast.DebertaV2TokenizerFast'>
当前使用的 tokenizer 类型： <class 'transformers.models.deberta_v2.tokenization_deberta_v2_fast.DebertaV2TokenizerFast'>


Epoch 1/50: 100%|████████| 123/123 [01:04<00:00,  1.91batch/s, loss=1.006, lr=0.000004, weights=w1:0.27 w2:0.27 w3:0.45]


Training Loss: 1.1484
Task track2 - Loss: 1.0067, Acc: 0.5376, F1: 0.3377
Task track3 - Loss: 1.2979, Acc: 0.2139, F1: 0.1890
Task track4 - Loss: 1.1437, Acc: 0.3872, F1: 0.2699
Current Loss Weights: [0.27410626 0.274004   0.45188978]
Validation Loss: 1.0286
Task track2 - Loss: 0.8862, Acc: 0.6310, F1: 0.2579
Task track3 - Loss: 1.1959, Acc: 0.2036, F1: 0.1893
Task track4 - Loss: 1.0135, Acc: 0.5181, F1: 0.2525
New best model saved with Track 4 F1: 0.2525, Acc: 0.5181


Epoch 2/50: 100%|████████| 123/123 [01:03<00:00,  1.94batch/s, loss=0.988, lr=0.000008, weights=w1:0.27 w2:0.27 w3:0.45]


Training Loss: 1.0261
Task track2 - Loss: 0.9339, Acc: 0.5655, F1: 0.3013
Task track3 - Loss: 1.1482, Acc: 0.2236, F1: 0.2068
Task track4 - Loss: 1.0080, Acc: 0.4929, F1: 0.3362
Current Loss Weights: [0.27420473 0.27385333 0.45194194]
Validation Loss: 1.0008
Task track2 - Loss: 0.8786, Acc: 0.6310, F1: 0.2579
Task track3 - Loss: 1.1123, Acc: 0.2016, F1: 0.1779
Task track4 - Loss: 1.0073, Acc: 0.5181, F1: 0.2356


Epoch 3/50: 100%|████████| 123/123 [01:03<00:00,  1.94batch/s, loss=0.978, lr=0.000012, weights=w1:0.27 w2:0.27 w3:0.45]


Training Loss: 1.0156
Task track2 - Loss: 0.9299, Acc: 0.5823, F1: 0.3066
Task track3 - Loss: 1.1113, Acc: 0.2195, F1: 0.1929
Task track4 - Loss: 1.0097, Acc: 0.4751, F1: 0.3099
Current Loss Weights: [0.27439585 0.27363217 0.45197198]
Validation Loss: 0.9962
Task track2 - Loss: 0.8783, Acc: 0.6310, F1: 0.2579
Task track3 - Loss: 1.0990, Acc: 0.1895, F1: 0.1264
Task track4 - Loss: 1.0056, Acc: 0.5181, F1: 0.2356


Epoch 4/50: 100%|████████| 123/123 [01:03<00:00,  1.94batch/s, loss=0.966, lr=0.000016, weights=w1:0.27 w2:0.27 w3:0.45]


Training Loss: 1.0121
Task track2 - Loss: 0.9146, Acc: 0.5767, F1: 0.3038
Task track3 - Loss: 1.1040, Acc: 0.2276, F1: 0.1946
Task track4 - Loss: 1.0158, Acc: 0.4883, F1: 0.3281
Current Loss Weights: [0.27470785 0.27332833 0.45196384]
Validation Loss: 0.9917
Task track2 - Loss: 0.8630, Acc: 0.6351, F1: 0.2679
Task track3 - Loss: 1.0952, Acc: 0.5746, F1: 0.2882
Task track4 - Loss: 1.0073, Acc: 0.4980, F1: 0.3310
New best model saved with Track 4 F1: 0.3310, Acc: 0.4980


Epoch 5/50: 100%|████████| 123/123 [01:03<00:00,  1.93batch/s, loss=1.008, lr=0.000020, weights=w1:0.28 w2:0.27 w3:0.45]


Training Loss: 1.0050
Task track2 - Loss: 0.9137, Acc: 0.5986, F1: 0.3304
Task track3 - Loss: 1.1031, Acc: 0.2190, F1: 0.1830
Task track4 - Loss: 1.0013, Acc: 0.5041, F1: 0.3433
Current Loss Weights: [0.27511564 0.27288315 0.4520012 ]
Validation Loss: 0.9763
Task track2 - Loss: 0.8247, Acc: 0.6653, F1: 0.3383
Task track3 - Loss: 1.0897, Acc: 0.6230, F1: 0.3409
Task track4 - Loss: 1.0002, Acc: 0.5081, F1: 0.3369
New best model saved with Track 4 F1: 0.3369, Acc: 0.5081


Epoch 6/50: 100%|████████| 123/123 [01:03<00:00,  1.93batch/s, loss=0.927, lr=0.000020, weights=w1:0.28 w2:0.27 w3:0.45]


Training Loss: 0.9806
Task track2 - Loss: 0.8780, Acc: 0.6326, F1: 0.4179
Task track3 - Loss: 1.0903, Acc: 0.2388, F1: 0.2145
Task track4 - Loss: 0.9768, Acc: 0.5386, F1: 0.3745
Current Loss Weights: [0.27562994 0.27233782 0.45203224]
Validation Loss: 0.9453
Task track2 - Loss: 0.8358, Acc: 0.6895, F1: 0.3836
Task track3 - Loss: 1.0749, Acc: 0.6492, F1: 0.4101
Task track4 - Loss: 0.9340, Acc: 0.6310, F1: 0.4299
New best model saved with Track 4 F1: 0.4299, Acc: 0.6310


Epoch 7/50: 100%|████████| 123/123 [01:03<00:00,  1.93batch/s, loss=0.827, lr=0.000020, weights=w1:0.28 w2:0.27 w3:0.45]


Training Loss: 0.9302
Task track2 - Loss: 0.8539, Acc: 0.6570, F1: 0.4271
Task track3 - Loss: 1.0733, Acc: 0.2790, F1: 0.2588
Task track4 - Loss: 0.8906, Acc: 0.6575, F1: 0.4655
Current Loss Weights: [0.275945   0.27155665 0.45249835]
Validation Loss: 0.8907
Task track2 - Loss: 0.8129, Acc: 0.7177, F1: 0.4497
Task track3 - Loss: 1.0528, Acc: 0.6593, F1: 0.4436
Task track4 - Loss: 0.8408, Acc: 0.6956, F1: 0.5008
New best model saved with Track 4 F1: 0.5008, Acc: 0.6956


Epoch 8/50: 100%|████████| 123/123 [01:03<00:00,  1.93batch/s, loss=0.981, lr=0.000020, weights=w1:0.28 w2:0.27 w3:0.45]


Training Loss: 0.9062
Task track2 - Loss: 0.8479, Acc: 0.6845, F1: 0.4512
Task track3 - Loss: 1.0622, Acc: 0.4548, F1: 0.4135
Task track4 - Loss: 0.8483, Acc: 0.6794, F1: 0.4851
Current Loss Weights: [0.27610216 0.27069858 0.45319927]
Validation Loss: 0.8797
Task track2 - Loss: 0.7967, Acc: 0.7198, F1: 0.4449
Task track3 - Loss: 1.0434, Acc: 0.5665, F1: 0.4974
Task track4 - Loss: 0.8325, Acc: 0.6935, F1: 0.4928


Epoch 9/50: 100%|████████| 123/123 [01:03<00:00,  1.93batch/s, loss=0.958, lr=0.000020, weights=w1:0.28 w2:0.27 w3:0.45]


Training Loss: 0.8819
Task track2 - Loss: 0.8309, Acc: 0.6946, F1: 0.4662
Task track3 - Loss: 1.0482, Acc: 0.5254, F1: 0.4692
Task track4 - Loss: 0.8139, Acc: 0.7058, F1: 0.5061
Current Loss Weights: [0.2762221  0.26982564 0.45395225]
Validation Loss: 0.8620
Task track2 - Loss: 0.7870, Acc: 0.7419, F1: 0.4852
Task track3 - Loss: 1.0362, Acc: 0.6694, F1: 0.4555
Task track4 - Loss: 0.8041, Acc: 0.7218, F1: 0.5200
New best model saved with Track 4 F1: 0.5200, Acc: 0.7218


Epoch 10/50: 100%|███████| 123/123 [01:03<00:00,  1.93batch/s, loss=0.808, lr=0.000019, weights=w1:0.28 w2:0.27 w3:0.45]


Training Loss: 0.8602
Task track2 - Loss: 0.7941, Acc: 0.7221, F1: 0.4873
Task track3 - Loss: 1.0326, Acc: 0.5498, F1: 0.4932
Task track4 - Loss: 0.7981, Acc: 0.7226, F1: 0.5184
Current Loss Weights: [0.2764235 0.2689956 0.4545809]
Validation Loss: 0.8582
Task track2 - Loss: 0.8086, Acc: 0.7319, F1: 0.4594
Task track3 - Loss: 1.0329, Acc: 0.6069, F1: 0.5107
Task track4 - Loss: 0.7851, Acc: 0.7298, F1: 0.5287
New best model saved with Track 4 F1: 0.5287, Acc: 0.7298


Epoch 11/50: 100%|███████| 123/123 [01:03<00:00,  1.93batch/s, loss=0.838, lr=0.000019, weights=w1:0.28 w2:0.27 w3:0.46]


Training Loss: 0.8451
Task track2 - Loss: 0.8011, Acc: 0.7246, F1: 0.5014
Task track3 - Loss: 1.0328, Acc: 0.5346, F1: 0.4946
Task track4 - Loss: 0.7611, Acc: 0.7434, F1: 0.5354
Current Loss Weights: [0.27650332 0.26810214 0.45539454]
Validation Loss: 0.8615
Task track2 - Loss: 0.7876, Acc: 0.7258, F1: 0.4462
Task track3 - Loss: 1.0312, Acc: 0.6472, F1: 0.5253
Task track4 - Loss: 0.8064, Acc: 0.7117, F1: 0.5093


Epoch 12/50: 100%|███████| 123/123 [01:03<00:00,  1.93batch/s, loss=0.838, lr=0.000019, weights=w1:0.28 w2:0.27 w3:0.46]


Training Loss: 0.8359
Task track2 - Loss: 0.7872, Acc: 0.7271, F1: 0.4955
Task track3 - Loss: 1.0201, Acc: 0.5732, F1: 0.5253
Task track4 - Loss: 0.7572, Acc: 0.7495, F1: 0.5399
Current Loss Weights: [0.2766016  0.26729125 0.45610717]
Validation Loss: 0.8367
Task track2 - Loss: 0.7712, Acc: 0.7460, F1: 0.4808
Task track3 - Loss: 1.0193, Acc: 0.6855, F1: 0.4612
Task track4 - Loss: 0.7695, Acc: 0.7399, F1: 0.5360
New best model saved with Track 4 F1: 0.5360, Acc: 0.7399


Epoch 13/50: 100%|███████| 123/123 [01:03<00:00,  1.93batch/s, loss=0.963, lr=0.000018, weights=w1:0.28 w2:0.27 w3:0.46]


Training Loss: 0.8167
Task track2 - Loss: 0.7560, Acc: 0.7581, F1: 0.5408
Task track3 - Loss: 1.0165, Acc: 0.5894, F1: 0.5241
Task track4 - Loss: 0.7367, Acc: 0.7642, F1: 0.5509
Current Loss Weights: [0.27674848 0.26645514 0.45679638]
Validation Loss: 0.8509
Task track2 - Loss: 0.7807, Acc: 0.7157, F1: 0.4705
Task track3 - Loss: 1.0286, Acc: 0.5726, F1: 0.5100
Task track4 - Loss: 0.7897, Acc: 0.7218, F1: 0.5218


Epoch 14/50: 100%|███████| 123/123 [01:03<00:00,  1.93batch/s, loss=0.705, lr=0.000018, weights=w1:0.28 w2:0.27 w3:0.46]


Training Loss: 0.8001
Task track2 - Loss: 0.7398, Acc: 0.7754, F1: 0.5546
Task track3 - Loss: 1.0037, Acc: 0.5899, F1: 0.5547
Task track4 - Loss: 0.7181, Acc: 0.7591, F1: 0.5461
Current Loss Weights: [0.27692133 0.26564956 0.45742914]
Validation Loss: 0.8550
Task track2 - Loss: 0.7961, Acc: 0.6935, F1: 0.4636
Task track3 - Loss: 1.0398, Acc: 0.4940, F1: 0.4826
Task track4 - Loss: 0.7835, Acc: 0.7258, F1: 0.5242


Epoch 15/50: 100%|███████| 123/123 [01:04<00:00,  1.92batch/s, loss=0.740, lr=0.000018, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.7899
Task track2 - Loss: 0.7219, Acc: 0.7861, F1: 0.5780
Task track3 - Loss: 1.0017, Acc: 0.5803, F1: 0.5475
Task track4 - Loss: 0.7084, Acc: 0.7612, F1: 0.5475
Current Loss Weights: [0.27710304 0.26486614 0.45803076]
Validation Loss: 0.8232
Task track2 - Loss: 0.7735, Acc: 0.7319, F1: 0.4748
Task track3 - Loss: 1.0071, Acc: 0.6573, F1: 0.5445
Task track4 - Loss: 0.7468, Acc: 0.7440, F1: 0.5394
New best model saved with Track 4 F1: 0.5394, Acc: 0.7440


Epoch 16/50: 100%|███████| 123/123 [01:04<00:00,  1.92batch/s, loss=0.757, lr=0.000017, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.7731
Task track2 - Loss: 0.6989, Acc: 0.7993, F1: 0.5734
Task track3 - Loss: 0.9882, Acc: 0.6026, F1: 0.5745
Task track4 - Loss: 0.6938, Acc: 0.7612, F1: 0.5484
Current Loss Weights: [0.277309   0.26409543 0.4585956 ]
Validation Loss: 0.8282
Task track2 - Loss: 0.7594, Acc: 0.7298, F1: 0.4826
Task track3 - Loss: 1.0142, Acc: 0.5948, F1: 0.5448
Task track4 - Loss: 0.7628, Acc: 0.7298, F1: 0.5265


Epoch 17/50: 100%|███████| 123/123 [01:03<00:00,  1.92batch/s, loss=0.749, lr=0.000017, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.7630
Task track2 - Loss: 0.6938, Acc: 0.8074, F1: 0.5895
Task track3 - Loss: 0.9807, Acc: 0.6118, F1: 0.5772
Task track4 - Loss: 0.6797, Acc: 0.7708, F1: 0.5548
Current Loss Weights: [0.27749783 0.26337197 0.4591302 ]
Validation Loss: 0.8200
Task track2 - Loss: 0.7651, Acc: 0.7258, F1: 0.4799
Task track3 - Loss: 1.0143, Acc: 0.6230, F1: 0.5305
Task track4 - Loss: 0.7418, Acc: 0.7379, F1: 0.5348


Epoch 18/50: 100%|███████| 123/123 [01:03<00:00,  1.92batch/s, loss=0.694, lr=0.000016, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.7407
Task track2 - Loss: 0.6841, Acc: 0.8120, F1: 0.5862
Task track3 - Loss: 0.9702, Acc: 0.6209, F1: 0.5878
Task track4 - Loss: 0.6435, Acc: 0.7815, F1: 0.5619
Current Loss Weights: [0.2775993  0.262634   0.45976666]
Validation Loss: 0.8278
Task track2 - Loss: 0.7744, Acc: 0.7117, F1: 0.4714
Task track3 - Loss: 1.0157, Acc: 0.6169, F1: 0.5485
Task track4 - Loss: 0.7528, Acc: 0.7339, F1: 0.5458
New best model saved with Track 4 F1: 0.5458, Acc: 0.7339


Epoch 19/50: 100%|███████| 123/123 [01:04<00:00,  1.92batch/s, loss=0.697, lr=0.000016, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.7202
Task track2 - Loss: 0.6619, Acc: 0.8247, F1: 0.5886
Task track3 - Loss: 0.9629, Acc: 0.6545, F1: 0.6206
Task track4 - Loss: 0.6170, Acc: 0.7917, F1: 0.6091
Current Loss Weights: [0.27771503 0.261908   0.46037704]
Validation Loss: 0.8155
Task track2 - Loss: 0.7646, Acc: 0.7339, F1: 0.4811
Task track3 - Loss: 1.0080, Acc: 0.6552, F1: 0.5244
Task track4 - Loss: 0.7367, Acc: 0.7419, F1: 0.5674
New best model saved with Track 4 F1: 0.5674, Acc: 0.7419


Epoch 20/50: 100%|███████| 123/123 [01:03<00:00,  1.93batch/s, loss=0.720, lr=0.000015, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.7124
Task track2 - Loss: 0.6495, Acc: 0.8216, F1: 0.5850
Task track3 - Loss: 0.9555, Acc: 0.6362, F1: 0.6030
Task track4 - Loss: 0.6122, Acc: 0.8125, F1: 0.6759
Current Loss Weights: [0.277812   0.26122144 0.46096653]
Validation Loss: 0.8143
Task track2 - Loss: 0.7471, Acc: 0.7520, F1: 0.4967
Task track3 - Loss: 0.9953, Acc: 0.6673, F1: 0.5466
Task track4 - Loss: 0.7523, Acc: 0.7359, F1: 0.5974
New best model saved with Track 4 F1: 0.5974, Acc: 0.7359


Epoch 21/50: 100%|███████| 123/123 [01:03<00:00,  1.93batch/s, loss=0.662, lr=0.000014, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.6994
Task track2 - Loss: 0.6292, Acc: 0.8384, F1: 0.6077
Task track3 - Loss: 0.9466, Acc: 0.6504, F1: 0.6201
Task track4 - Loss: 0.6019, Acc: 0.8196, F1: 0.7044
Current Loss Weights: [0.27796635 0.2605692  0.46146446]
Validation Loss: 0.8238
Task track2 - Loss: 0.7659, Acc: 0.7379, F1: 0.4889
Task track3 - Loss: 1.0205, Acc: 0.6270, F1: 0.5213
Task track4 - Loss: 0.7476, Acc: 0.7157, F1: 0.6112
New best model saved with Track 4 F1: 0.6112, Acc: 0.7157


Epoch 22/50: 100%|███████| 123/123 [01:03<00:00,  1.92batch/s, loss=0.679, lr=0.000014, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.6900
Task track2 - Loss: 0.6318, Acc: 0.8364, F1: 0.6187
Task track3 - Loss: 0.9308, Acc: 0.6636, F1: 0.6306
Task track4 - Loss: 0.5894, Acc: 0.8404, F1: 0.7433
Current Loss Weights: [0.27805656 0.25996804 0.46197543]
Validation Loss: 0.8267
Task track2 - Loss: 0.7796, Acc: 0.7319, F1: 0.4865
Task track3 - Loss: 1.0135, Acc: 0.6371, F1: 0.5246
Task track4 - Loss: 0.7500, Acc: 0.7016, F1: 0.6119
New best model saved with Track 4 F1: 0.6119, Acc: 0.7016


Epoch 23/50: 100%|███████| 123/123 [01:03<00:00,  1.93batch/s, loss=0.789, lr=0.000013, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.6809
Task track2 - Loss: 0.6219, Acc: 0.8389, F1: 0.6333
Task track3 - Loss: 0.9184, Acc: 0.6535, F1: 0.6307
Task track4 - Loss: 0.5830, Acc: 0.8354, F1: 0.7576
Current Loss Weights: [0.2781528  0.25941724 0.46242997]
Validation Loss: 0.8051
Task track2 - Loss: 0.7648, Acc: 0.7379, F1: 0.5030
Task track3 - Loss: 0.9880, Acc: 0.6673, F1: 0.5585
Task track4 - Loss: 0.7268, Acc: 0.7399, F1: 0.6592
New best model saved with Track 4 F1: 0.6592, Acc: 0.7399


Epoch 24/50: 100%|███████| 123/123 [01:03<00:00,  1.93batch/s, loss=0.711, lr=0.000012, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.6647
Task track2 - Loss: 0.6110, Acc: 0.8323, F1: 0.6325
Task track3 - Loss: 0.9027, Acc: 0.6895, F1: 0.6605
Task track4 - Loss: 0.5637, Acc: 0.8572, F1: 0.7899
Current Loss Weights: [0.2782368  0.25889075 0.46287248]
Validation Loss: 0.8107
Task track2 - Loss: 0.7724, Acc: 0.7339, F1: 0.5111
Task track3 - Loss: 1.0029, Acc: 0.6593, F1: 0.5461
Task track4 - Loss: 0.7262, Acc: 0.7198, F1: 0.6356


Epoch 25/50: 100%|███████| 123/123 [01:03<00:00,  1.94batch/s, loss=0.631, lr=0.000012, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.6545
Task track2 - Loss: 0.5788, Acc: 0.8587, F1: 0.6508
Task track3 - Loss: 0.8904, Acc: 0.6931, F1: 0.6671
Task track4 - Loss: 0.5682, Acc: 0.8542, F1: 0.7945
Current Loss Weights: [0.27838233 0.25840276 0.46321484]
Validation Loss: 0.8363
Task track2 - Loss: 0.8014, Acc: 0.7097, F1: 0.4866
Task track3 - Loss: 1.0133, Acc: 0.6290, F1: 0.5257
Task track4 - Loss: 0.7586, Acc: 0.7258, F1: 0.6252


Epoch 26/50: 100%|███████| 123/123 [01:03<00:00,  1.94batch/s, loss=0.676, lr=0.000011, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.6448
Task track2 - Loss: 0.5780, Acc: 0.8542, F1: 0.6672
Task track3 - Loss: 0.8747, Acc: 0.7073, F1: 0.6845
Task track4 - Loss: 0.5569, Acc: 0.8577, F1: 0.7914
Current Loss Weights: [0.27849522 0.25795633 0.4635484 ]
Validation Loss: 0.8123
Task track2 - Loss: 0.7682, Acc: 0.7278, F1: 0.4967
Task track3 - Loss: 0.9976, Acc: 0.6613, F1: 0.5532
Task track4 - Loss: 0.7356, Acc: 0.7319, F1: 0.6033


Epoch 27/50: 100%|███████| 123/123 [01:03<00:00,  1.93batch/s, loss=0.610, lr=0.000010, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.6307
Task track2 - Loss: 0.5730, Acc: 0.8547, F1: 0.6649
Task track3 - Loss: 0.8771, Acc: 0.6839, F1: 0.6658
Task track4 - Loss: 0.5284, Acc: 0.8780, F1: 0.8293
Current Loss Weights: [0.2785652  0.25750795 0.46392694]
Validation Loss: 0.8150
Task track2 - Loss: 0.7723, Acc: 0.7218, F1: 0.4834
Task track3 - Loss: 0.9910, Acc: 0.6331, F1: 0.5047
Task track4 - Loss: 0.7428, Acc: 0.7298, F1: 0.6108


Epoch 28/50: 100%|███████| 123/123 [01:03<00:00,  1.94batch/s, loss=0.628, lr=0.000010, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.6229
Task track2 - Loss: 0.5563, Acc: 0.8699, F1: 0.7006
Task track3 - Loss: 0.8629, Acc: 0.6946, F1: 0.6787
Task track4 - Loss: 0.5298, Acc: 0.8765, F1: 0.8250
Current Loss Weights: [0.27867156 0.25709838 0.46423015]
Validation Loss: 0.8190
Task track2 - Loss: 0.8019, Acc: 0.7036, F1: 0.4772
Task track3 - Loss: 0.9939, Acc: 0.6351, F1: 0.5115
Task track4 - Loss: 0.7323, Acc: 0.7480, F1: 0.6425


Epoch 29/50: 100%|███████| 123/123 [01:03<00:00,  1.93batch/s, loss=0.500, lr=0.000009, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.6199
Task track2 - Loss: 0.5618, Acc: 0.8669, F1: 0.7002
Task track3 - Loss: 0.8516, Acc: 0.7231, F1: 0.7034
Task track4 - Loss: 0.5267, Acc: 0.8740, F1: 0.8253
Current Loss Weights: [0.2787438  0.25673988 0.4645163 ]
Validation Loss: 0.8276
Task track2 - Loss: 0.8016, Acc: 0.7056, F1: 0.5109
Task track3 - Loss: 0.9985, Acc: 0.6351, F1: 0.5256
Task track4 - Loss: 0.7488, Acc: 0.7218, F1: 0.6160


Epoch 30/50: 100%|███████| 123/123 [01:03<00:00,  1.93batch/s, loss=0.558, lr=0.000008, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.6079
Task track2 - Loss: 0.5448, Acc: 0.8760, F1: 0.7181
Task track3 - Loss: 0.8462, Acc: 0.7058, F1: 0.6948
Task track4 - Loss: 0.5142, Acc: 0.8923, F1: 0.8611
Current Loss Weights: [0.27882355 0.25639427 0.46478218]
Validation Loss: 0.8222
Task track2 - Loss: 0.7888, Acc: 0.7117, F1: 0.5035
Task track3 - Loss: 1.0083, Acc: 0.6310, F1: 0.5353
Task track4 - Loss: 0.7397, Acc: 0.7359, F1: 0.6149


Epoch 31/50: 100%|███████| 123/123 [01:03<00:00,  1.94batch/s, loss=0.534, lr=0.000008, weights=w1:0.28 w2:0.26 w3:0.46]


Training Loss: 0.6022
Task track2 - Loss: 0.5359, Acc: 0.8735, F1: 0.7204
Task track3 - Loss: 0.8257, Acc: 0.7393, F1: 0.7282
Task track4 - Loss: 0.5188, Acc: 0.8872, F1: 0.8489
Current Loss Weights: [0.2789045  0.25609565 0.4649999 ]
Validation Loss: 0.8183
Task track2 - Loss: 0.7909, Acc: 0.7177, F1: 0.5331
Task track3 - Loss: 0.9950, Acc: 0.6552, F1: 0.5519
Task track4 - Loss: 0.7375, Acc: 0.7117, F1: 0.6181
Early stopping triggered after 31 epochs.
