In [None]:
"""ROC曲线与AUC计算"""
from sklearn.metrics import roc_curve, auc
import numpy as np

def calculate_auc(y_true, y_scores):
    """
    Compute the area under the ROC curve (AUC) for binary labels.

    The AUC is obtained from the rank-sum (Mann-Whitney U) identity: it is
    the probability that a randomly chosen positive sample receives a higher
    score than a randomly chosen negative one, with tied scores counted as
    one half.

    :param y_true: ground-truth labels; every label must be 0 or 1
    :param y_scores: scores or probabilities predicted by the model,
        one per label (higher means "more likely positive")
    :return: AUC as a float in [0, 1]
    :raises ValueError: if the inputs differ in length, contain labels other
        than 0/1, or do not contain at least one positive and one negative
        sample (the AUC is undefined in that case)
    """
    y_true = np.asarray(y_true).ravel()
    y_scores = np.asarray(y_scores, dtype=float).ravel()
    if y_true.shape[0] != y_scores.shape[0]:
        raise ValueError("y_true and y_scores must have the same length")

    # Labels must be binary (0, 1).
    categories = np.unique(y_true)
    if not np.all(np.isin(categories, (0, 1))):
        raise ValueError("y_true must contain only the binary labels 0 and 1")

    is_positive = y_true == 1
    actual_positive = int(np.sum(is_positive))
    actual_negative = int(np.sum(y_true == 0))
    if actual_positive == 0 or actual_negative == 0:
        raise ValueError(
            "AUC is undefined unless both a positive and a negative sample are present"
        )

    # Average (tie-aware) 1-based ascending ranks of the scores: every member
    # of a group of equal scores receives the mean rank of that group.
    _, inverse, counts = np.unique(y_scores, return_inverse=True, return_counts=True)
    last_rank_in_group = np.cumsum(counts)
    average_rank = last_rank_in_group - (counts - 1) / 2.0
    ranks = average_rank[inverse]

    # Mann-Whitney U statistic of the positive class, normalised by the
    # number of (positive, negative) pairs.
    positive_rank_sum = float(np.sum(ranks[is_positive]))
    u_statistic = positive_rank_sum - actual_positive * (actual_positive + 1) / 2.0
    return float(u_statistic / (actual_positive * actual_negative))

In [None]:
import torch
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

# https://claude.ai/chat/d2314b00-9f3a-439d-b077-0720a2cac08f
# https://github.com/copilot/c/f3f44714-743e-418f-8c3d-e2ce2a6cc6c2
class MetricsEvaluationVisualization:
    """
    Performance-metric evaluation for a PyTorch classification model.

    Runs the model over a test loader, records the true labels, predicted
    labels and raw model outputs, and reports Accuracy plus macro-averaged
    Precision, Recall and F1-score.

    Confusion-matrix and feature-distribution plots are a stated goal of
    this module but are not implemented in this class.
    """

    def __init__(self,
                 model,
                 test_loader: DataLoader,
                 device=None,
                 class_names=None,
                 loss_fn=None,
                 train_history=None,
                 valid_history=None):
        """
        :param model: trained model to evaluate
        :param test_loader: loader yielding ``(data, target)`` batches
        :param device: device to run on; when None, CUDA is used if available,
            otherwise CPU
        :param class_names: optional list of class names
        :param loss_fn: optional loss function used to compute the test loss
        :param train_history: optional record of training metrics
            (intended for plotting training curves)
        :param valid_history: optional record of validation metrics
            (intended for plotting training curves)
        """
        if device is None:
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = device
        self.model = model.to(self.device)
        self._check_eval_mode()
        self.test_loader = test_loader
        self.loss_fn = loss_fn
        self.categories = class_names
        self.train_history = train_history
        self.valid_history = valid_history

        # Storage for the results of the most recent evaluation.
        self.y_true = []
        self.y_pred = []
        self.y_score = []
        self.test_loss: float = 0.0

    def _check_eval_mode(self):
        """Report the model's current mode and make sure it is in eval mode."""
        if self.model.training:
            print("模型处于训练模式，切换到评估模式")
        else:
            print("模型已处于评估模式")
        self.model.eval()

    def _compute_metrics(self):
        """
        Derive the metrics dictionary from the stored labels and predictions.

        Precision, recall and F1 are computed per class (over every label seen
        in either the true or the predicted labels) and then macro-averaged.
        A class with an empty denominator contributes 0.0. Never triggers an
        evaluation run, so it is safe to call from ``evaluate``.

        :return: dict with keys 'accuracy', 'precision', 'recall', 'f1_score'
        """
        metrics = {
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1_score': 0.0
        }
        y_true = np.asarray(self.y_true)
        y_pred = np.asarray(self.y_pred)
        if y_true.size == 0 or y_pred.size == 0:
            # Nothing was evaluated (e.g. an empty loader): keep the zeros.
            return metrics

        metrics['accuracy'] = float(np.mean(y_true == y_pred))

        precisions, recalls, f1_scores = [], [], []
        for label in np.union1d(y_true, y_pred):
            predicted_as_label = y_pred == label
            truly_label = y_true == label
            true_pos = int(np.sum(predicted_as_label & truly_label))
            false_pos = int(np.sum(predicted_as_label & ~truly_label))
            false_neg = int(np.sum(~predicted_as_label & truly_label))

            precision = true_pos / (true_pos + false_pos) if (true_pos + false_pos) else 0.0
            recall = true_pos / (true_pos + false_neg) if (true_pos + false_neg) else 0.0
            if precision + recall:
                f1 = 2 * precision * recall / (precision + recall)
            else:
                f1 = 0.0

            precisions.append(precision)
            recalls.append(recall)
            f1_scores.append(f1)

        metrics['precision'] = float(np.mean(precisions))
        metrics['recall'] = float(np.mean(recalls))
        metrics['f1_score'] = float(np.mean(f1_scores))
        return metrics

    def calculate_metrics(self):
        """
        Compute and return the evaluation metrics.

        If no predictions are stored yet, ``evaluate`` is run first.

        :return: dict with keys 'accuracy', 'precision', 'recall', 'f1_score'
        """
        if len(self.y_true) == 0 or len(self.y_pred) == 0:
            print("没有可用的预测结果，先调用evaluate方法。")
            self.evaluate()

        return self._compute_metrics()

    def evaluate(self, return_outputs=False):
        """
        Run the model over the test loader and compute the metrics.

        Resets the stored results, iterates the loader without gradient
        tracking, and stores labels, predicted classes and raw model outputs.
        When a loss function was supplied, ``self.test_loss`` is set to the
        mean of the per-batch loss values.

        :param return_outputs: whether to also return all collected outputs
        :return: the metrics dict; when ``return_outputs`` is True, a tuple
            ``(metrics, (y_true, y_pred, y_score))`` of NumPy arrays, where
            ``y_score`` holds the raw model outputs
        """
        self.y_true = []
        self.y_pred = []
        self.y_score = []
        self.test_loss = 0.0
        self._check_eval_mode()

        num_batches = 0
        test_bar = tqdm(self.test_loader, desc="---Evaluating---", unit="batch", leave=False)
        with torch.no_grad():
            for data, target in test_bar:
                data, target = data.to(self.device), target.to(self.device)

                # Forward pass.
                # NOTE(review): assumes the model returns (batch, num_classes) scores — confirm.
                scores = self.model(data)

                # Accumulate the loss.
                if self.loss_fn is not None:
                    loss = self.loss_fn(scores, target)
                    self.test_loss += loss.item()
                num_batches += 1

                # Record true labels, predicted labels and raw outputs.
                _, predicted = torch.max(scores, 1)
                self.y_true.extend(target.cpu().numpy())
                self.y_pred.extend(predicted.cpu().numpy())
                self.y_score.extend(scores.cpu().numpy())

        # Report a per-batch mean so the value does not grow with the
        # number of batches in the loader.
        if self.loss_fn is not None and num_batches > 0:
            self.test_loss /= num_batches

        metrics = self._compute_metrics()
        if return_outputs:
            outputs = (
                np.asarray(self.y_true),
                np.asarray(self.y_pred),
                np.asarray(self.y_score),
            )
            return metrics, outputs
        return metrics




In [None]:
# Check accuracy on training & test to see how good our model is
def check_accuracy(loader, model, device=None):
    """
    Compute the classification accuracy of ``model`` over ``loader``.

    The model is put in eval mode for the duration of the check and is
    returned to whatever mode (train or eval) it was in on entry, even if
    an exception is raised part-way through.

    :param loader: iterable yielding ``(x, y)`` batches of inputs and labels
    :param model: model whose output has one score per class along dim 1
    :param device: device to move each batch to; when None, the device of
        the model's first parameter is used (batches are left where they
        are if the model has no parameters)
    :return: accuracy as a percentage in [0, 100]; 0.0 if the loader
        yields no samples
    """
    if device is None:
        first_param = next(iter(model.parameters()), None)
        if first_param is not None:
            device = first_param.device

    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in loader:
                if device is not None:
                    x = x.to(device)
                    y = y.to(device)

                scores = model(x)
                _, predictions = scores.max(1)
                # .item() keeps the running count a plain Python int.
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode instead of unconditionally forcing train mode.
        model.train(was_training)

    if num_samples == 0:
        return 0.0
    return float(num_correct) / float(num_samples) * 100

In [None]:
# Check accuracy on training & test to see how good our model is
def check_accuracy(loader, model):
    """
    Compute the classification accuracy of ``model`` over ``loader``.

    Each input batch is flattened to ``(batch, -1)``, passed through
    ``embedding`` and then through the model. The model is put in eval mode
    for the duration of the check and is returned to whatever mode (train or
    eval) it was in on entry, even if an exception is raised part-way through.

    Reads the notebook-level names ``device``, ``embedding``,
    ``inline_input``, ``inline_mats`` and ``renormalize``; they must be
    defined before this function is called.

    :param loader: iterable yielding ``(x, y)`` batches of inputs and labels
    :param model: model whose output has one score per class along dim 1
    :return: accuracy as a percentage in [0, 100]; 0.0 if the loader
        yields no samples
    """
    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device)
                y = y.to(device)
                # Flatten every sample to a single feature vector.
                x = x.reshape(x.shape[0], -1)

                scores = model(embedding(x),
                               inline_input=inline_input,
                               inline_mats=inline_mats,
                               renormalize=renormalize)
                _, predictions = scores.max(1)
                # .item() keeps the running count a plain Python int.
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode instead of unconditionally forcing train mode.
        model.train(was_training)

    if num_samples == 0:
        return 0.0
    return float(num_correct) / float(num_samples) * 100

In [1]:
import torch
# Draw five random scores from a standard normal distribution.
scores = torch.randn(5)
print(scores)
# Multi-label binarization: 1.0 where the score exceeds 0.5, otherwise 0.0.
# NOTE(review): the threshold is applied to the raw scores, not to sigmoid
# probabilities — confirm that this is intended.
predictions = torch.gt(scores, 0.5).to(torch.float32)
print(predictions)

tensor([ 0.7476, -0.9891, -1.2032, -0.5545, -0.2176])
tensor([1., 0., 0., 0., 0.])
