In [1]:
from SSR.dataset import BrainLesionDataset
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
from torchvision import transforms

# 定义图像和掩膜的预处理与增强
base_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

# 创建完整数据集实例
csv_file = 'Dataset_BUSI_with_GT/metadata.csv'
full_dataset = BrainLesionDataset(csv_file=csv_file, transform=base_transform)

# 将数据集划分为训练集、验证集和测试集
train_size = int(0.7 * len(full_dataset))
val_size = int(0.15 * len(full_dataset))
test_size = len(full_dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(full_dataset, [train_size, val_size, test_size])

# 定义批量大小
batch_size = 8  # 可根据硬件资源调整批量大小

# 创建DataLoader，用于批量加载训练、验证和测试数据
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=os.cpu_count())
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=os.cpu_count())
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=os.cpu_count())

# 示例：检查训练集中的图像和掩膜大小
for images, masks, image_paths, mask_paths in train_loader:
    print(images.shape, masks.shape)
    break



torch.Size([8, 1, 256, 256]) torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256]) torch.Size([8, 1, 256, 256])


---

# Benchmark

In [4]:
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from monai.networks.nets import BasicUNet
from monai.losses import DiceCELoss
from SSR.pipeline import train_epoch, val_epoch
import mlflow
from IPython.display import clear_output

# 配置设备、模型和损失函数
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BasicUNet(
    in_channels=1,
    out_channels=1,
    features=(32, 32, 64, 128, 256, 32),
    spatial_dims=2,
).to(device)

optimizer = Adam(model.parameters(), lr=0.001)
criterion = DiceCELoss()

# 定义训练参数
total_steps = 10000  # 固定的训练步数
current_steps = 0    # 当前步数计数
confidence_threshold = 0.6  # 低信心阈值

# 启动 MLflow 实验
mlflow.set_experiment("Breast Ultrasound Segmentation")  # 设置实验名称

with mlflow.start_run(run_name="BasicUNet Baseline"):
    # 记录参数
    mlflow.log_param("learning_rate", 0.001)
    mlflow.log_param("total_steps", total_steps)

    epoch = 0
    while current_steps < total_steps:
        print(f"\nStarting at Step {current_steps + 1}")
        print("-" * 40)

        # 1. 训练阶段
        train_loss, train_dice, _ = train_epoch(
            model=model, 
            train_loader=train_loader, 
            optimizer=optimizer, 
            criterion=criterion, 
            device=device, 
            confidence_threshold=confidence_threshold
        )
        
        # 计算当前训练步数
        current_steps += len(train_loader)  # 每个 train_loader 的长度即为一个 epoch 的步数
        print(f"Training Loss: {train_loss:.4f}, Training Dice: {train_dice:.4f}")
        print(f"Current steps: {current_steps}/{total_steps}")

        # 2. 验证阶段
        val_loss, val_dice = val_epoch(
            model=model, 
            val_loader=val_loader, 
            criterion=criterion, 
            device=device
        )
        print(f"Validation Loss: {val_loss:.4f}, Validation Dice Score: {val_dice:.4f}")

        # 记录指标
        mlflow.log_metric("train_loss", train_loss, step=epoch)
        mlflow.log_metric("train_dice", train_dice, step=epoch)
        mlflow.log_metric("val_loss", val_loss, step=epoch)
        mlflow.log_metric("val_dice", val_dice, step=epoch)

        # 若达到总步数限制，则停止训练
        if current_steps >= total_steps:
            print("Reached total training steps. Ending training.")
            break

        print("-" * 40)
        clear_output()
        epoch += 1

    # 保存模型
    mlflow.pytorch.log_model(model, "model")



Starting at Step 9976
----------------------------------------
Starting training epoch...
Batch 1/57 - Loss: 0.7068, Dice: 0.9676, Low confidence samples: 0
Batch 2/57 - Loss: 0.8291, Dice: 0.9275, Low confidence samples: 1
Batch 3/57 - Loss: 0.6761, Dice: 0.9210, Low confidence samples: 0
Batch 4/57 - Loss: 0.6891, Dice: 0.9353, Low confidence samples: 0
Batch 5/57 - Loss: 0.6905, Dice: 0.9436, Low confidence samples: 0
Batch 6/57 - Loss: 0.6649, Dice: 0.9497, Low confidence samples: 0
Batch 7/57 - Loss: 0.7469, Dice: 0.9506, Low confidence samples: 0
Batch 8/57 - Loss: 0.6846, Dice: 0.9506, Low confidence samples: 0
Batch 9/57 - Loss: 0.6671, Dice: 0.9529, Low confidence samples: 0
Batch 10/57 - Loss: 0.6885, Dice: 0.9549, Low confidence samples: 0
Batch 11/57 - Loss: 0.6653, Dice: 0.9566, Low confidence samples: 0
Batch 12/57 - Loss: 0.7075, Dice: 0.9580, Low confidence samples: 0
Batch 13/57 - Loss: 0.6890, Dice: 0.9588, Low confidence samples: 0
Batch 14/57 - Loss: 0.6772, Dice: 



---

# SSR

In [6]:
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from monai.networks.nets import BasicUNet
from monai.losses import DiceCELoss
from IPython.display import clear_output
from SSR.pipeline import train_epoch, val_epoch, review_epoch
from SSR.dataset import ReviewDataset
import mlflow
from torchvision import transforms
import os

# 定义复习阶段的数据增强变换
review_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(30),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
])

# 配置设备、模型和损失函数
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BasicUNet(
    in_channels=1,
    out_channels=1,
    features=(32, 32, 64, 128, 256, 32),
    spatial_dims=2,
).to(device)

optimizer = Adam(model.parameters(), lr=0.001)
criterion = DiceCELoss()

# 定义实验参数
total_steps = 10000  # 固定的训练总步数
current_steps = 0    # 当前训练的步数计数
review_epochs = 4   # 每轮复习的次数
confidence_threshold = 0.7  # 低信心阈值

# 启动 MLflow 实验
mlflow.set_experiment("Breast Ultrasound Segmentation with Review")  # 设置实验名称

with mlflow.start_run(run_name="BasicUNet with Review Epoch"):
    # 记录参数
    mlflow.log_param("learning_rate", 0.001)
    mlflow.log_param("total_steps", total_steps)
    mlflow.log_param("review_epochs_per_round", review_epochs)
    mlflow.log_param("confidence_threshold", confidence_threshold)

    epoch = 0
    while current_steps < total_steps:
        print(f"\nStarting at Step {current_steps + 1}")
        print("-" * 40)

        # 1. 训练阶段
        train_loss, train_dice, low_confidence_samples = train_epoch(
            model=model, 
            train_loader=train_loader, 
            optimizer=optimizer, 
            criterion=criterion, 
            device=device, 
            confidence_threshold=confidence_threshold
        )
        print(f"Training Loss: {train_loss:.4f}, Training Dice: {train_dice:.4f}")
        
        # 累计步数
        current_steps += len(train_loader)
        print(f"Current steps: {current_steps}/{total_steps}")
        
        # 记录训练指标
        mlflow.log_metric("train_loss", train_loss, step=epoch)
        mlflow.log_metric("train_dice", train_dice, step=epoch)

        # 检查是否达到总步数限制
        if current_steps >= total_steps:
            print("Reached total training steps. Ending training.")
            break

        # 2. 验证阶段
        val_loss, val_dice = val_epoch(
            model=model, 
            val_loader=val_loader, 
            criterion=criterion, 
            device=device
        )
        print(f"Validation Loss: {val_loss:.4f}, Validation Dice Score: {val_dice:.4f}")
        
        # 记录验证指标
        mlflow.log_metric("val_loss", val_loss, step=epoch)
        mlflow.log_metric("val_dice", val_dice, step=epoch)

        # 3. 复习阶段（仅当有低信心样本且未达到步数限制时）
        if low_confidence_samples and current_steps < total_steps:
            print(f"{len(low_confidence_samples)} low confidence samples collected for review.")
            review_dataset = ReviewDataset(samples=low_confidence_samples, transform=review_transform)
            review_loader = DataLoader(review_dataset, batch_size=batch_size, shuffle=True, num_workers=os.cpu_count())

            # 执行复习过程
            for review_epoch_num in range(review_epochs):
                if current_steps >= total_steps:
                    break  # 若达到总步数限制，则停止复习阶段
                print(f"\nReview Epoch {review_epoch_num + 1}/{review_epochs}")
                
                review_loss, review_dice = review_epoch(
                    model=model, 
                    review_loader=review_loader, 
                    optimizer=optimizer, 
                    criterion=criterion, 
                    device=device
                )
                print(f"Review Loss: {review_loss:.4f}, Review Dice: {review_dice:.4f}")
                
                # 累计复习步数
                current_steps += len(review_loader)
                print(f"Current steps (including review): {current_steps}/{total_steps}")

                # 记录复习阶段的指标
                mlflow.log_metric("review_loss", review_loss, step=epoch * review_epochs + review_epoch_num)
                mlflow.log_metric("review_dice", review_dice, step=epoch * review_epochs + review_epoch_num)

            # 复习后的验证阶段
            if current_steps < total_steps:
                review_val_loss, review_val_dice = val_epoch(
                    model=model, 
                    val_loader=val_loader, 
                    criterion=criterion, 
                    device=device
                )
                print(f"Post-Review Validation Loss: {review_val_loss:.4f}, Post-Review Validation Dice Score: {review_val_dice:.4f}")
                
                # 记录复习后的验证指标
                mlflow.log_metric("post_review_val_loss", review_val_loss, step=epoch)
                mlflow.log_metric("post_review_val_dice", review_val_dice, step=epoch)
        else:
            print("Skipping review epoch as no low confidence samples were found.")

        print("-" * 40)
        clear_output()
        epoch += 1

    # 保存模型
    mlflow.pytorch.log_model(model, "model")






In [3]:
import random
from PIL import Image
import matplotlib.pyplot as plt

def visualize_random_sample(low_confidence_samples):
    # 随机抽取一个样本
    print(f"Length: {len(low_confidence_samples)}")
    sample = random.choice(low_confidence_samples)
    image_path = sample['image_path']
    mask_path = sample['mask_path']

    # 打开图像和掩膜
    image = Image.open(image_path).convert('L')
    mask = Image.open(mask_path).convert('L')

    # 显示图像和掩膜
    fig, axs = plt.subplots(1, 2, figsize=(10, 5))
    axs[0].imshow(image, cmap='gray')
    axs[0].set_title('Image')
    axs[0].axis('off')

    axs[1].imshow(mask, cmap='gray')
    axs[1].set_title('Mask')
    axs[1].axis('off')

    plt.show()

# 使用示例
visualize_random_sample(low_confidence_samples)


NameError: name 'low_confidence_samples' is not defined