In [ ]:
!pip install segmentation-models-pytorch -q
!pip install lightning -q
!pip install wandb -U -q
!pip install monai -q

In [0]:
import IPython
import ipywidgets
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.tuner.tuning import Tuner
from IPython.display import display
import numpy as np
import nibabel as nib
import os
from os.path import join
from tqdm.notebook import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import segmentation_models_pytorch as smp
from segmentation_models_pytorch import utils
import albumentations as A
from albumentations.pytorch import ToTensorV2
import monai
from monai.losses import *
import pytorch_lightning as pl
from pytorch_lightning.tuner.tuning import Tuner
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import LearningRateMonitor
from pytorch_lightning import LightningDataModule
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from fastai.losses import *
import torchmetrics
import wandb
# from lightning.pytorch.loggers import WandbLogger
from pytorch_lightning.core.mixins import HyperparametersMixin
from pytorch_lightning.callbacks import Callback
from matplotlib.patches import Patch, Rectangle
import cv2

# Wipe the output produced while importing so only the confirmation line remains.
IPython.display.clear_output()

# Fixed spelling of the user-facing confirmation message.
print("Environment Set Up.")

In [None]:
# Helper that loads one image/mask pair from .npz archives and plots it.
def load_and_display_npz(image_path, mask_path, key='arr_0'):
    """Load an image/mask pair from ``.npz`` archives, print their metadata and plot them.

    Args:
        image_path: Path to the ``.npz`` archive containing the image array.
        mask_path: Path to the ``.npz`` archive containing the mask array.
        key: Name of the array to read from both archives. Defaults to
            ``'arr_0'``, the key this notebook assumes for its archives.

    Returns:
        None. Shape/dtype information is printed and a two-panel figure is shown.
    """
    # np.load on an .npz returns an archive object that keeps the file open;
    # the context managers close both archives once the arrays have been read.
    with np.load(image_path) as image_data, np.load(mask_path) as mask_data:
        image = image_data[key]
        mask = mask_data[key]

    # Report shape and dtype of both arrays.
    print("图像信息:")
    print(f"形状: {image.shape}, 类型: {image.dtype}")
    print("掩码信息:")
    print(f"形状: {mask.shape}, 类型: {mask.dtype}")

    # Image on the left (grayscale), mask on the right (categorical colormap).
    fig, ax = plt.subplots(1, 2, figsize=(12, 6))
    ax[0].imshow(image, cmap='gray')
    ax[0].set_title('Image')
    ax[0].axis('off')  # hide ticks and frame
    ax[1].imshow(mask, cmap='tab20')
    ax[1].set_title('Mask')
    ax[1].axis('off')  # hide ticks and frame
    plt.show()

# Locations of the image and mask archives.
image_dir = '/kaggle/input/rawniidataset/SMU_Dataset/image_npz'
mask_dir = '/kaggle/input/rawniidataset/SMU_Dataset/mask_npz'

# Directory listings, sorted by name so the two lists line up positionally.
image_files = os.listdir(image_dir)
image_files.sort()
mask_files = os.listdir(mask_dir)
mask_files.sort()

# Images and masks are assumed to correspond one-to-one by sorted position.
# Only the first pair is displayed; iterate over the whole zip to see every pair.
first_pair = next(zip(image_files, mask_files), None)
if first_pair is not None:
    image_file, mask_file = first_pair

    # Build the full paths of the pair.
    image_path = os.path.join(image_dir, image_file)
    mask_path = os.path.join(mask_dir, mask_file)

    # Load and show it.
    load_and_display_npz(image_path, mask_path)



In [None]:
import os
import numpy as np
from tqdm.auto import tqdm

# Directory holding the mask archives.
mask_dir = '/kaggle/input/rawniidataset/SMU_Dataset/mask_npz'

# Class labels are assumed to run from 0 to 13, i.e. 14 classes in total.
NUM_CLASSES = 14

# class_counts[c] is the number of mask files in which label c appears at least
# once (file-level presence, not a pixel count).
class_counts = np.zeros(NUM_CLASSES, dtype=int)

# Walk over every .npz file in the mask directory.
for filename in tqdm(os.listdir(mask_dir)):
    if not filename.endswith('.npz'):
        continue
    file_path = os.path.join(mask_dir, filename)

    # Read the mask and close the archive right away instead of leaving the
    # file handle open until garbage collection.
    with np.load(file_path) as data:
        mask = data['arr_0'].astype(int)  # mask is assumed to be stored under 'arr_0'

    # Distinct labels present in this file.
    unique_classes = np.unique(mask)

    # A negative label would silently index from the end of class_counts and a
    # label >= NUM_CLASSES would raise a bare IndexError; report both clearly.
    out_of_range = unique_classes[(unique_classes < 0) | (unique_classes >= NUM_CLASSES)]
    if out_of_range.size:
        raise ValueError(
            f"{file_path} contains labels outside 0..{NUM_CLASSES - 1}: {out_of_range.tolist()}"
        )

    # unique_classes has no duplicates, so each present class is bumped exactly once.
    class_counts[unique_classes] += 1

# Report the per-class file counts.
for i, count in enumerate(class_counts):
    print(f"Class {i}: {count} times")


In [None]:
import os
import numpy as np

# Directory holding the mask archives.
mask_dir = '/kaggle/input/rawniidataset/SMU_Dataset/mask_npz'

# Labels of interest: a file is selected when it contains at least one of them.
target_classes = {4, 5, 10, 11, 12, 13}

# Full paths of the selected mask archives.
selected_files = []

# os.listdir returns entries in arbitrary order; sorting makes the content
# order of selected_files reproducible across runs.
for filename in tqdm(sorted(os.listdir(mask_dir))):
    if not filename.endswith('.npz'):
        continue
    file_path = os.path.join(mask_dir, filename)

    # Read the mask and close the archive immediately.
    with np.load(file_path) as data:
        mask = data['arr_0'].astype(int)  # mask is assumed to be stored under 'arr_0'

    # Distinct labels present in this file.
    unique_classes = np.unique(mask)

    # Keep the file when it shares at least one label with the target set.
    if target_classes.intersection(unique_classes.tolist()):
        selected_files.append(file_path)

len(selected_files)


In [None]:
import random
# selected_files is expected to be the list of mask .npz paths built by an earlier cell.

# Number of panels in the preview figure.
num_preview = 4

# Draw up to `num_preview` distinct files; when fewer are available take them all.
chosen_files = random.sample(selected_files, min(num_preview, len(selected_files)))

# One row of `num_preview` panels.
fig, axs = plt.subplots(1, num_preview, figsize=(20, 5))
fig.suptitle('Randomly Selected NPZ Masks')

# Hide the axes of every panel up front so panels that stay empty (fewer than
# `num_preview` files available) do not render as blank framed boxes.
for ax in axs:
    ax.axis('off')

# Load and draw the mask of each chosen file.
for ax, file_path in zip(axs, chosen_files):
    # Close the archive as soon as the array has been read.
    with np.load(file_path) as data:
        mask = data['arr_0']  # mask is assumed to be stored under 'arr_0'
    ax.imshow(mask, cmap='tab20')  # categorical colormap to tell classes apart
    ax.title.set_text(f'File: {os.path.basename(file_path)}')

plt.tight_layout()
plt.show()


In [None]:
import os
import shutil

# selected_files is expected to hold the full paths of the mask files to copy.

# Destination directories of the new dataset.
new_mask_dir = '/kaggle/working/AUG_dataset/mask_npz'
new_image_dir = '/kaggle/working/AUG_dataset/image_npz'
os.makedirs(new_mask_dir, exist_ok=True)   # create the mask directory if needed
os.makedirs(new_image_dir, exist_ok=True)  # create the image directory if needed

# Masks whose image could not be found; they are not copied so the new dataset
# never contains a mask without its image.
missing_images = []

for mask_path in tqdm(selected_files):
    # Derive the image file name from the mask file name ('mask' -> 'image').
    mask_filename = os.path.basename(mask_path)
    image_filename = mask_filename.replace('mask', 'image')

    # Images are expected in an 'image_npz' directory next to the mask directory.
    # Building the path from its components avoids rewriting unrelated parts of
    # the path, which chained str.replace calls on the full path could do.
    dataset_root = os.path.dirname(os.path.dirname(mask_path))
    original_image_path = os.path.join(dataset_root, 'image_npz', image_filename)

    # Check for the image first: copy the pair only when it is complete.
    if not os.path.exists(original_image_path):
        print(f"Image file not found for {original_image_path}")
        missing_images.append(mask_path)
        continue

    shutil.copy(mask_path, new_mask_dir)
    shutil.copy(original_image_path, new_image_dir)

# Completion message, plus a summary of anything that was skipped.
print("Selected masks and their corresponding images have been copied to the new dataset directory.")
if missing_images:
    print(f"Skipped {len(missing_images)} mask file(s) without a matching image.")


In [None]:
# Directories of the copied dataset.
image_dir = '/kaggle/working/AUG_dataset/image_npz'
mask_dir = '/kaggle/working/AUG_dataset/mask_npz'

# Keep only the .npz entries of each directory.
image_files = list(filter(lambda name: name.endswith('.npz'), os.listdir(image_dir)))
mask_files = list(filter(lambda name: name.endswith('.npz'), os.listdir(mask_dir)))

# Pick one image file at random.
selected_image_file = np.random.choice(image_files)

# The mask file name is assumed to follow the same pattern with 'image' swapped for 'mask'.
selected_mask_file = selected_image_file.replace('image', 'mask')

# Full paths of the chosen pair.
image_path = os.path.join(image_dir, selected_image_file)
mask_path = os.path.join(mask_dir, selected_mask_file)

# Read both arrays (assumed to be stored under 'arr_0').
image_data = np.load(image_path)['arr_0']
mask_data = np.load(mask_path)['arr_0']

# Show which labels occur in the mask.
print(np.unique(mask_data))

# Draw image and mask side by side; each panel is described by (title, array, colormap).
fig, axs = plt.subplots(1, 2, figsize=(12, 6))
panels = (
    ('Image', image_data, 'gray'),
    ('Mask', mask_data, 'tab20'),
)
for panel_ax, (panel_title, panel_array, panel_cmap) in zip(axs, panels):
    panel_ax.imshow(panel_array, cmap=panel_cmap)
    panel_ax.set_title(panel_title)
    panel_ax.axis('off')

plt.show()
