# 验证数据集配置 示例代码

In [68]:
import sys
sys.path.append('../../jmseg')
from jmseg.registry import build_dataloader_from_cfg, build_dataset_from_cfg
from mmengine.config import Config
import torch
import os

In [69]:
# Path to the dataset config file (a relative path, so it is resolved against
# the kernel's current working directory).
config_path = '../jmseg/configs/_base_/datasets/drive.py'
cfg = Config.fromfile(config_path)
# Bare expression so the notebook displays the loaded config as the cell output.
cfg

Config (path: ../jmseg/configs/_base_/datasets/drive.py): {'dataset_type': 'Dataset', 'data_root': 'data/DRIVE', 'in_channels': 3, 'num_classes': 1, 'crop_size': (128, 128), 'inferer': {'type': 'SlidingWindowInferer', 'roi_size': (128, 128), 'sw_batch_size': 4}, 'metrics': [{'type': 'DiceMetric', 'include_background': False, 'reduction': 'mean'}, {'type': 'ConfusionMatrixMetric', 'include_background': False, 'reduction': 'mean', 'metric_name': 'accuracy'}], 'train_pipeline': [{'type': 'LoadImaged', 'keys': ['images', 'labels'], 'reader': 'PILReader', 'ensure_channel_first': True, 'image_only': True}, {'type': 'ScaleIntensityd', 'keys': ['images', 'labels']}, {'type': 'RandZoomd', 'keys': ['images', 'labels'], 'prob': 0.5, 'min_zoom': 0.8, 'max_zoom': 1.2, 'mode': ('bilinear', 'nearest')}, {'type': 'RandRotated', 'keys': ['images', 'labels'], 'prob': 0.5, 'range_x': 0.3, 'mode': ('bilinear', 'nearest')}, {'type': 'RandAxisFlipd', 'keys': ['images', 'labels'], 'prob': 0.5}, {'type': 'Ran

1. 检查img和label路径是否有误

In [70]:
# Fetch the training dataloader section of the config.
train_dataloader = cfg.get('train_dataloader', None)
assert train_dataloader is not None, "config has no 'train_dataloader' section"

# Fetch the dataset section nested inside the dataloader config.
# NOTE: this is the same object that lives inside `train_dataloader`, so any
# later change to `train_dataset` is also seen when the dataloader is built.
train_dataset = train_dataloader.get('dataset', None)
assert train_dataset is not None, "'train_dataloader' has no 'dataset' section"

# Read the pipeline and data_root without removing them from the config.
# Using .get() instead of .pop() keeps this cell idempotent: re-running it
# returns the same values instead of None. Both keys are overwritten
# explicitly by the later `train_dataset.update(...)` cells.
pipline = train_dataset.get('pipeline', None)
data_root = train_dataset.get('data_root', None)

In [71]:
# Show the data_root value read from the dataset config.
print(data_root)

data/DRIVE


In [72]:
# This notebook lives in the testing directory, so a relative data_root taken
# from the config would resolve to the wrong place and must be re-anchored one
# directory up. Absolute paths are left untouched.
# NOTE: this cell is not idempotent - running it twice prepends '../' twice.
# Re-run the config-loading cells first if you need to execute it again.
assert data_root is not None, "dataset config has no 'data_root'"
# os.path.isabs is the portable absolute-path check (it also recognises
# Windows drive/UNC paths, which a plain startswith('/') test would miss).
if not os.path.isabs(data_root):
    data_root = os.path.join('../', data_root)
    print(data_root)

../data/DRIVE


In [73]:
# Step 1 only validates file paths: write the corrected data_root back into the
# dataset config and swap in an empty pipeline so no transforms run yet.
# The pipeline itself is checked in a later step.
path_check_overrides = {'data_root': data_root, 'pipeline': []}
train_dataset.update(path_check_overrides)

In [74]:
# Show which dataset type the config asks the registry to build.
print(train_dataset.type)

Dataset


In [75]:
# Build the dataset from the config; the pipeline is empty at this point, so
# iterating it yields the raw image/label entries (file paths) untransformed.
dataset = build_dataset_from_cfg(train_dataset)

In [76]:
# Print each sample's image path next to its label path so mismatched or
# missing pairs can be spotted by eye.
for sample in dataset:
    image_entry, label_entry = sample['images'], sample['labels']
    print(image_entry, label_entry)

../data/DRIVE/training/images/31_training.tif ../data/DRIVE/training/1st_manual/31_manual1.gif
../data/DRIVE/training/images/38_training.tif ../data/DRIVE/training/1st_manual/38_manual1.gif
../data/DRIVE/training/images/27_training.tif ../data/DRIVE/training/1st_manual/27_manual1.gif
../data/DRIVE/training/images/34_training.tif ../data/DRIVE/training/1st_manual/34_manual1.gif
../data/DRIVE/training/images/25_training.tif ../data/DRIVE/training/1st_manual/25_manual1.gif
../data/DRIVE/training/images/23_training.tif ../data/DRIVE/training/1st_manual/23_manual1.gif
../data/DRIVE/training/images/26_training.tif ../data/DRIVE/training/1st_manual/26_manual1.gif
../data/DRIVE/training/images/35_training.tif ../data/DRIVE/training/1st_manual/35_manual1.gif
../data/DRIVE/training/images/30_training.tif ../data/DRIVE/training/1st_manual/30_manual1.gif
../data/DRIVE/training/images/28_training.tif ../data/DRIVE/training/1st_manual/28_manual1.gif
../data/DRIVE/training/images/37_training.tif ../d

2. 检查pipeline是否存在问题

In [78]:
# Put the original training pipeline (saved earlier in `pipline`) back into the
# dataset config so the transforms themselves can now be checked.
train_dataset.update({'pipeline': pipline})

In [79]:
# Rebuild the dataset from the current dataset config, replacing the earlier
# `dataset` object.
dataset = build_dataset_from_cfg(train_dataset)

In [81]:
# Run the pipeline on the first item and dump the result to confirm that the
# transforms execute without errors.
first_item = dataset[0]
print(first_item)

[{'images': tensor([[[-0.0293, -0.0332, -0.0187,  ..., -0.0243, -0.0326, -0.0253],
         [ 0.0949,  0.0420,  0.0064,  ..., -0.0279, -0.0320, -0.0253],
         [ 0.2331,  0.1483,  0.0640,  ..., -0.0263, -0.0275, -0.0253],
         ...,
         [ 0.7292,  0.7364,  0.7508,  ..., -0.0191, -0.0202, -0.0256],
         [ 0.7321,  0.7403,  0.7454,  ..., -0.0207, -0.0195, -0.0134],
         [ 0.7409,  0.7479,  0.7596,  ..., -0.0250, -0.0242, -0.0163]],

        [[-0.0306, -0.0200, -0.0232,  ..., -0.0243, -0.0326, -0.0264],
         [-0.0519, -0.0402, -0.0365,  ..., -0.0298, -0.0330, -0.0272],
         [-0.0567, -0.0567, -0.0492,  ..., -0.0341, -0.0317, -0.0298],
         ...,
         [ 0.3057,  0.3142,  0.3173,  ..., -0.0253, -0.0244, -0.0316],
         [ 0.3049,  0.3139,  0.3079,  ..., -0.0285, -0.0236, -0.0268],
         [ 0.3081,  0.3118,  0.3124,  ..., -0.0294, -0.0282, -0.0229]],

        [[-0.0308, -0.0287, -0.0304,  ..., -0.0243, -0.0399, -0.0326],
         [-0.0294, -0.0234, -0.02

In [85]:
# LoadImaged in the training pipeline is configured with image_only=True, so a
# sample only carries the 'images' and 'labels' keys. With image_only=False it
# would additionally contain images_meta and labels_meta entries.
#
# Fetch the item once and reuse it: every indexing call sends the item through
# the pipeline again (which contains random transforms), so indexing three
# times would inspect three different draws and triple the work.
# dataset[0] is a list of sample dicts; look at the first one.
first_sample = dataset[0][0]
print(first_sample.keys())
print(first_sample['images'].shape)
print(first_sample['labels'].shape)

dict_keys(['images', 'labels'])
torch.Size([3, 128, 128])
torch.Size([1, 128, 128])


3. 检查dataloader

In [86]:
# Build the training dataloader from its config. The nested dataset config is
# the same object edited above, so it already carries the corrected data_root
# and the restored pipeline.
dataloader = build_dataloader_from_cfg(train_dataloader)

In [87]:
# Pull a single batch and print the batched tensor shapes, then stop.
for batch in dataloader:
    # The "TypedStorage is deprecated..." warnings in the output come from an
    # incompatibility between monai 1.1.0 and pytorch 2.0.0 (per the original
    # author's note) and can be ignored.
    print(batch['images'].shape)
    print(batch['labels'].shape)
    break

TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly.  To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()
TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly.  To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()
TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly.  To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()
TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly.  To a

torch.Size([16, 3, 128, 128])
torch.Size([16, 1, 128, 128])


TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly.  To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()
