/
dataset.py
154 lines (132 loc) · 6.5 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# coding=utf-8
# Version:python 3.7
import scipy.io as io
import h5py
from torch.utils.data import DataLoader, Dataset
import glob
import os
from utils import gettransform, ToTensor, channel_minmax_normalize
from utils import minmax_normalize, crop_center, data_augmentation
from add_noise import *
"""
Create the patch dataset.
Args:
matpath & ext: every file in matpath whose name ends with the extension ext is loaded
datapath & ratio: the generated patches are written under datapath, divided into train and test (ratio is the fraction of source files used for training)
patch: the size of each extracted block, given as three values, one per axis of the channel-first cube
strides: the step between neighbouring blocks, given as three values, one per axis
mode: 'single' splits the patches of the source data between train and test (every tenth patch goes to test, ratio is not used); any other value assigns whole mat files to train or test according to ratio
h5py.File: loader suitable for these data: icvl & chongqing & urban
io.loadmat: loader suitable for these data: pavia
channel_first: indicates whether the channel is already the first dimension of the loaded data
key: the keyword under which the data is stored in the mat file
loader: the function used to read the mat file
tag: suffix appended to the train and test folder names to mark this segmentation
Results:
All mat files in matpath are cut into patch-sized blocks, which are placed in train+tag and test+tag under datapath according to the ratio
datapath/train & datapath/test
"""
def _load_cube(mat_file, loader, key):
    """Read one source file into an in-memory numpy array and release the file handle."""
    handle = loader(mat_file)
    try:
        source = handle[key] if key is not None else handle
        return np.array(source)
    finally:
        # h5py.File keeps the file open until it is closed; the dict returned by
        # io.loadmat has no close(), so only call it when the loader's result offers one.
        close = getattr(handle, 'close', None)
        if callable(close):
            close()


def _patch_grid(shape, patch, strides):
    """Return how many patches fit along each of the first three axes (never negative)."""
    return tuple(max((shape[axis] - patch[axis]) // strides[axis] + 1, 0) for axis in range(3))


def createdataset(matpath, datapath, ratio, patch, strides, mode, crop_size=None, channel_first=True, key='rad',
                  loader=h5py.File, tag='', ext='.mat'):
    """Cut every source file under ``matpath`` into patches and save them as ``.mat`` files.

    Patches are written to ``datapath + 'train' + tag + '/'`` and
    ``datapath + 'test' + tag + '/'`` as ``0.mat``, ``1.mat``, ... with the
    patch stored under the key ``'data'``. Generation is skipped when both
    folders already exist.

    Args:
        matpath: prefix of the source files; ``matpath + '*' + ext`` is globbed.
        datapath: output prefix (string concatenation is used, so it should
            end with a path separator).
        ratio: fraction of the source files assigned to training. Only used
            when ``mode`` is not ``'single'``.
        patch: three patch sizes, one per axis of the channel-first cube.
        strides: three step sizes, one per axis.
        mode: ``'single'`` sends every tenth patch (running index 0, 10,
            20, ...) to test and the rest to train; any other value assigns
            whole files to train or test according to ``ratio``.
        crop_size: if truthy, ``crop_center(data, crop_size[0], crop_size[1])``
            is applied before patching.
        channel_first: if False the loaded array is transposed with axes
            ``[2, 0, 1]`` so that the last axis becomes the first.
        key: key used to index the loader's result; ``None`` uses the result
            itself.
        loader: callable that opens one source file (``h5py.File`` or
            ``scipy.io.loadmat``).
        tag: suffix appended to the ``train`` / ``test`` folder names.
        ext: extension of the source files.
    """
    train_dir = datapath + f'train{tag}/'
    test_dir = datapath + f'test{tag}/'

    # makedirs also creates missing parents and tolerates an existing folder.
    os.makedirs(datapath, exist_ok=True)

    if os.path.exists(train_dir) and os.path.exists(test_dir):
        print('dataset already exist!')
    else:
        # Create each folder independently: a plain mkdir on both would raise
        # FileExistsError when only one of the two is missing.
        os.makedirs(train_dir, exist_ok=True)
        os.makedirs(test_dir, exist_ok=True)

        # glob returns files in arbitrary order; sort so the split is reproducible.
        mat_list = sorted(glob.glob(matpath + f'*{ext}'))
        split_point = int(len(mat_list) * ratio)
        train_files = set(mat_list[:split_point])

        tr_count = 0
        te_count = 0
        p_c = 0  # running patch index, drives the split in 'single' mode

        for mat_file in mat_list:
            data = _load_cube(mat_file, loader, key)
            if not channel_first:
                data = np.transpose(data, [2, 0, 1])
            if crop_size:
                data = crop_center(data, crop_size[0], crop_size[1])
            # NOTE(review): presumably rescales the cube to [0, 1] -- confirm in utils.
            data = minmax_normalize(data)
            print(data.shape)

            n0, n1, n2 = _patch_grid(data.shape, patch, strides)
            print('create {} images from {} in {}'.format(n0 * n1 * n2, mat_file, matpath))

            for i in range(n0):
                a0 = strides[0] * i
                for j in range(n1):
                    a1 = strides[1] * j
                    for k in range(n2):
                        a2 = strides[2] * k
                        img = data[a0:a0 + patch[0], a1:a1 + patch[1], a2:a2 + patch[2]]

                        if mode == 'single':
                            to_train = p_c % 10 != 0
                            p_c += 1
                        else:
                            to_train = mat_file in train_files

                        if to_train:
                            io.savemat(train_dir + str(tr_count) + '.mat', {'data': img})
                            tr_count += 1
                        else:
                            io.savemat(test_dir + str(te_count) + '.mat', {'data': img})
                            te_count += 1

    # Patches are always written with the '.mat' suffix, independent of ``ext``.
    print('{} images for training, {} images for testing.'.format(len(glob.glob(train_dir + '*.mat')),
                                                                  len(glob.glob(test_dir + '*.mat'))))
class ICVLData(Dataset):
    """Dataset over numbered patch files ``<path>0.mat``, ``<path>1.mat``, ...

    Each item is loaded from the ``'data'`` entry of ``path + f'{index}.mat'``
    and returned as a ``(img_gt, noise_img)`` pair.

    Args:
        path: prefix of the patch files (string concatenation is used, so it
            should end with a path separator).
        count: value reported by ``len()``.
        transforms: optional callable applied to the loaded array to produce
            the noisy sample. When falsy, the clean tensor is returned for
            both elements of the pair.
        train: when True, ``data_augmentation`` is applied to the loaded
            array before anything else.
    """

    def __init__(self, path, count, transforms=None, train=True):
        super().__init__()
        self.path = path
        self.count = count
        self.transforms = transforms
        self.tensor = ToTensor()
        self.train = train

    def __getitem__(self, index):
        """Return ``(img_gt, noise_img)`` for the patch stored in ``<path><index>.mat``."""
        data = np.array(io.loadmat(self.path + f'{index}.mat')['data'])
        if self.train:
            data = data_augmentation(data)
        img_gt = self.tensor(data)
        # ``transforms`` defaults to None; without this fallback ``noise_img``
        # would be unbound and the return below would raise UnboundLocalError.
        noise_img = self.transforms(data) if self.transforms else img_gt
        # NOTE(review): samples are presumably laid out as (C, H, W) -- confirm.
        return img_gt, noise_img

    def __len__(self):
        """Return the number of patches given at construction time."""
        return self.count
"""
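Build the train and test dataloaders on top of createdataset above.
Args:
blind & noise: noise settings forwarded to gettransform
Return:
First split the files in matpath into patches placed under datapath, then wrap the train and test patches in ICVLData datasets (a torch.utils.data.Dataset subclass) with the noise transform (blind & noise) attached, and return the pair (trainDataloader, testDataloader).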
"""
def makedataloader(matpath, datapath, patch, strides, mode, channel_first, key, loader, batch_size=(64, 64), blind='no',
                   noise=30,
                   ratio=0.9, crop_size=None):
    """Generate the patch files and return ``(train_loader, test_loader)``.

    Args:
        matpath, datapath, patch, strides, mode, channel_first, key, loader,
        ratio, crop_size: forwarded unchanged to ``createdataset``.
        batch_size: pair ``(train_batch_size, test_batch_size)``.
        blind, noise: forwarded to ``gettransform`` to build the noise
            transform attached to each dataset.

    Returns:
        Two ``DataLoader`` objects over ``datapath + 'train/'`` (shuffled,
        with ``train=True``) and ``datapath + 'test/'`` (not shuffled, with
        ``train=False``), both using two worker processes.
    """
    # Write the patch files to disk before anything tries to count them.
    createdataset(matpath, datapath, ratio, patch, strides, mode, crop_size, channel_first, key, loader)

    # One transform object per split, built from the same noise settings.
    train_noise = gettransform(blind, noise)
    test_noise = gettransform(blind, noise)

    train_root = datapath + 'train/'
    test_root = datapath + 'test/'

    # The dataset length is the number of patch files found in each folder.
    n_train = len(glob.glob(train_root + '*.mat'))
    train_set = ICVLData(train_root, n_train, transforms=train_noise, train=True)
    n_test = len(glob.glob(test_root + '*.mat'))
    test_set = ICVLData(test_root, n_test, transforms=test_noise, train=False)

    train_loader = DataLoader(dataset=train_set, batch_size=batch_size[0], shuffle=True, num_workers=2)
    test_loader = DataLoader(dataset=test_set, batch_size=batch_size[1], shuffle=False, num_workers=2)
    return train_loader, test_loader