From 69577ddbf38ac3a6ca053128b53309bc0bb35e70 Mon Sep 17 00:00:00 2001
From: mengxuez
Date: Mon, 30 Nov 2020 15:18:13 +0800
Subject: [PATCH 1/2] support image normalization

In this commit, common normalization methods are implemented in the
normalise_image function of utils.py and a normalization argument is
added to main.py.
---
 README.md   |  2 ++
 datasets.py | 13 +++++++------
 main.py     | 17 ++++++++++++++++-
 utils.py    | 51 ++++++++++++++++++++++++++++++++++++++++++++++-----
 4 files changed, 71 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index b1779ab..527305c 100644
--- a/README.md
+++ b/README.md
@@ -148,5 +148,7 @@ Examples:
     This runs on GPU a basic 4-layers fully connected neural network on the Pavia University dataset, using 10% of the samples for training.
   * `python main.py --model hamida --dataset PaviaU --training_sample 0.5 --patch_size 7 --epoch 50 --cuda`
     This runs on GPU the 3D CNN from Hamida et al. on the Pavia University dataset with a patch size of 7, using 50% of the samples for training and optimizing for 50 epochs.
+  * `python main.py --model li --dataset IndianPines --training_sample 0.5 --normalization SNB --cuda`
+    This runs on GPU the 3D CNN from Li et al. on the Indian Pines dataset with the default patch size of 5, using 50% of the samples for training and the SNB normalization method.
 
 [![Say Thanks!](https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg)](https://saythanks.io/to/nshaud)
diff --git a/datasets.py b/datasets.py
index da1a8e2..4492b4b 100644
--- a/datasets.py
+++ b/datasets.py
@@ -18,7 +18,7 @@
     # Python 2
     from urllib import urlretrieve
 
-from utils import open_file
+from utils import open_file, normalise_image
 
 DATASETS_CONFIG = {
     "PaviaC": {
@@ -96,7 +96,7 @@ def update_to(self, b=1, bsize=1, tsize=None):
         self.update(b * bsize - self.n)  # will also set self.n = b * bsize
 
 
-def get_dataset(dataset_name, target_folder="./", datasets=DATASETS_CONFIG):
+def get_dataset(dataset_name, normalization_method, target_folder="./", datasets=DATASETS_CONFIG):
     """Gets the dataset specified by name and return the related components.
     Args:
         dataset_name: string with the name of the dataset
@@ -303,6 +303,10 @@ def get_dataset(dataset_name, target_folder="./", datasets=DATASETS_CONFIG):
             palette,
         ) = CUSTOM_DATASETS_CONFIG[dataset_name]["loader"](folder)
 
+    # Normalization
+    img = np.asarray(img, dtype="float32")
+    img = normalise_image(img, method=normalization_method)
+
     # Filter NaN out
     nan_mask = np.isnan(img.sum(axis=-1))
     if np.count_nonzero(nan_mask) > 0:
@@ -312,11 +316,8 @@ def get_dataset(dataset_name, target_folder="./", datasets=DATASETS_CONFIG):
     img[nan_mask] = 0
     gt[nan_mask] = 0
     ignored_labels.append(0)
-
     ignored_labels = list(set(ignored_labels))
-    # Normalization
-    img = np.asarray(img, dtype="float32")
-    img = (img - np.min(img)) / (np.max(img) - np.min(img))
+
     return img, gt, label_values, ignored_labels, rgb_bands, palette
 
 
diff --git a/main.py b/main.py
index 98055c4..a3c30b2 100644
--- a/main.py
+++ b/main.py
@@ -129,6 +129,19 @@
     help="Path to the test set (optional, by default "
     "the test_set is the entire ground truth minus the training)",
 )
+group_dataset.add_argument(
+    "--normalization",
+    type=str,
+    default='MNI',
+    help="Normalization method to use for image preprocessing. Available:\n"
+    "None: Applying no preprocessing.\n"
+    "L2NS: Normalizing each sample to unit Euclidean norm.\n"
+    "MNB: Scaling the dynamic range to [0, 1] along each band.\n"
+    "SNB: Standardizing to zero mean and unit variance along each band.\n"
+    "MNI: Scaling the dynamic range to [0, 1] over the whole image.\n"
+    "SNI: Standardizing to zero mean and unit variance over the whole image.",
+)
+
 # Training options
 group_train = parser.add_argument_group("Training")
 group_train.add_argument(
@@ -225,6 +238,8 @@
 # Testing ground truth file
 TEST_GT = args.test_set
 TEST_STRIDE = args.test_stride
+# Normalization method
+NORM_METHOD = args.normalization
 
 if args.download is not None and len(args.download) > 0:
     for dataset in args.download:
@@ -238,7 +253,7 @@
 
 hyperparams = vars(args)
 # Load the dataset
-img, gt, LABEL_VALUES, IGNORED_LABELS, RGB_BANDS, palette = get_dataset(DATASET, FOLDER)
+img, gt, LABEL_VALUES, IGNORED_LABELS, RGB_BANDS, palette = get_dataset(DATASET, NORM_METHOD, FOLDER)
 # Number of classes
 N_CLASSES = len(LABEL_VALUES)
 # Number of bands (last dimension of the image tensor)
diff --git a/utils.py b/utils.py
index 331937d..53acd19 100644
--- a/utils.py
+++ b/utils.py
@@ -222,9 +222,52 @@ def get_random_pos(img, window_shape):
     return x1, x2, y1, y2
 
 
-def padding_image(image, patch_size=None, mode="symmetric", constant_values=0):
+def normalise_image(image, method='None', epsilon=1e-7):
+    """Normalise an image.
+
+    Args:
+        image: A 3D image with a shape of [h, w, b] or a 2D image with a shape of [h * w, b],
+            The array to normalise
+        method: optional, the method used to normalise an HSI, default is 'None',
+            one of the following six options:
+            1) None: Applying no preprocessing.
+            2) L2NS: Normalizing each sample to unit Euclidean norm.
+            3) MNB: Scaling the dynamic range to [0, 1] along each band.
+            4) SNB: Standardizing to zero mean and unit variance along each band.
+            5) MNI: Scaling the dynamic range to [0, 1] over the whole image.
+            6) SNI: Standardizing to zero mean and unit variance over the whole image.
+        epsilon: optional, default is 1e-7, small float added to the denominator to avoid dividing by zero.
+    Returns:
+        normalised_image with the same shape as the input
+
+    """
+    allowed_methods = ['NONE', 'L2NS', 'MNB', 'SNB', 'MNI', 'SNI']
+    method = method.upper()
+    if method not in allowed_methods:
+        raise ValueError('unknown normalization method "%s"' % (method,))
+
+    shp = image.shape
+    if len(shp) >= 3:
+        image = image.reshape(-1, shp[-1])
+
+    if method == 'L2NS':
+        image = image / (np.linalg.norm(image, axis=1, keepdims=True) + epsilon)
+    elif method == 'MNB':
+        image = (image - np.min(image, axis=0, keepdims=True)) / \
+                (np.max(image, axis=0, keepdims=True) - np.min(image, axis=0, keepdims=True) + epsilon)
+    elif method == 'SNB':
+        image = (image - np.mean(image, axis=0, keepdims=True)) / (np.std(image, axis=0, keepdims=True) + epsilon)
+    elif method == 'MNI':
+        image = (image - np.min(image, axis=(0, 1), keepdims=True)) / \
+                (np.max(image, axis=(0, 1), keepdims=True) - np.min(image, axis=(0, 1), keepdims=True) + epsilon)
+    elif method == 'SNI':
+        image = (image - np.mean(image, axis=(0, 1), keepdims=True)) / (np.std(image, axis=(0, 1), keepdims=True) + epsilon)
+
+    return np.reshape(image, shp)
+
+
+def padding_image(image, patch_size=None, mode="symmetric", **kwargs):
     """Padding an input image.
-    Modified at 2020.11.16. If you find any issues, please email at mengxue_zhang@hhu.edu.cn with details.
 
     Args:
         image: 2D+ image with a shape of [h, w, ...],
@@ -233,8 +276,6 @@ def padding_image(image, patch_size=None, mode="symmetric", constant_values=0):
             The patch size of the algorithm
         mode: optional, str or function, default is "symmetric",
             Including 'constant', 'reflect', 'symmetric', more details see np.pad()
-        constant_values: optional, sequence or scalar, default is 0,
-            Used in 'constant'. The values to set the padded values for each axis
 
     Returns:
         padded_image with a shape of [h + patch_size[0] // 2 * 2, w + patch_size[1] // 2 * 2, ...]
@@ -245,7 +286,7 @@ def padding_image(image, patch_size=None, mode="symmetric", constant_values=0):
     w = patch_size[1] // 2
     pad_width = [[h, h], [w, w]]
     [pad_width.append([0, 0]) for i in image.shape[2:]]
-    padded_image = np.pad(image, pad_width, mode=mode, constant_values=constant_values)
+    padded_image = np.pad(image, pad_width, mode=mode, **kwargs)
 
     return padded_image
 

From 4f97700371abcd28d8e04392673b3b623b64e513 Mon Sep 17 00:00:00 2001
From: snowzm
Date: Fri, 4 Dec 2020 09:00:34 +0800
Subject: [PATCH 2/2] update the get_dataset function in datasets.py to use a
 default normalization_method

---
 datasets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datasets.py b/datasets.py
index 4492b4b..35ccd0b 100644
--- a/datasets.py
+++ b/datasets.py
@@ -96,7 +96,7 @@ def update_to(self, b=1, bsize=1, tsize=None):
         self.update(b * bsize - self.n)  # will also set self.n = b * bsize
 
 
-def get_dataset(dataset_name, normalization_method, target_folder="./", datasets=DATASETS_CONFIG):
+def get_dataset(dataset_name, normalization_method="MNI", target_folder="./", datasets=DATASETS_CONFIG):
     """Gets the dataset specified by name and return the related components.
     Args:
         dataset_name: string with the name of the dataset
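
For reference, a minimal usage sketch of the normalise_image helper added by this series, assuming a NumPy array shaped [h, w, bands]; the sample data below is illustrative and only the function name, the method argument, and the method codes come from the patch:

    import numpy as np

    from utils import normalise_image

    # A small fake hyperspectral cube: 4 x 5 pixels, 3 bands.
    img = (np.random.rand(4, 5, 3) * 1000).astype("float32")

    # MNB: band-wise min-max scaling, so every band ends up in [0, 1].
    mnb = normalise_image(img, method="MNB")
    assert mnb.shape == img.shape and mnb.min() >= 0.0 and mnb.max() <= 1.0

    # SNB: band-wise standardization, so each band has ~zero mean and ~unit std.
    snb = normalise_image(img, method="SNB")
    print(snb.reshape(-1, 3).mean(axis=0))  # approximately [0, 0, 0]
    print(snb.reshape(-1, 3).std(axis=0))   # approximately [1, 1, 1]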
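
Because padding_image now forwards extra keyword arguments to np.pad, constant padding values are passed as a keyword rather than through a dedicated parameter; a short sketch with an illustrative array and patch size:

    import numpy as np

    from utils import padding_image

    img = np.zeros((10, 10, 3), dtype="float32")
    # Pad for 7 x 7 patch extraction; zero borders are requested via np.pad's constant_values.
    padded = padding_image(img, patch_size=(7, 7), mode="constant", constant_values=0)
    assert padded.shape == (16, 16, 3)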