In [None]:
# cd your_location
cd c:/ml/

# Import libraries and check parameters

In [1]:
import os
import shutil
import tempfile
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

from collections import OrderedDict

import torch
import torch.nn as nn


# import necessary libraries
import glob
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from PIL import Image



from monai.transforms import (
    AsDiscrete,
    Compose,
    CropForegroundd,
    LoadImaged,
    Orientationd,
    RandFlipd,
    RandCropByPosNegLabeld,
    RandShiftIntensityd,
    ScaleIntensityRanged,
    Spacingd,
    RandRotate90d,
    EnsureTyped,
)
from monai.data import (
    ThreadDataLoader,
    CacheDataset,
    load_decathlon_datalist,
    decollate_batch,
    set_track_meta,
)
from monai.inferers import sliding_window_inference
from monai.networks.nets import SwinUNETR, AttentionUnet
from monai.metrics import DiceMetric
from monai.losses import DiceCELoss
import torch
import einops
import warnings



# Check the amount of shared memory
# NOTE: `df` and /dev/shm are Linux-specific; os.system returns the exit status,
# which is discarded here, so a failure of the command is not detected.
os.system('df -h /dev/shm')

import os
# NOTE(review): `dir` shadows the built-in dir() for the rest of the kernel session.
# The name is kept because other cells may read it.
dir = os.getcwd()
print('Current directory is:',dir)


# Suppress every warning category from this point on (this also hides
# deprecation warnings raised by libraries used later in the notebook).
warnings.filterwarnings("ignore")


Filesystem      Size  Used Avail Use% Mounted on
shm             128G     0  128G   0% /dev/shm
Current directory is: /media/Swin_UNETR_48


## Check whether CUDA is available

In [2]:
import torch

# Enumerate GPUs in PCI bus order so device indices match `nvidia-smi`.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

# Fall back to the CPU when no CUDA device is usable.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Only query GPU details when CUDA is actually available: torch.cuda.current_device()
# and torch.cuda.get_device_name() raise on a CUDA-less machine, which would make the
# CPU fallback above unreachable in practice.
# The last expression is the cell output: the name of GPU 0, or a CPU notice.
torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CUDA not available - using CPU"

'NVIDIA A40'

# Create a structure of the dataset in JSON

## Convert the TIFF files in `data/images` and `data/masks` to NumPy arrays, if necessary

In [3]:
import os
import numpy as np
import glob
import rasterio

# Make sure both output folders for the converted arrays exist
for npy_dir in ('./data/npy_images', './data/npy_masks'):
    os.makedirs(npy_dir, exist_ok=True)

# Function to convert TIFF to NumPy and save, replacing NaNs with zeros
def convert_tiff_to_npy(image_path, output_dir):
    """Read a raster file with rasterio and save its data as a ``.npy`` file.

    Non-finite values are cleaned with ``np.nan_to_num`` defaults: NaN becomes 0
    and +/-inf become the largest/smallest finite value of the dtype.

    Args:
        image_path: Path of the raster to read (opened with ``rasterio.open``).
        output_dir: Directory that receives the ``.npy`` file; created if missing.

    Returns:
        Path of the written file: ``output_dir`` joined with the input file's
        base name, its final extension replaced by ``.npy``.
    """
    with rasterio.open(image_path) as image:
        np_image = np.nan_to_num(image.read())  # read all bands, then clean NaN/inf

    os.makedirs(output_dir, exist_ok=True)

    # Swap only the final extension. A plain str.replace('.tif', '.npy') would
    # also rewrite '.tif' inside the name (e.g. 'a.tif_v2.tif') and would turn
    # 'x.tiff' into 'x.npyf'.
    stem, _ = os.path.splitext(os.path.basename(image_path))
    output_path = os.path.join(output_dir, stem + '.npy')
    np.save(output_path, np_image)
    return output_path

# Collect the source rasters: the images and their masks
image_files = glob.glob('./data/images/*.tif')
mask_files = glob.glob('./data/masks/*.tif')

# Convert each raster and remember where its .npy file was written
converted_images = []
for img in image_files:
    converted_images.append(convert_tiff_to_npy(img, './data/npy_images'))

converted_masks = []
for msk in mask_files:
    converted_masks.append(convert_tiff_to_npy(msk, './data/npy_masks'))

print("Conversion to NumPy arrays completed.")


Conversion to NumPy arrays completed.


# Proceed with the next step of data preparation

In [36]:
import json
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from monai.networks.nets import SwinUNETR
from monai.losses import DiceLoss
import torch
from torch.nn.functional import sigmoid
from collections import OrderedDict

class NumpyDataset(Dataset):
    """Dataset serving ``(image, label)`` float32 tensor pairs from ``.npy`` files.

    Args:
        image_paths: Sequence of paths to image arrays saved with ``np.save``.
        label_paths: Sequence of paths to label arrays; ``label_paths[i]`` is
            the label that belongs to ``image_paths[i]``.

    Raises:
        ValueError: If the two sequences do not have the same length.
    """

    def __init__(self, image_paths, label_paths):
        # Fail early: a mismatch would otherwise surface as an IndexError in
        # __getitem__ or as silently unused labels.
        if len(image_paths) != len(label_paths):
            raise ValueError(
                "image_paths and label_paths must have the same length, "
                f"got {len(image_paths)} and {len(label_paths)}"
            )
        self.image_paths = image_paths
        self.label_paths = label_paths

    def __len__(self):
        """Return the number of (image, label) pairs."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load pair ``idx`` from disk and return it as two float32 tensors."""
        image = np.load(self.image_paths[idx])
        label = np.load(self.label_paths[idx])
        # Shapes are whatever was stored in the files. The original note gave
        # [1, 13, 48, 48] for images and [1, 48, 48] for labels - presumably the
        # image shape includes a batch dimension; TODO confirm against the data.
        image = torch.tensor(image, dtype=torch.float32)
        label = torch.tensor(label, dtype=torch.float32)
        return image, label

def load_dataset_json(json_path):
    """Parse the JSON file at ``json_path`` and return the decoded object."""
    with open(json_path, 'r') as handle:
        return json.load(handle)

def prepare_data_loaders(train_paths, val_paths, batch_size, num_workers=8):
    """Build the training and validation ``DataLoader`` objects.

    Args:
        train_paths: Iterable of records, each with an ``'image'`` and a
            ``'label'`` entry holding the path of a ``.npy`` file.
        val_paths: Records of the same form for the validation split.
        batch_size: Batch size used by both loaders.
        num_workers: Worker processes per loader. Defaults to 8 (the value that
            was previously hard-coded); pass 0 to load in the main process.

    Returns:
        Tuple ``(train_loader, val_loader)``. Only the training loader shuffles.
    """

    def _build_loader(records, shuffle):
        # Split the records into the two parallel path lists NumpyDataset expects.
        images = [item['image'] for item in records]
        labels = [item['label'] for item in records]
        return DataLoader(
            NumpyDataset(images, labels),
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=num_workers,
        )

    train_loader = _build_loader(train_paths, shuffle=True)
    val_loader = _build_loader(val_paths, shuffle=False)

    return train_loader, val_loader

## Create the dataset JSON file

In [40]:
import glob
import json
import pprint
import random

# Specify the dataset JSON structure. Binary segmentation: 0 - background, 1 - levee
dataset_json = {
    "labels": {
        "0": "background",
        "1": "levee"
    },
    "tensorImageSize": "2D",
    "training": [],
    "validation": [],
    "testing": []
}

# Fixed seed so that Restart & Run All reproduces the same train/val/test split.
# An unseeded shuffle would move tiles between splits on every run.
SPLIT_SEED = 42


def split_paths(paths, seed=SPLIT_SEED, train_frac=0.6, val_frac=0.2):
    """Shuffle ``paths`` reproducibly and cut them into train/val/test lists.

    The input is sorted first, so the result depends only on the set of paths
    and the seed, not on the order in which the file system listed them.

    Args:
        paths: Iterable of path strings.
        seed: Seed for the private ``random.Random`` used for shuffling; the
            global ``random`` state is not touched.
        train_frac: Fraction of items for training (count is rounded down).
        val_frac: Fraction of items for validation (count is rounded down).

    Returns:
        Tuple ``(train, val, test)``; ``test`` receives all remaining items.
    """
    shuffled = sorted(paths)
    random.Random(seed).shuffle(shuffled)
    first_cut = int(train_frac * len(shuffled))
    second_cut = first_cut + int(val_frac * len(shuffled))
    return shuffled[:first_cut], shuffled[first_cut:second_cut], shuffled[second_cut:]


def make_dataset_entry(mask_path):
    """Build one dataset record from a mask path.

    The image path is derived from the mask's file stem, i.e. it is assumed
    (not checked) that an image with the same name exists in data/npy_images.
    """
    filename = os.path.splitext(os.path.basename(mask_path))[0]
    return {
        "image": f'data/npy_images/{filename}.npy',
        "label": f'data/npy_masks/{filename}.npy',
        "tileid": f'{filename}',
    }


# Retrieve all mask paths and split them 60 % / 20 % / remainder
train_paths, val_paths, test_paths = split_paths(glob.glob('data/npy_masks/*.npy'))

# Keep the bookkeeping names of the original cell available
masks_paths = train_paths + val_paths + test_paths  # shuffled order
total_count = len(masks_paths)
train_end = len(train_paths)
val_end = train_end + len(val_paths)

# Populate the dataset JSON with training, validation, and testing data
for split_name, split in (
    ("training", train_paths),
    ("validation", val_paths),
    ("testing", test_paths),
):
    dataset_json[split_name] = [make_dataset_entry(path) for path in split]

# Write the dataset JSON to a file (create the folder if this cell runs first)
datasets = 'data/dataset.json'
os.makedirs(os.path.dirname(datasets), exist_ok=True)
with open(datasets, 'w') as outfile:
    json.dump(dataset_json, outfile, indent=4)

# Print the dataset JSON for verification
pprint.pprint(dataset_json)


{'labels': {'0': 'background', '1': 'levee'},
 'tensorImageSize': '2D',
 'testing': [{'image': 'data/npy_images/CFE_selected_L5_S2_S1_PCA_48_a_25.npy',
              'label': 'data/npy_masks/CFE_selected_L5_S2_S1_PCA_48_a_25.npy',
              'tileid': 'CFE_selected_L5_S2_S1_PCA_48_a_25'},
             {'image': 'data/npy_images/CFE_selected_L5_S2_S1_PCA_48_a_49.npy',
              'label': 'data/npy_masks/CFE_selected_L5_S2_S1_PCA_48_a_49.npy',
              'tileid': 'CFE_selected_L5_S2_S1_PCA_48_a_49'},
             {'image': 'data/npy_images/CFE_selected_L5_S2_S1_PCA_48_a_189.npy',
              'label': 'data/npy_masks/CFE_selected_L5_S2_S1_PCA_48_a_189.npy',
              'tileid': 'CFE_selected_L5_S2_S1_PCA_48_a_189'},
             {'image': 'data/npy_images/CFE_selected_L5_S2_S1_PCA_48_a_121.npy',
              'label': 'data/npy_masks/CFE_selected_L5_S2_S1_PCA_48_a_121.npy',
              'tileid': 'CFE_selected_L5_S2_S1_PCA_48_a_121'},
             {'image': 'data/npy_image