<a href="https://colab.research.google.com/github/realtechsupport/cocktail/blob/main/sandbox/multi_image_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. create functions to preprocess the input image:
2. the function should have normalization, resize, creating patches
3. all the images should be converted into lists of patches
4. these lists are sampled and useful lists are created
5. then combine all the sampled into to bigger list,
4. then a numpy array


In [2]:
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [1]:
!pip install rasterio
import numpy as np
import rasterio
import keras
import tensorflow as tf

Collecting rasterio
  Downloading rasterio-1.3.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m65.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Collecting snuggs>=1.4.1 (from rasterio)
  Downloading snuggs-1.4.7-py3-none-any.whl (5.4 kB)
Installing collected packages: snuggs, affine, rasterio
Successfully installed affine-2.4.0 rasterio-1.3.8 snuggs-1.4.7


In [3]:
#preprocessing
def preprocessing(filelocation):
    # Load the GeoTIFF file
    with rasterio.open(filelocation) as src:
        # Read the TIFF data
        tiff_data = src.read()

        # Get the shape of the TIFF data
        num_bands, height, width = tiff_data.shape

        print("Original image dimensions:", num_bands, height, width)

        print(np.min(tiff_data), np.max(tiff_data))


        normalized_image = np.zeros_like(tiff_data, dtype='float32')
        for band in range(tiff_data.shape[1]):
            band_min = np.min(tiff_data[:, band])
            band_max = np.max(tiff_data[:, band])
            normalized_image[:, band] = (tiff_data[:, band] - band_min) / (band_max - band_min)

        # Calculate the new width and height that are multiples of the patch size
        patch_size = 256  # Replace with your desired patch size
        new_width = int(np.floor(width / patch_size)) * patch_size
        new_height = int(np.floor(height / patch_size)) * patch_size

        print("cropped dimensions:", new_height, new_width)

        input_image = np.moveaxis(normalized_image, 0, -1)

        # Crop the input_image to the new dimensions
        cropped_array = input_image[:new_height, :new_width, :]

    print("Cropped array shape:", cropped_array.shape)
    print(np.min(cropped_array), np.max(cropped_array))

    patches = []
    for i in range(0, cropped_array.shape[0], patch_size):
        for j in range(0, cropped_array.shape[1], patch_size):
            patch = cropped_array[i:i+patch_size, j:j+patch_size]
            patches.append(patch)
    print("patches are created")
    return patches

In [4]:
def sampling(training_images, mask_array):
    useful_images = []
    useful_masks = []
    useless = 0
    indexes = []
    for img in range(len(training_images)):
        img_name=training_images[img]
        mask_name = mask_array[img]

        val, counts = np.unique(mask_name, return_counts=True)

        if (1 - (counts[0]/counts.sum())) > 0.05:
          useful_images.append(img_name)
          useful_masks.append(mask_name)
          indexes.append(img)
          print("I am useful")

        else:
          #print("I am useless")
          useless +=1


    print("Total useful images are: ", len(training_images)-useless)
    print(indexes)
    print("Total useless images are: ", useless)

    return useful_images

In [5]:
def combine_lists(lists_of_arrays):
    """
    Combine lists of arrays into a single list.

    Args:
    lists_of_arrays (list): A list of lists, where each inner list contains arrays.

    Returns:
    combined_list (list): A single list containing all arrays from the input lists.
    """
    combined_list = []

    for arr_list in lists_of_arrays:
        combined_list.extend(arr_list)

    return combined_list

# # Example usage:
# list1 = [np.array([1, 2, 3]), np.array([4, 5, 6])]
# list2 = [np.array([7, 8, 9]), np.array([10, 11, 12])]

# lists_of_arrays = [list1, list2]
# combined_list = combine_lists(lists_of_arrays)

# print(combined_list)



In [6]:
#create mask_array

def preprocess_mask(filelocation):


    with rasterio.open(filelocation) as src:
        # Read the TIFF data
        output_mask = src.read()
        num_bands, height, width = src.shape

        # Calculate the new width and height that are multiples of the patch size
        patch_size = 256  # Replace with your desired patch size
        new_width = int(np.floor(width / patch_size)) * patch_size
        new_height = int(np.floor(height / patch_size)) * patch_size

        print("cropped dimensions:", new_height, new_width)

        output_mask = np.moveaxis(output_mask, 0, -1)

        # Crop the input_image to the new dimensions
        cropped_mask = output_mask[:new_height, :new_width, :]

        print("Cropped array shape:", cropped_mask.shape)

        new_mask = np.squeeze(cropped_mask)

    masks = []
    for i in range(0, new_mask.shape[0], patch_size):
        for j in range(0, new_mask.shape[1], patch_size):
            patch = new_mask[i:i+patch_size, j:j+patch_size]
            masks.append(patch)
    return masks


In [None]:
#combine masks
#apply one hot encoding
from tensorflow.keras.utils import to_categorical
def onehotencoding(labels, num_classes=23):
    return to_categorical(labels, num_classes)
mask_array = np.array(masks)

1. load each image, preprocess and create patches
2. load the mask and create mask-array
3. sample each image-patch with mask
4. get the useful patches
5. combine useful patches and convert into one numpy array
6. combine masks and convert them into one numpy array


In [7]:
patch_1 = preprocessing('/content/gdrive/MyDrive/exp/other images/public-archivedwl-5/area2_0123_2023_8bands.tif')

Original image dimensions: 8 4085 4686
1.0 10888.0
cropped dimensions: 3840 4608
Cropped array shape: (3840, 4608, 8)
0.0 1.0
patches are created


In [12]:
patch_1 = preprocessing('/content/gdrive/MyDrive/exp/other images/public-archivedwl-5/area2_0516_2023_8bands.tif')

Original image dimensions: 8 4093 4691
0.0 9013.286


  normalized_image[:, band] = (tiff_data[:, band] - band_min) / (band_max - band_min)


cropped dimensions: 3840 4608
Cropped array shape: (3840, 4608, 8)
nan nan
patches are created


In [9]:
patch_3 = preprocessing('/content/gdrive/MyDrive/exp/other images/public-archivedwl-5/area2_0516_2023_8bands_composite.tif')

Original image dimensions: 8 4013 4591
0 9977


  normalized_image[:, band] = (tiff_data[:, band] - band_min) / (band_max - band_min)


cropped dimensions: 3840 4352
Cropped array shape: (3840, 4352, 8)
0.0 1.0
patches are created


In [10]:
patch_4 = preprocessing('/content/gdrive/MyDrive/exp/other images/public-archivedwl-5/area2_0530_2022_8bands.tif')

Original image dimensions: 8 4019 4618
1.0 10610.0
cropped dimensions: 3840 4608
Cropped array shape: (3840, 4608, 8)
0.0 1.0
patches are created


In [11]:
patch_5 = preprocessing('/content/gdrive/MyDrive/exp/other images/public-archivedwl-5/area2_0617_2023_8bands.tif')

Original image dimensions: 8 4093 4691
0.0 14912.223


  normalized_image[:, band] = (tiff_data[:, band] - band_min) / (band_max - band_min)


cropped dimensions: 3840 4608
Cropped array shape: (3840, 4608, 8)
nan nan
patches are created
