In [11]:
### Importing libraries and modules

import numpy as np
from skimage import io
import matplotlib.pyplot as plt
import os                       

from skimage import data, color
from skimage.transform import rescale, resize, downscale_local_mean

In [12]:
### Data location and directory control

control = True # if set to True, control values from the data processing will be printed

# set up the working directory to the chosen directory (raw string input), i.e., where the dataset is located
abs_path_loading_images=r'/Users/pbecerra/CAS/ADS_course/CAS_project/image_data/validation/images'
abs_path_saving_images =r'/Users/pbecerra/CAS/ADS_course/CAS_project/image_data/validation/images_small'
abs_path_loading_masks=r'/Users/pbecerra/CAS/ADS_course/CAS_project/image_data/validation/masks'
abs_path_saving_masks =r'/Users/pbecerra/CAS/ADS_course/CAS_project/image_data/validation/masks_small'

os.chdir(abs_path_loading_images) # set working directory to the location of the mscl-log
os.getcwd() # get the current working directory, to check if working in the right directory

'/Users/pbecerra/CAS/ADS_course/CAS_project/image_data/validation/images'

In [13]:
# List all jpg files in the current working directory.
files_dir = sorted([file for file in os.listdir(os.getcwd()) if file.endswith('.jpg')])

# Print the list of files and directories in the current working directory for quality control
print(files_dir) 

# Initialize with zeros a list called 'image_rescaled_grey_loaded' with the same length as the list of files and directories in the current directory
# This list will be used to store processed images or related data.
image_rescaled_grey_loaded = [0]*len(files_dir)

# Print the initialized list to check its contents for quality control
print(image_rescaled_grey_loaded)

['BASA1_016_1_A.jpg', 'BASA1_042_1_A.jpg', 'BASA1_083_1_A.jpg', 'BASA1_093_1_A.jpg', 'BASA1_123_1_A.jpg', 'BASA1_124_1_A.jpg', 'BASA1_133_1_A.jpg', 'BASA1_136_1_A.jpg', 'BASA1_156_1_A.jpg', 'BASA1_172_1_A.jpg', 'BASA1_202_1_A.jpg', 'BASA1_230_1_A.jpg']
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [14]:
### Load images from specified file directories, convert to grayscale, rescales to a specified width while maintaining the aspect ratio, and store the processed images in a list or array.

for i in range(len(image_rescaled_grey_loaded)):
    # Load an image from a file specified by the file directory.
    image = io.imread(files_dir[i])
    # Print shape of loaded image                                     
    #print(image.shape) 
    # Convert loaded image to grayscale
    image_gray =  color.rgb2gray(image) 
    # Rescale the grayscale image to a new width but keep the aspect ratio (352 pixels width).
    image_rescaled = rescale(image_gray, (352/1000), anti_aliasing=False)
    # Convert the rescaled image from a floating-point representation to an 8-bit unsigned integer representation.
    image_rescaled_grey = (image_rescaled*256).astype('uint8') 
    # Print the shape of the converted and rescaled image.   
    print(image_rescaled_grey.shape)
    # Store the converted and rescaled image in a list or array.
    image_rescaled_grey_loaded[i] = image_rescaled_grey

# Print the total number of processed images stored in 'image_rescaled_grey_loaded'.
print(len(image_rescaled_grey_loaded))

(2961, 352)
(2947, 352)
(2973, 352)
(1532, 352)
(3026, 352)
(3035, 352)
(3004, 352)
(2979, 352)
(2981, 352)
(2950, 352)
(2967, 352)
(1599, 352)
12


In [15]:
### Process each image in image_rescaled_grey_loaded, cropping it into batches of 352x352 pixels, padding them if necessary, and saving each batch as a separate PNG file. 

# Change the current working directory to the specified absolute path (location of the mscl log)
os.chdir(abs_path_saving_images) 
# Check the current working directory to ensure it's set correctly
os.getcwd()

# Iterate over the list of processed images
for i in range(len(image_rescaled_grey_loaded)):
    # Calculate the number of batches needed for the current image
    # Divide the height of the image by 352 and round up to get the number of batches.
    number_of_batches_per_image = np.ceil(image_rescaled_grey_loaded[i].shape[0]/352).astype(int)

    # Iterate over each batch of the current image
    for j in range(number_of_batches_per_image):
        # Crop the image to a size of 352x352
        # The crop starts at j*352 and ends at (j+1)*352 along the height. It covers the entire width of the image.
        image_crop = image_rescaled_grey_loaded[i][(j*352):(j+1)*352,0:]

        # If the cropped image shape is not (352, 352), pad it with zeros to make it (352, 352)
        if image_crop.shape != (352,352):
            # Print shape of current image when it's not 352x352
            print(image_crop.shape, j)
            # Pad the image with zeros to make its dimensions (352, 352).
            # 'np.pad' is a NumPy function used to pad arrays with values.
            image_crop = np.pad(image_crop, [(0, 352-image_crop.shape[0] ), (0, 352-image_crop.shape[1])], mode='constant', constant_values=0)  

        # Print shape of cropped image        
        # print(image_crop.shape)
        # Save the cropped image as a .PNG with a filename constructed from the original filename and batch number.
        io.imsave(files_dir[i][:-4]+'_'+str(j)+ '.png', image_crop) 
        

(145, 352) 8
(131, 352) 8
(157, 352) 8
(124, 352) 4
(210, 352) 8
(219, 352) 8
(188, 352) 8
(163, 352) 8
(165, 352) 8
(134, 352) 8
(151, 352) 8
(191, 352) 4


In [16]:
os.chdir(abs_path_loading_masks) # set working directory to the location of the mscl-log
os.getcwd() # get the current working directory, to check if working in the right directory

'/Users/pbecerra/CAS/ADS_course/CAS_project/image_data/validation/masks'

In [17]:
# List all tif files in the current working directory.
files_dir_mask = sorted([file for file in os.listdir(os.getcwd()) if file.endswith('.tif')])

# Print the list of files in the current working directory for quality control
print(files_dir_mask) 

# Initialize with zeros a list called 'mask_rescaled_loaded' with the same length as the list of files and directories in the current directory
# This list will be used to store processed masks.
mask_rescaled_loaded = [0]*len(files_dir_mask)

# Print the initialized list to check its contents for quality control
print(mask_rescaled_loaded)

['BASA1_016_1_A_mask.tif', 'BASA1_042_1_A_mask.tif', 'BASA1_083_1_A_mask.tif', 'BASA1_093_1_A_mask.tif', 'BASA1_123_1_A_mask.tif', 'BASA1_124_1_A_mask.tif', 'BASA1_133_1_A_mask.tif', 'BASA1_136_1_A_mask.tif', 'BASA1_156_1_A_mask.tif', 'BASA1_172_1_A_mask.tif', 'BASA1_202_1_A_mask.tif', 'BASA1_230_1_A_mask.tif']
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [18]:
### Load masks from specified file directories, rescale to a specified width while maintaining the aspect ratio, and store the processed masks in a list or array.

for i in range(len(mask_rescaled_loaded)):
    # Load a mask from a file specified by the file directory.
    image_m = io.imread(files_dir_mask[i])
    # Print shape of loaded mask image                                     
    #print(image.shape)  
    # Rescale the mask to a new width but keep the aspect ratio (352 pixels width).
    image_m_rescaled = rescale(image_m, (352/1000), order = 0, anti_aliasing=False)
    # Convert the rescaled mask from a floating-point representation to an integer representation.
    image_m_rescaled_int = image_m_rescaled.astype(int) 
    # Print the shape of the converted and rescaled mask image.   
    print(image_m_rescaled_int.shape)
    # Store the converted and rescaled mask in a list or array.
    mask_rescaled_loaded[i] = image_m_rescaled_int

# Print the total number of processed images stored in 'image_rescaled_grey_loaded'.
print(len(mask_rescaled_loaded))
print(np.unique(mask_rescaled_loaded[0]))

(2961, 352)
(2947, 352)
(2973, 352)
(1532, 352)
(3026, 352)
(3035, 352)
(3004, 352)
(2979, 352)
(2981, 352)
(2950, 352)
(2967, 352)
(1599, 352)
12
[0 1 5]


In [19]:
### Process each mask in mask_rescaled_loaded, cropping it into batches of 352x352 pixels, padding them if necessary, and saving each batch as a separate PNG file. 

# Change the current working directory to the specified absolute path (location of the mscl log)
os.chdir(abs_path_saving_masks) 
# Check the current working directory to ensure it's set correctly
os.getcwd()

# Iterate over the list of processed masks
for i in range(len(mask_rescaled_loaded)):
    # Calculate the number of batches needed for the current mask
    # Divide the height of the mask by 352 and round up to get the number of batches.
    number_of_batches_per_image = np.ceil(mask_rescaled_loaded[i].shape[0]/352).astype(int)
    
    # Iterate over each batch of the current mask image
    for j in range(number_of_batches_per_image):
        # Crop the mask image to a size of 352x352
        # The crop starts at j*352 and ends at (j+1)*352 along the height. It covers the entire width of the mask image.
        image_m_crop = mask_rescaled_loaded[i][(j*352):(j+1)*352,0:]
        print(image_m_crop)

        # If the cropped mask shape is not (352, 352), pad it with zeros to make it (352, 352)
        if image_m_crop.shape != (352,352):
            # Print shape of current mask when it's not 352x352
            print(image_m_crop.shape, j)
            # Pad with zeros to make its dimensions (352, 352).
            # 'np.pad' is a NumPy function used to pad arrays with values.
            image_m_crop = np.pad(image_m_crop, [(0, 352-image_m_crop.shape[0] ), (0, 352-image_m_crop.shape[1])], mode='constant', constant_values=0)  

        # Remap classes according to new scheme
        remap_dict = {0: 0, 1: 1, 2: 2, 3: 3, 4: 3, 5: 4, 6: 4, 7: 4}
        remap_vectorized = np.vectorize(remap_dict.get)
        image_m_crop = remap_vectorized(image_m_crop)
        
        # Print shape of cropped mask image        
        # print(image_m_crop.shape)
        # Save the cropped mask image with a filename constructed from the original filename and batch number + _mask.tif.
        save_path = files_dir_mask[i][:-8]+str(j)+ '_mask.tif'
        print("Saving to:", save_path)
        io.imsave(save_path, image_m_crop, check_contrast=False)
        

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Saving to: BASA1_016_1_A_0_mask.tif
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 5 5 0]
 [0 0 0 ... 5 5 0]
 [0 0 0 ... 5 5 0]]
Saving to: BASA1_016_1_A_1_mask.tif
[[0 0 0 ... 5 5 0]
 [0 0 0 ... 5 5 0]
 [0 0 0 ... 5 5 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Saving to: BASA1_016_1_A_2_mask.tif
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Saving to: BASA1_016_1_A_3_mask.tif
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 1 0 0]]
Saving to: BASA1_016_1_A_4_mask.tif
[[0 0 0 ... 1 0 0]
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 1 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Saving to: BASA1_016_1_A_5_mask.tif
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [

In [20]:
import glob
mask_paths = glob.glob(os.path.join(abs_path_saving_masks, "*_mask.tif"))

# Sort (optional, for consistency)
mask_paths.sort()

# Loop over each mask and print its unique values
for path in mask_paths:
    mask = io.imread(path)
    print(f"{os.path.basename(path)}: {np.unique(mask)}")

BASA1_016_1_A_0_mask.tif: [0 1 4]
BASA1_016_1_A_1_mask.tif: [0 4]
BASA1_016_1_A_2_mask.tif: [0 1 4]
BASA1_016_1_A_3_mask.tif: [0 1]
BASA1_016_1_A_4_mask.tif: [0 1]
BASA1_016_1_A_5_mask.tif: [0 1]
BASA1_016_1_A_6_mask.tif: [0 1]
BASA1_016_1_A_7_mask.tif: [0 1]
BASA1_016_1_A_8_mask.tif: [0 1]
BASA1_042_1_A_0_mask.tif: [0 1]
BASA1_042_1_A_1_mask.tif: [0 1]
BASA1_042_1_A_2_mask.tif: [0 1]
BASA1_042_1_A_3_mask.tif: [0 1]
BASA1_042_1_A_4_mask.tif: [0 1]
BASA1_042_1_A_5_mask.tif: [0 1]
BASA1_042_1_A_6_mask.tif: [0 1 4]
BASA1_042_1_A_7_mask.tif: [0 1 4]
BASA1_042_1_A_8_mask.tif: [0]
BASA1_083_1_A_0_mask.tif: [0 4]
BASA1_083_1_A_1_mask.tif: [0 4]
BASA1_083_1_A_2_mask.tif: [0 4]
BASA1_083_1_A_3_mask.tif: [0 4]
BASA1_083_1_A_4_mask.tif: [0 4]
BASA1_083_1_A_5_mask.tif: [0 4]
BASA1_083_1_A_6_mask.tif: [0 4]
BASA1_083_1_A_7_mask.tif: [0 4]
BASA1_083_1_A_8_mask.tif: [0 4]
BASA1_093_1_A_0_mask.tif: [0 3]
BASA1_093_1_A_1_mask.tif: [0 3]
BASA1_093_1_A_2_mask.tif: [0 3]
BASA1_093_1_A_3_mask.tif: [0 3]
BA

Possible next steps to hand in: 
- In this notebook, re-map the classes, such that you end up with only 5 classes: 
    - 0 = background
    - 1 = Gravel
    - 2 = Fines
    - 3, 4 = 3 = diamicts
    - 5, 6, 7 = 4 = sands

- In the Unet model, change n_classes to 5

- In the model run notebook (Main Project Notebook), change n_classes to 5. Try run. 