In [None]:
import numpy as np
from utils import *
#from model import *
import rasterio as rio 
import os
import matplotlib.pyplot as plt
import subprocess
from PIL import Image
from tqdm import tqdm
from utils import *
import torch 
import icecream 
import tqdm
import PIL
%matplotlib inline
%load_ext autoreload

##### Observe the full images
We load the train and test data, rebuild the full images with the `merge_rasters.py` script, and save them.

In [36]:
file_path = "glaciers_mapping_downsampled"

# Map "<split>_date<k>" and "<split>_gt" to the matching directory.
# Insertion order (date0, date1, gt for each split) is kept because
# later cells iterate over this dictionary.
data_paths = {}
for pipeline in ("train", "test"):
    split_dir = f"{file_path}/{pipeline}"
    data_paths.update(
        {f"{pipeline}_date{date}": f"{split_dir}/date{date}" for date in (0, 1)}
    )
    data_paths[f"{pipeline}_gt"] = f"{split_dir}/gt"
    


In [None]:
# Reconstruct the full images: run the merge script once per patch directory
# and write the result next to the data as "<key>_merged.tif".
for key, path in data_paths.items():
    output_path = f"{file_path}/{key}_merged.tif"
    command = ["python", "glaciers_mapping_downsampled/merge_rasters.py", "-i", path, "-o", output_path]
    # check=True raises CalledProcessError on a non-zero exit status, so a
    # failed merge stops here instead of surfacing later as a missing file.
    subprocess.run(command, check=True)

Then we save the RGB images as PNG files so that we can inspect them visually and (possibly) draw useful conclusions.

In [None]:
# Export every merged raster as a PNG for visual inspection.
# Image.save does not create missing directories, so make sure it exists.
os.makedirs("full_rgb_images", exist_ok=True)

for key in data_paths.keys():
    img_path = f"{file_path}/{key}_merged"
    # Use the dataset as a context manager so the file handle is released
    # as soon as the pixel data has been read.
    with rio.open(f"{img_path}.tif") as src:
        img = src.read()

    if key[-2:] == "gt":
        # Ground truth is not min-max normalized; it is only squeezed to a
        # single-channel image and scaled by 255.
        # NOTE(review): assumes mask values lie in [0, 1] - confirm.
        normalized_image = np.squeeze(img)
        rgb_image = (normalized_image * 255).astype(np.uint8)
    else:
        img_min = np.min(img)
        value_range = np.max(img) - img_min
        if value_range == 0:
            # Constant raster: avoid 0/0 and emit an all-black image instead.
            value_range = 1
        normalized_image = (img - img_min) / value_range
        # Keep the first three bands and move the band axis last for PIL.
        rgb_image = (normalized_image[:3] * 255).astype(np.uint8).transpose(1, 2, 0)

    pil_image = Image.fromarray(rgb_image)
    save_path = f"full_rgb_images/{key}.png"
    pil_image.save(save_path)

### Loading all patches

We store the patches in dictionaries of the form `{(position_x_of_patch, position_y_of_patch): numpy_array_of_the_patch}`.

At this stage, each patch is a NumPy array of shape (band, pixel_x, pixel_y), i.e. (4, 128, 128).

In [37]:
# Load the image patches of both acquisition dates for each split.
# The generator is consumed left to right, so the directories are read in
# the order listed.
patches_train0, patches_train1, patches_test0, patches_test1 = (
    get_organized_dict_of_patches(data_paths[name])
    for name in ("train_date0", "train_date1", "test_date0", "test_date1")
)

# Load the matching ground-truth patches.
patches_train_gt, patches_test_gt = (
    get_organized_dict_of_patches(data_paths[name])
    for name in ("train_gt", "test_gt")
)

100%|██████████| 1768/1768 [00:21<00:00, 83.11it/s]
100%|██████████| 1768/1768 [00:22<00:00, 77.95it/s]
100%|██████████| 351/351 [00:03<00:00, 102.71it/s]
100%|██████████| 351/351 [00:03<00:00, 109.59it/s]
100%|██████████| 1768/1768 [00:17<00:00, 98.88it/s] 
100%|██████████| 351/351 [00:01<00:00, 187.49it/s]


### Preprocessing

In [None]:
def convert_to_shape_pixels_by_bands(data):
    """Flatten an image into a 2-D ``(num_pixels, num_bands)`` array.

    Parameters
    ----------
    data : np.ndarray
        Either a 3-D array laid out bands-first, ``(num_bands, rows, cols)``,
        or a 2-D array that is already ``(num_pixels, num_bands)``.

    Returns
    -------
    np.ndarray
        A 2-D array in which column ``b`` holds every pixel of band ``b``.
        2-D input is returned unchanged.

    Raises
    ------
    AssertionError
        If ``data`` is neither 2-D nor 3-D.
    """
    num_dimensions = len(data.shape)
    assert num_dimensions in (2, 3), (
        f"expected a 2-D or 3-D array, got {num_dimensions} dimensions"
    )
    if num_dimensions == 2:
        return data

    num_bands = data.shape[0]
    # The band axis comes first, so flatten the two spatial axes per band and
    # transpose. A direct reshape((-1, num_bands)) keeps the memory order and
    # would put consecutive pixels of the SAME band into different columns.
    return data.reshape(num_bands, -1).T


In [None]:
def compute_average_feature(data):
    """Return the mean of each band as a 1-D float64 array.

    ``data`` is first passed through ``convert_to_shape_pixels_by_bands``;
    column ``b`` of that 2-D result is treated as band ``b``.
    """
    pixels = convert_to_shape_pixels_by_bands(data)
    band_count = pixels.shape[1]
    # One np.mean call per column, collected into a float64 vector.
    return np.array(
        [np.mean(pixels[:, band]) for band in range(band_count)],
        dtype=np.float64,
    )

In [None]:
def compute_standard_deviation_feature(data):
    """Return the standard deviation of each band as a 1-D float array.

    ``data`` is first passed through ``convert_to_shape_pixels_by_bands``;
    column ``b`` of that 2-D result is treated as band ``b``.
    """
    pixels = convert_to_shape_pixels_by_bands(data)
    std_per_band = np.zeros(pixels.shape[1])
    # Iterating over the transpose yields one column (band) at a time.
    for band, column in enumerate(pixels.T):
        std_per_band[band] = np.std(column)
    return std_per_band

In [None]:
def normalized_patches(data):
    """Standardize each band of a patch to zero mean and unit variance.

    Parameters
    ----------
    data : np.ndarray
        A patch accepted by ``convert_to_shape_pixels_by_bands``.

    Returns
    -------
    np.ndarray
        Array of shape ``(num_pixels, num_bands)`` holding
        ``(patch - mean) / std`` computed per band. A band whose standard
        deviation is zero (constant band) comes out as all zeros instead of
        NaN.
    """
    # Flatten once; the feature helpers return 2-D input unchanged.
    patch = convert_to_shape_pixels_by_bands(data)
    patch_avg = compute_average_feature(patch)
    patch_std = compute_standard_deviation_feature(patch)

    # A constant band has std == 0 and would give 0/0 = NaN. Dividing by 1
    # instead leaves the (already zero) centered values untouched.
    patch_std[patch_std == 0] = 1.0

    normalized_image = (patch - patch_avg) / patch_std

    return normalized_image

In [48]:
# Replace the original keys with consecutive integers starting at 1,
# following each dictionary's current iteration order.
# NOTE(review): patches are paired across dictionaries by this index
# afterwards, which presumes all of them iterate in the same order - confirm.
patches_train0 = dict(enumerate(patches_train0.values(), start=1))
patches_train1 = dict(enumerate(patches_train1.values(), start=1))
patches_test0 = dict(enumerate(patches_test0.values(), start=1))
patches_test1 = dict(enumerate(patches_test1.values(), start=1))
patches_train_gt = dict(enumerate(patches_train_gt.values(), start=1))
patches_test_gt = dict(enumerate(patches_test_gt.values(), start=1))

In [65]:
#Normalize the train set
patch_train0_norm = {}
patch_train1_norm = {}
# Iterate over the keys themselves: they run from 1 to len(patches_train0)
# inclusive, so range(1, len(patches_train0)) skipped the last patch.
for i in patches_train0:
    normalized_train0 = normalized_patches(patches_train0[i])
    normalized_train1 = normalized_patches(patches_train1[i])

    patch_train0_norm[i] = normalized_train0
    patch_train1_norm[i] = normalized_train1


In [66]:
#Normalize the test set
patch_test0_norm = {}
patch_test1_norm = {}
# Iterate over the keys themselves: they run from 1 to len(patches_test0)
# inclusive, so range(1, len(patches_test0)) skipped the last patch.
for i in patches_test0:
    normalized_test0 = normalized_patches(patches_test0[i])
    normalized_test1 = normalized_patches(patches_test1[i])

    patch_test0_norm[i] = normalized_test0
    patch_test1_norm[i] = normalized_test1

### Data Augmentation