In [27]:
import os
import pdb
import random
import shutil
import tempfile

import matplotlib.pyplot as plt
import numpy as np
import rasterio
import tifffile
from rasterio.enums import Resampling
from skimage.transform import resize
from tqdm import tqdm
tqdm.pandas()

In [20]:
# Build the per-GSD "random_synthetic" pool: for every GSD* subfolder found in
# the train split, copy all 'row*' tiles of the validation and test splits into
# a same-named subfolder of random_synthetic.
#
# Changes from the previous version of this cell:
#   * the three random.sample(..., 3000) calls were removed -- their results
#     were never used (the loops below always copied the full lists) and they
#     raised ValueError for any split holding fewer than 3000 tiles;
#   * the train split is no longer listed, since train tiles are not copied.

# Define the paths to the train, validation, and test directories
train_dir = '/work/scorreacardo_umass_edu/DeepSatGSD/data/processed/train'
validation_dir = '/work/scorreacardo_umass_edu/DeepSatGSD/data/processed/validation'
test_dir = '/work/scorreacardo_umass_edu/DeepSatGSD/data/processed/test'
random_synthetic_dir = '/work/scorreacardo_umass_edu/DeepSatGSD/data/processed/random_synthetic'

# The set of GSD subfolders is taken from the train directory; the same names
# are expected to exist under validation and test (os.listdir raises otherwise).
subfolder_names = [f for f in os.listdir(train_dir) if f.startswith('GSD')]

# Iterate over the subfolders
for subfolder_name in subfolder_names:
    subfolder_validation_path = os.path.join(validation_dir, subfolder_name)
    subfolder_test_path = os.path.join(test_dir, subfolder_name)
    subfolder_random_synthetic_path = os.path.join(random_synthetic_dir, subfolder_name)

    # Create the subfolder in random_synthetic directory if it doesn't exist
    os.makedirs(subfolder_random_synthetic_path, exist_ok=True)

    # List both splits up front so a missing split fails before anything is copied.
    validation_files = [f for f in os.listdir(subfolder_validation_path) if f.startswith('row')]
    test_files = [f for f in os.listdir(subfolder_test_path) if f.startswith('row')]

    # Copy every validation tile, then every test tile (one progress bar each).
    # NOTE(review): a test tile whose file name also exists in validation
    # overwrites the validation copy -- confirm names are unique across splits.
    for split_path, split_files in (
        (subfolder_validation_path, validation_files),
        (subfolder_test_path, test_files),
    ):
        for file_name in tqdm(split_files):
            file_path = os.path.join(split_path, file_name)
            shutil.copy(file_path, subfolder_random_synthetic_path)

100%|██████████| 9053/9053 [01:00<00:00, 148.66it/s]
100%|██████████| 10271/10271 [01:12<00:00, 141.51it/s]
100%|██████████| 3369/3369 [00:17<00:00, 188.01it/s]
100%|██████████| 9461/9461 [01:15<00:00, 125.65it/s]
100%|██████████| 11974/11974 [01:51<00:00, 107.41it/s]
100%|██████████| 9132/9132 [01:13<00:00, 123.90it/s]
100%|██████████| 11614/11614 [01:34<00:00, 122.93it/s]
100%|██████████| 10570/10570 [01:40<00:00, 104.69it/s]
100%|██████████| 26836/26836 [04:12<00:00, 106.49it/s]
100%|██████████| 30980/30980 [05:07<00:00, 100.79it/s]
100%|██████████| 14405/14405 [02:18<00:00, 104.33it/s]
100%|██████████| 33654/33654 [05:26<00:00, 103.20it/s]
100%|██████████| 19556/19556 [03:01<00:00, 107.80it/s]
100%|██████████| 12747/12747 [02:08<00:00, 99.53it/s] 
100%|██████████| 18095/18095 [02:43<00:00, 110.60it/s]
100%|██████████| 18838/18838 [03:05<00:00, 101.73it/s]
100%|██████████| 4616/4616 [00:44<00:00, 103.88it/s]
100%|██████████| 7809/7809 [01:16<00:00, 102.38it/s]
100%|██████████| 9770/

In [50]:
# Generate "random GSD" synthetic tiles.
#
# For every GSD* folder under `directory`, a fixed percentage of the source
# tiles is sampled. Each sampled tile is
#   1. read at a reduced size (bilinear) so that it mimics a GSD drawn at
#      random within +/- max_gsd_offset_cm of the folder's nominal GSD,
#   2. written to a scratch GeoTIFF and read back at (approximately) its
#      original size (bilinear),
#   3. resized to `desired_size` and saved in the same folder as
#      random_<gsd>m_<source name>.tif.
#
# Fixes relative to the previous version of this cell:
#   * seed_value is defined (it was commented out but still used -> NameError
#     on a fresh kernel) and the file list is sorted so the seed is meaningful;
#   * random.randint now receives integer centimetre bounds instead of floats;
#   * files produced by an earlier run (random_*) are excluded from sampling;
#   * the scratch GeoTIFF lives in a temporary directory instead of the CWD;
#   * unused full-image reads / metadata / counter were dropped;
#   * tifffile.imwrite replaces the deprecated imsave alias.
directory = '/work/scorreacardo_umass_edu/DeepSatGSD/data/processed/random_synthetic'  # Replace with the actual path to the directory
prefix = 'GSD'  # Replace with the desired starting string
output_prefix = 'random_'  # Prefix of the files written by this cell
percentage = 5  # Replace with the desired percentage
seed_value = 42  # Replace with your desired seed value
max_gsd_offset_cm = 5  # Random GSD is drawn within +/- this many centimetres
desired_size = (256, 256)  # (height, width) of the saved tiles

with tempfile.TemporaryDirectory() as scratch_dir:
    # Single scratch file, overwritten for every tile and removed on exit.
    new_image_path = os.path.join(scratch_dir, "resampled_image.tif")

    for folder in os.listdir(directory):
        if not folder.startswith(prefix):
            continue

        dir_path = os.path.join(directory, folder)
        print(dir_path)

        # The digits of the folder name are read as the nominal GSD in
        # centimetres (e.g. 'GSD_124cm' -> 124 cm -> 1.24 m).
        current_gsd_cm = int(''.join(filter(str.isdigit, folder)))
        current_gsd = current_gsd_cm / 100

        # Sorted listing + fixed seed => the same sample on every run.
        list_files = sorted(f for f in os.listdir(dir_path) if not f.startswith(output_prefix))
        sample_size = int(len(list_files) * (percentage / 100.0))
        random.seed(seed_value)
        sample_files = random.sample(list_files, sample_size)

        for filename in tqdm(sample_files):
            image_path = os.path.join(dir_path, filename)

            # Draw the target GSD on an integer centimetre grid; the lower
            # bound is clamped to 1 cm so the ratio below never divides by zero.
            desired_gsd = random.randint(
                max(1, current_gsd_cm - max_gsd_offset_cm),
                current_gsd_cm + max_gsd_offset_cm,
            ) / 100.0
            resampling_factor = current_gsd / desired_gsd

            # Step 1: read the source directly at the rescaled size.
            with rasterio.open(image_path) as src:
                new_height = int(src.height * resampling_factor)
                new_width = int(src.width * resampling_factor)
                resampled_data = src.read(
                    out_shape=(src.count, new_height, new_width),
                    resampling=Resampling.bilinear
                )

                # Only the raster size is updated; the affine transform is
                # copied unchanged from the source.
                new_meta = src.meta.copy()
                new_meta.update({
                    'width': new_width,
                    'height': new_height,
                })

            # Step 2: round-trip through the scratch file to resample back.
            with rasterio.open(new_image_path, 'w', **new_meta) as dst:
                dst.write(resampled_data)

            with rasterio.open(new_image_path) as dst:
                new_gsd = desired_gsd
                pixel_factor = current_gsd / new_gsd
                new_height = int(dst.height / pixel_factor)
                new_width = int(dst.width / pixel_factor)
                new_resampled_data = dst.read(
                    out_shape=(dst.count, new_height, new_width),
                    resampling=Resampling.bilinear
                )

            # Step 3: bring every tile to the same spatial size, keeping the
            # band axis first.
            # NOTE(review): skimage's resize returns floating-point data by
            # default -- confirm downstream consumers expect that dtype/range.
            resampled_data_resized = resize(
                new_resampled_data,
                (new_resampled_data.shape[0],) + desired_size,
                mode='reflect',
                anti_aliasing=True,
            )

            # Saved with tifffile, so no georeferencing metadata is written.
            final_image_path = os.path.join(
                dir_path,
                f"{output_prefix}{new_gsd}m_{os.path.splitext(filename)[0]}.tif",
            )
            tifffile.imwrite(final_image_path, resampled_data_resized)

/work/scorreacardo_umass_edu/DeepSatGSD/data/processed/random_synthetic/GSD_100cm


100%|██████████| 967/967 [01:09<00:00, 13.97it/s]


/work/scorreacardo_umass_edu/DeepSatGSD/data/processed/random_synthetic/GSD_124cm


100%|██████████| 641/641 [00:46<00:00, 13.88it/s]


/work/scorreacardo_umass_edu/DeepSatGSD/data/processed/random_synthetic/GSD_150cm


100%|██████████| 1055/1055 [01:13<00:00, 14.40it/s]


/work/scorreacardo_umass_edu/DeepSatGSD/data/processed/random_synthetic/GSD_175cm


100%|██████████| 1109/1109 [01:16<00:00, 14.57it/s]


/work/scorreacardo_umass_edu/DeepSatGSD/data/processed/random_synthetic/GSD_200cm


100%|██████████| 2594/2594 [03:02<00:00, 14.25it/s]


/work/scorreacardo_umass_edu/DeepSatGSD/data/processed/random_synthetic/GSD_250cm


100%|██████████| 2078/2078 [02:28<00:00, 14.01it/s]


/work/scorreacardo_umass_edu/DeepSatGSD/data/processed/random_synthetic/GSD_300cm


100%|██████████| 1601/1601 [01:54<00:00, 14.03it/s]


/work/scorreacardo_umass_edu/DeepSatGSD/data/processed/random_synthetic/GSD_50cm


100%|██████████| 1846/1846 [02:15<00:00, 13.62it/s]


/work/scorreacardo_umass_edu/DeepSatGSD/data/processed/random_synthetic/GSD_65cm


100%|██████████| 621/621 [00:45<00:00, 13.50it/s]


/work/scorreacardo_umass_edu/DeepSatGSD/data/processed/random_synthetic/GSD_80cm


100%|██████████| 936/936 [01:07<00:00, 13.79it/s]


In [None]:
# Build fixed-GSD synthetic datasets from the 50 cm imagery.
#
# Every file in `directory` is, for each target GSD,
#   1. read at a reduced size (bilinear) according to current_gsd / target,
#   2. written to a scratch GeoTIFF and read back at (approximately) its
#      original size (bilinear),
#   3. saved as <interim_dir>/<GSD_xxcm>/<source name>_resampled_image_<gsd>m.tif.
#
# Fixes relative to the previous version of this cell:
#   * the scratch file name used `new_gsd` before it was assigned (NameError on
#     a fresh kernel, or a stale value leaked from another cell);
#   * the leftover pdb.set_trace() that halted every iteration was removed;
#   * output subfolders are created up front;
#   * scratch files live in a temporary directory instead of the CWD;
#   * unused full-image reads were dropped.
interim_dir = '/work/scorreacardo_umass_edu/DeepSatGSD/data/interim'
directory = os.path.join(interim_dir, 'GSD_50cm')
current_gsd = 0.5  # meters per pixel

# Target GSDs (meters per pixel) and the interim subfolder that receives each.
desired_gsd_values = [0.80, 1.0, 1.5, 1.75, 2.0, 2.5, 3]  # meters per pixel
dic_filename_gsd_values = {0.80:"GSD_80cm",
                          1.0:"GSD_100cm",
                          1.5:"GSD_150cm",
                          1.75:"GSD_175cm",
                          2.0:"GSD_200cm",
                          2.5:"GSD_250cm",
                          3:"GSD_300cm"}

# Set to False to skip the inline previews (two figures per image per GSD).
# imshow of the band-last array assumes a band count matplotlib can display.
show_previews = True

# rasterio cannot create missing parent folders, so make them first.
for gsd_folder in dic_filename_gsd_values.values():
    os.makedirs(os.path.join(interim_dir, gsd_folder), exist_ok=True)

counter = 0
with tempfile.TemporaryDirectory() as scratch_dir:
    for filename in os.listdir(directory):
        counter += 1
        print(f"This is image number: {counter}")
        image_path = os.path.join(directory, filename)

        # Open the original image once and derive every target GSD from it
        with rasterio.open(image_path) as src:
            for new_gsd in desired_gsd_values:
                # Ratio < 1 for every target coarser than the source.
                pixel_factor = current_gsd / new_gsd

                # Step 1: read the source directly at the rescaled size.
                new_height = int(src.height * pixel_factor)
                new_width = int(src.width * pixel_factor)
                resampled_data = src.read(
                    out_shape=(src.count, new_height, new_width),
                    resampling=Resampling.bilinear
                )
                if show_previews:
                    plt.imshow(resampled_data.transpose(1, 2, 0))
                    plt.title(f"Resampled Image: GSD = {new_gsd} meters/pixel")
                    plt.show()

                # Scratch file for the round trip, named after the target GSD
                new_image_path = os.path.join(scratch_dir, f"resampled_image_{new_gsd}m.tif")

                # Only the raster size is updated; the affine transform is
                # copied unchanged from the source.
                new_meta = src.meta.copy()
                new_meta.update({
                    'width': new_width,
                    'height': new_height,
                })

                # Step 2: round-trip through the scratch file to resample back.
                with rasterio.open(new_image_path, 'w', **new_meta) as dst:
                    dst.write(resampled_data)

                with rasterio.open(new_image_path) as dst:
                    new_height = int(dst.height / pixel_factor)
                    print(f"new height: {new_height}")
                    new_width = int(dst.width / pixel_factor)
                    print(f"new width: {new_width}")
                    new_resampled_data = dst.read(
                        out_shape=(dst.count, new_height, new_width),
                        resampling=Resampling.bilinear
                    )

                    # Metadata for the final image: scratch metadata with the
                    # restored raster size (transform again left as-is).
                    final_meta = dst.meta.copy()
                    final_meta.update({
                        'width': new_width,
                        'height': new_height,
                    })

                if show_previews:
                    plt.imshow(new_resampled_data.transpose(1, 2, 0))
                    plt.title(f"Resampled Image: GSD = {new_gsd} meters/pixel")
                    plt.show()

                # Step 3: save under the folder that matches the target GSD.
                final_image_path = os.path.join(
                    interim_dir,
                    dic_filename_gsd_values[new_gsd],
                    f"{os.path.splitext(filename)[0]}_resampled_image_{new_gsd}m.tif",
                )
                with rasterio.open(final_image_path, 'w', **final_meta) as final_dst:
                    final_dst.write(new_resampled_data)