In [1]:
import cv2
import os
import glob
from PIL import Image
import shutil
import random

In [2]:
import os
from PIL import Image

def resize_and_save_images(dir):
    """Write 256x256 and 512x512 copies of every ``.jpg`` found under ``dir``.

    The tree rooted at ``dir`` is walked recursively. Each file whose name
    ends in ``.jpg`` (case-insensitive) is opened with PIL and saved twice
    under its original file name: a 256x256 version in ``<dir>/LR`` and a
    512x512 version in ``<dir>/HR``. Both versions are resized from the
    source image.

    Args:
        dir: Root folder to scan. The ``LR`` and ``HR`` output folders are
            created directly inside it when they do not exist yet.

    Notes:
        * Outputs are stored flat, keyed by file name only, so two inputs
          with the same name in different sub-folders overwrite each other.
        * A failure on one image is printed and the remaining images are
          still processed.
    """
    lr_dir = os.path.join(dir, "LR")
    hr_dir = os.path.join(dir, "HR")
    # Without these folders every save() below fails on a fresh dataset and
    # the problem is only visible as a stream of per-file error messages.
    os.makedirs(lr_dir, exist_ok=True)
    os.makedirs(hr_dir, exist_ok=True)

    for root, dirs, files in os.walk(dir):
        # Prune in place so os.walk never descends into output folders;
        # otherwise already-generated images nested below HR/LR would be
        # picked up as inputs on a re-run.
        dirs[:] = [name for name in dirs if name not in ("HR", "LR")]
        if os.path.basename(root) in ("HR", "LR"):
            continue

        for file in files:
            if not file.lower().endswith('.jpg'):
                continue
            file_path = os.path.join(root, file)
            try:
                with Image.open(file_path) as img:
                    img.resize((256, 256)).save(os.path.join(lr_dir, file))
                    img.resize((512, 512)).save(os.path.join(hr_dir, file))
                print(f"Processed image: {file_path}")
            except Exception as e:
                # Best effort: report the broken file and keep going.
                print(f"Error processing image: {file_path}, Error: {str(e)}")

# Root folder scanned for .jpg inputs; the resized copies are written to the
# "LR" and "HR" sub-folders of this same folder.
# NOTE(review): the path looks like a Google Drive mount inside Colab - confirm
# the drive is mounted before running this cell.
# NOTE(review): the name `dir` shadows Python's built-in dir() for the rest of
# the session.
dir = "/content/drive/MyDrive/csc_496FinalProject/Data/Copernicus_data"



# Produces the 256x256 (LR) and 512x512 (HR) versions of every .jpg under `dir`.
resize_and_save_images(dir)

Processed image: /content/drive/MyDrive/csc_496FinalProject/Data/Copernicus_data/S2B_MSIL2A_20240505T083559_N0510_R064_T33KYV_20240505T113620-ql.jpg
Processed image: /content/drive/MyDrive/csc_496FinalProject/Data/Copernicus_data/S2B_MSIL2A_20240505T083559_N0510_R064_T34KBE_20240505T113620-ql.jpg
Processed image: /content/drive/MyDrive/csc_496FinalProject/Data/Copernicus_data/S2B_MSIL2A_20240505T083559_N0510_R064_T33KYU_20240505T113620-ql.jpg
Processed image: /content/drive/MyDrive/csc_496FinalProject/Data/Copernicus_data/S2B_MSIL2A_20240505T083559_N0510_R064_T33KYR_20240505T113620-ql.jpg
Processed image: /content/drive/MyDrive/csc_496FinalProject/Data/Copernicus_data/S2B_MSIL2A_20240505T083559_N0510_R064_T33JYN_20240505T113620-ql.jpg
Processed image: /content/drive/MyDrive/csc_496FinalProject/Data/Copernicus_data/S2B_MSIL2A_20240505T083559_N0510_R064_T33JWM_20240505T113620-ql.jpg
Processed image: /content/drive/MyDrive/csc_496FinalProject/Data/Copernicus_data/S2B_MSIL2A_20240505T08355

In [3]:
import os
import shutil
import random

def print_directory_structure(directory):
    """Print the folder tree below ``directory`` with 4-space indentation.

    Every folder is printed as ``name/`` at an indent matching its depth
    relative to ``directory`` (the root itself is depth 0); the files of a
    folder are listed one level deeper, directly below the folder line.

    Args:
        directory: Path of the folder to display. A trailing path separator
            is accepted.
    """
    for root, dirs, files in os.walk(directory):
        # Depth is derived from the path relative to the walk root. A plain
        # str.replace() of the root would also strip later occurrences of the
        # same text and miscount when `directory` ends with a separator.
        relative = os.path.relpath(root, directory)
        level = 0 if relative == os.curdir else relative.count(os.sep) + 1

        indent = ' ' * 4 * level
        # normpath drops a trailing separator, which would otherwise make
        # basename() return an empty folder name for the root.
        folder_name = os.path.basename(os.path.normpath(root))
        print(f"{indent}{folder_name}/")

        subindent = ' ' * 4 * (level + 1)
        for file in files:
            print(f"{subindent}{file}")

def split_dataset(hr_dir, lr_dir, train_ratio=0.9, seed=None, output_dir="remote_sensing"):
    """Copy paired HR/LR images into ``<output_dir>/{train,test}/{HR,LR}``.

    Only file names present as regular files in BOTH ``hr_dir`` and ``lr_dir``
    are split, so every copied HR image has its LR partner in the same
    subset. The paired names are sorted, shuffled, and the first
    ``int(n * train_ratio)`` go to ``train`` while the rest go to ``test``.

    Args:
        hr_dir: Folder holding the high-resolution images.
        lr_dir: Folder holding the low-resolution images (same file names).
        train_ratio: Fraction of pairs assigned to the training subset;
            must lie in ``[0, 1]``.
        seed: Optional seed for a reproducible split. With ``None`` the
            module-level ``random`` state is used, so the split differs
            between runs unless that state was seeded by the caller.
        output_dir: Root of the generated folder structure.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.

    Notes:
        When a name is assigned to one subset, a copy of the same name left
        in the other subset by an earlier run is deleted, so re-running never
        leaves an image in both ``train`` and ``test``.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError(f"train_ratio must be within [0, 1], got {train_ratio!r}")

    # (subset, resolution) -> destination folder
    split_dirs = {}
    for subset in ("train", "test"):
        for resolution in ("HR", "LR"):
            target = os.path.join(output_dir, subset, resolution)
            os.makedirs(target, exist_ok=True)
            split_dirs[subset, resolution] = target

    # os.listdir order is arbitrary; sorting first makes a seeded shuffle
    # reproducible. Sub-folders and HR files without an LR partner are skipped.
    paired_files = sorted(
        name for name in os.listdir(hr_dir)
        if os.path.isfile(os.path.join(hr_dir, name))
        and os.path.isfile(os.path.join(lr_dir, name))
    )

    rng = random if seed is None else random.Random(seed)
    rng.shuffle(paired_files)

    split_index = int(len(paired_files) * train_ratio)
    assignment = {
        "train": paired_files[:split_index],
        "test": paired_files[split_index:],
    }

    for subset, names in assignment.items():
        other_subset = "test" if subset == "train" else "train"
        for name in names:
            for resolution, src_dir in (("HR", hr_dir), ("LR", lr_dir)):
                # Drop a stale copy from the opposite subset (train/test leakage).
                stale_path = os.path.join(split_dirs[other_subset, resolution], name)
                if os.path.exists(stale_path):
                    os.remove(stale_path)
                shutil.copy(
                    os.path.join(src_dir, name),
                    os.path.join(split_dirs[subset, resolution], name),
                )

    print("Dataset split and structured successfully!")
    print_directory_structure(output_dir)


# Source folders for the split: the "HR" and "LR" sub-folders of the
# Copernicus_data folder. split_dataset looks up LR images by the HR file names.
hr_directory = "/content/drive/MyDrive/csc_496FinalProject/Data/Copernicus_data/HR"
lr_directory = "/content/drive/MyDrive/csc_496FinalProject/Data/Copernicus_data/LR"

# Uses the default train_ratio of 0.9 (90% train / 10% test).
split_dataset(hr_directory, lr_directory)

Dataset split and structured successfully!
remote_sensing/
    test/
        LR/
            S2B_MSIL2A_20240505T083559_N0510_R064_T33KYQ_20240505T113620-ql.jpg
            S2B_MSIL2A_20240505T083559_N0510_R064_T33JXK_20240505T113620-ql.jpg
            S2B_MSIL2A_20240505T083559_N0510_R064_T34KBE_20240505T113620-ql.jpg
        HR/
            S2B_MSIL2A_20240505T083559_N0510_R064_T33KYQ_20240505T113620-ql.jpg
            S2B_MSIL2A_20240505T083559_N0510_R064_T33JXK_20240505T113620-ql.jpg
            S2B_MSIL2A_20240505T083559_N0510_R064_T34KBE_20240505T113620-ql.jpg
    train/
        LR/
            S2B_MSIL2A_20240505T083559_N0510_R064_T33KZS_20240505T113620-ql.jpg
            S2B_MSIL1C_20240501T085559_N0510_R007_T33KUV_20240501T124015-ql.jpg
            S2B_MSIL2A_20240501T085559_N0510_R007_T33KUV_20240501T133454-ql.jpg
            S2B_MSIL2A_20240502T082559_N0510_R021_T34KBU_20240502T113605-ql.jpg
            S2A_MSIL2A_20240503T084601_N0510_R107_T33KXV_20240503T140450-ql.jpg
    

In [4]:
import os
import shutil

def _files_by_stem(folder):
    """Map each extension-less file name in ``folder`` to its actual file names."""
    grouped = {}
    for filename in os.listdir(folder):
        # Sub-folders are not images and must never be passed to os.remove().
        if os.path.isfile(os.path.join(folder, filename)):
            grouped.setdefault(os.path.splitext(filename)[0], []).append(filename)
    return grouped


def remove_unmatched_images(directory):
    """Delete images that lack a partner in the sibling HR/LR folder.

    For both ``test`` and ``train`` below ``directory``, files in ``HR`` and
    ``LR`` are compared by name without extension. Any file whose stem is
    missing from the other folder is deleted and reported. Afterwards the
    resulting tree of ``directory`` is printed.

    Args:
        directory: Dataset root containing ``test/{HR,LR}`` and
            ``train/{HR,LR}``.

    Raises:
        FileNotFoundError: If one of the four expected folders is missing.
    """
    for subset in ["test", "train"]:
        lr_dir = os.path.join(directory, subset, "LR")
        hr_dir = os.path.join(directory, subset, "HR")

        lr_files = _files_by_stem(lr_dir)
        hr_files = _files_by_stem(hr_dir)

        # Remove the real file names found on disk rather than rebuilding them
        # as stem + ".jpg", which silently skipped every other extension.
        for stem in sorted(set(hr_files) - set(lr_files)):
            for filename in hr_files[stem]:
                hr_file_path = os.path.join(hr_dir, filename)
                os.remove(hr_file_path)
                print(f"Removed unmatched HR image: {hr_file_path}")

        for stem in sorted(set(lr_files) - set(hr_files)):
            for filename in lr_files[stem]:
                lr_file_path = os.path.join(lr_dir, filename)
                os.remove(lr_file_path)
                print(f"Removed unmatched LR image: {lr_file_path}")

    # Show the tree that was actually cleaned (the argument, not a global).
    print_directory_structure(directory)


# Relative path, resolved against the notebook's current working directory;
# it is the same folder name that split_dataset writes its output to by default.
dataset_directory = "remote_sensing"

# Deletes HR/LR images without a partner in test/ and train/, then prints the tree.
remove_unmatched_images(dataset_directory)

remote_sensing/
    test/
        LR/
            S2B_MSIL2A_20240505T083559_N0510_R064_T33KYQ_20240505T113620-ql.jpg
            S2B_MSIL2A_20240505T083559_N0510_R064_T33JXK_20240505T113620-ql.jpg
            S2B_MSIL2A_20240505T083559_N0510_R064_T34KBE_20240505T113620-ql.jpg
        HR/
            S2B_MSIL2A_20240505T083559_N0510_R064_T33KYQ_20240505T113620-ql.jpg
            S2B_MSIL2A_20240505T083559_N0510_R064_T33JXK_20240505T113620-ql.jpg
            S2B_MSIL2A_20240505T083559_N0510_R064_T34KBE_20240505T113620-ql.jpg
    train/
        LR/
            S2B_MSIL2A_20240505T083559_N0510_R064_T33KZS_20240505T113620-ql.jpg
            S2B_MSIL1C_20240501T085559_N0510_R007_T33KUV_20240501T124015-ql.jpg
            S2B_MSIL2A_20240501T085559_N0510_R007_T33KUV_20240501T133454-ql.jpg
            S2B_MSIL2A_20240502T082559_N0510_R021_T34KBU_20240502T113605-ql.jpg
            S2A_MSIL2A_20240503T084601_N0510_R107_T33KXV_20240503T140450-ql.jpg
            S2B_MSIL2A_20240505T083559_N0510_R0

In [5]:
# Recursively copy the generated train/test split into the Copernicus_data folder.
# NOTE(review): the split was written to the relative path "remote_sensing", so
# this absolute source path presumably relies on the working directory being
# /content - confirm before running.
!cp -r "/content/remote_sensing" "/content/drive/MyDrive/csc_496FinalProject/Data/Copernicus_data"