In [1]:
#load the modules
import os
import time
import numpy as np
import cv2
import glob
import pandas as pd
import random
import torch
import PIL
from PIL import Image
from matplotlib import pyplot as plt 
import os
from sklearn.decomposition import PCA
import math
from itertools import cycle , islice


In [2]:
def preprocess_annons(path_to_csv):
    """Read a comma-separated annotation file into a DataFrame.

    Column names are supplied explicitly, so every row of the file
    (including the first one) is parsed as data.

    Note: a later cell of this notebook defines another function with the
    same name; once that cell has run, it replaces this definition.

    Args:
        path_to_csv: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        pandas.DataFrame with the columns image_path, xmin, ymin, xmax,
        ymax and label, in that order.
    """
    column_names = ['image_path', 'xmin', 'ymin', 'xmax', 'ymax', 'label']
    annotations = pd.read_csv(path_to_csv, sep=',', names=column_names)
    return annotations
    
def reformat(filename):
    """Derive a "...GS.jpg" file name from ``filename``.

    Only the first 42 characters of ``filename`` are considered; the last
    six of those are dropped and the literal suffix "GS.jpg" is appended.
    Inputs of six characters or fewer therefore yield just "GS.jpg".

    Args:
        filename: Source file name (string).

    Returns:
        str: The truncated name followed by "GS.jpg".
    """
    head = filename[:42]   # cap the part of the name that is kept
    stem = head[:-6]       # strip the trailing six characters of that part
    return '{}GS.jpg'.format(stem)

def compute_similarity(img1, img2):
    """Return the SSIM score of two images after reducing each to one channel.

    An input whose shape has exactly three dimensions is averaged over
    axis 2; any other input is passed through unchanged. The ``data_range``
    handed to ``ssim`` is the max-min spread of the (reduced) second image
    only.

    ``ssim`` is not defined in this cell: it has to exist in the notebook
    namespace by the time this function is called.

    Args:
        img1: First image array.
        img2: Second image array.

    Returns:
        Whatever ``ssim`` returns for the two single-channel images.
    """
    def _single_channel(image):
        # Anything that is not 3-D is treated as already single-channel.
        if len(image.shape) != 3:
            return image
        return np.mean(image, axis=2)

    gray_first = _single_channel(img1)
    gray_second = _single_channel(img2)
    value_span = gray_second.max() - gray_second.min()
    return ssim(gray_first, gray_second, data_range=value_span)

In [3]:
import os
import glob
import math
import numpy as np
from PIL import Image
from itertools import cycle, islice
from skimage.metrics import structural_similarity as ssim
import pandas as pd

# -------------------------------
# Helper: Preprocess annotation CSVs.
# (Define your own function or adjust as needed.)
def preprocess_annons(csv_path):
    """Load an annotation CSV using pandas' default parsing.

    With the defaults, the first row of the file is taken as the header, so
    the resulting column names are whatever the file declares.

    Args:
        csv_path: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        pandas.DataFrame holding the parsed rows.
    """
    annotations = pd.read_csv(csv_path)
    return annotations

# -------------------------------

def gram_schmidt_fusion_rgb(multispectral, pseudo_rgb, alpha=2):
    """
    Rescale a pseudo-RGB image by the ratio of multispectral to RGB intensity.

    For every pixel, the channel-mean of ``multispectral`` (multiplied by
    ``alpha``) is divided by the channel-mean of ``pseudo_rgb``; the
    pseudo-RGB channels are then multiplied by that ratio, clipped to
    [0, 255] and converted to uint8 (fractions are truncated).

    Args:
        multispectral (np.ndarray): Band stack, expected as (H, W, C_ms).
        pseudo_rgb (np.ndarray): Image to rescale, expected as (H, W, 3).
        alpha (float): Multiplier applied to the multispectral intensity.

    Returns:
        np.ndarray: uint8 array with the same shape as ``pseudo_rgb``.
    """
    rgb_f32 = pseudo_rgb.astype(np.float32)
    bands_f32 = multispectral.astype(np.float32)

    # Per-pixel mean over the last axis, kept with a trailing singleton
    # dimension so it broadcasts against the RGB channels.
    rgb_mean = rgb_f32.mean(axis=-1, keepdims=True)
    bands_mean = bands_f32.mean(axis=-1, keepdims=True)

    # The small epsilon keeps all-zero RGB pixels from dividing by zero.
    scale = (alpha * bands_mean) / (rgb_mean + 1e-8)

    return np.clip(rgb_f32 * scale, 0, 255).astype(np.uint8)

# -------------------------------
# Define output directory and create subdirectories for train, val, test
out_dir = "/kaggle/working/GS_transformed"

# One sub-folder per dataset split, all located directly under out_dir.
train_dir, val_dir, test_dir = (
    os.path.join(out_dir, split_name) for split_name in ("train", "valid", "test")
)

# Create the root first, then each split folder; existing folders are left alone.
for folder in (out_dir, train_dir, val_dir, test_dir):
    os.makedirs(folder, exist_ok=True)

# ------------------------------- change here 
# Load annotation CSVs and extract lists of image basenames
TRAIN_ANNOTATIONS_FILE_PATH = "/kaggle/input/new-split/train_annotations.csv"
VAL_ANNOTATIONS_FILE_PATH = "/kaggle/input/new-split/val_annotations.csv"
TEST_ANNOTATIONS_FILE_PATH = "/kaggle/input/new-split/test_annotations.csv"
# -------------------------------
file_paths = [TRAIN_ANNOTATIONS_FILE_PATH, VAL_ANNOTATIONS_FILE_PATH, TEST_ANNOTATIONS_FILE_PATH]

# Pair every annotation file with its split by name instead of by list
# position. The previous index-based if/elif chain stored the names from the
# VAL file in `test_paths` and the names from the TEST file in `val_paths`.
split_annotation_files = {
    "train": TRAIN_ANNOTATIONS_FILE_PATH,
    "val": VAL_ANNOTATIONS_FILE_PATH,
    "test": TEST_ANNOTATIONS_FILE_PATH,
}

split_basenames = {}
for split_name, annotations_csv in split_annotation_files.items():
    annotations = preprocess_annons(annotations_csv)
    # The CSV must expose an "image_path" column; only the file-name part of
    # each distinct entry is kept.
    unique_names = annotations["image_path"].unique().tolist()
    split_basenames[split_name] = [os.path.basename(name) for name in unique_names]

# Lists of image basenames per split, consumed by the fusion loop below.
train_paths = split_basenames["train"]
val_paths = split_basenames["val"]
test_paths = split_basenames["test"]

# -------------------------------
# Main processing of images for GS fusion
current_imgs_dir = "/kaggle/input/all-images-preprocessed/all_processed_imgs"
# NOTE: this changes the kernel's working directory for every later cell.
# It is kept because the glob patterns and image paths below are relative.
os.chdir(current_imgs_dir)

types = ('*.JPG', '*.TIF')
files_grabbed = []
for pattern in types:
    files_grabbed.extend(glob.glob(pattern))

# Sorted so that files with a common name prefix end up next to each other.
input_imgs = sorted(files_grabbed)
slc = 5  # number of images per group

# Basename -> destination folder. Entries written later win, so inserting
# test, then val, then train reproduces the train > val > test precedence of
# an if/elif chain while giving constant-time lookups per group.
dest_by_name = {}
for names, folder in ((test_paths, test_dir), (val_paths, val_dir), (train_paths, train_dir)):
    dest_by_name.update(dict.fromkeys(names, folder))


def _load_array(path):
    """Read the image at ``path`` into a NumPy array, closing the file afterwards."""
    with Image.open(path) as img:
        return np.asarray(img)


# Only complete groups are processed. Slicing the sorted list (instead of
# drawing from an endless cycle) guarantees that a trailing partial group is
# never padded with files taken from the start of the list.
leftover = len(input_imgs) % slc
if leftover:
    print(f"Warning: ignoring {leftover} trailing file(s) that do not form a complete group of {slc}.")

saved_count = 0
for start in range(0, len(input_imgs) - leftover, slc):
    cur_imgs = input_imgs[start:start + slc]

    # Output name is derived from the first file of the group: its last four
    # characters (the extension) are replaced by "_GS.jpg".
    base_name = os.path.basename(cur_imgs[0][:-4]) + '_GS.jpg'

    # Groups whose output name is not listed in any split are skipped.
    dest_folder = dest_by_name.get(base_name)
    if dest_folder is None:
        continue

    # NOTE(review): the roles below rely on the sort order inside each group
    # (JPG first, then four single-band images) -- confirm against the data.
    # The 0.125 factor applied to the JPG is not explained in this notebook.
    ref_img = _load_array(cur_imgs[1])
    target_jpg = _load_array(cur_imgs[0]) * 0.125
    target_nir = _load_array(cur_imgs[2])
    target_r = _load_array(cur_imgs[3])
    target_re = _load_array(cur_imgs[4])

    # Stack images to form multispectral and pseudo-RGB inputs
    fused_imgs = np.dstack((ref_img, target_jpg, target_nir, target_r, target_re))
    # Pseudo-RGB channel order: target_r, ref_img, target_nir.
    pseudo_rgb = np.dstack((target_r, ref_img, target_nir))

    fused_pseudo_rgb = gram_schmidt_fusion_rgb(multispectral=fused_imgs, pseudo_rgb=pseudo_rgb)

    # base_name already ends in ".jpg", so it is used as the file name directly.
    output_filename = os.path.join(dest_folder, base_name)
    Image.fromarray(fused_pseudo_rgb).save(output_filename)
    saved_count += 1

print(f"Saved {saved_count} fused image(s).")
print("All images transformed and saved into train, test, and val folders.")


All images transformed and saved into train, test, and val folders.


In [4]:
import shutil
import os

TRAIN_ANNOTATIONS_FILE_PATH = "/kaggle/input/new-split/train_annotations.csv"
VAL_ANNOTATIONS_FILE_PATH = "/kaggle/input/new-split/val_annotations.csv"
TEST_ANNOTATIONS_FILE_PATH = "/kaggle/input/new-split/test_annotations.csv"

# Place each split's annotation file inside its image folder under the common
# name "_annotations.csv", in train, valid, test order.
copied_annotations = [
    shutil.copyfile(annotations_csv, f"{split_dir}/_annotations.csv")
    for annotations_csv, split_dir in (
        (TRAIN_ANNOTATIONS_FILE_PATH, train_dir),
        (VAL_ANNOTATIONS_FILE_PATH, val_dir),
        (TEST_ANNOTATIONS_FILE_PATH, test_dir),
    )
]
# Keep the destination of the last copy as the cell's displayed value.
copied_annotations[-1]


# os.rename("/kaggle/working/GS_transformed/train/train_annotations.csv", "/kaggle/working/GS_transformed/train/_annotations.csv")
# os.rename("/kaggle/working/GS_transformed/valid/val_annotations.csv", "/kaggle/working/GS_transformed/valid/_annotations.csv")
# os.rename("/kaggle/working/GS_transformed/test/test_annotations.csv", "/kaggle/working/GS_transformed/test/_annotations.csv")


'/kaggle/working/GS_transformed/test/_annotations.csv'