In [1]:
import os
from skimage.io import imread
import torch
import torch.nn.functional as F
import shutil
import random



In [2]:
# Directories
input_dir = "G:/Shared drives/Posner Group Current/Cole's Files/ANSA/RPA on glass slides/100_serial/processed"
output_dir = "H:\Datasetsinterpolated_torch_tensors"
os.makedirs(output_dir, exist_ok=True)

# Function to get all image paths and their labels
def get_image_paths_and_labels(root_dir):
    image_data = []
    for label in os.listdir(root_dir):
        label_path = os.path.join(root_dir, label)
        if os.path.isdir(label_path):
            for idx, image_name in enumerate(os.listdir(label_path)):
                if image_name.lower().endswith('.tif'):
                    image_path = os.path.join(label_path, image_name)
                    image_data.append((image_path, f"{label}_{idx+1}"))
    return image_data

# Get all image paths and labels
image_data = get_image_paths_and_labels(input_dir)

# Upsample/interpolate to 500x500
target_shape = (1, 180, 500, 500)

for img_path, label in image_data:
    # Load the image stack
    image_stack = imread(img_path)
    
    # Convert to tensor and add channel dimension [f, h, w] -> [1, f, h, w]
    tensor = torch.tensor(image_stack, dtype=torch.float32).unsqueeze(0)

    # Upsample to target size using bicubic interpolation
    upsampled_tensor = F.interpolate(
        tensor, 
        size=(500,500), 
        mode='bicubic', 
        align_corners=True
    )

    # Save with corrected filename
    output_path = os.path.join(output_dir, f"{label}.pt")
    torch.save(upsampled_tensor, output_path)
    
    print(f"Processed: {output_path} -> New shape: {upsampled_tensor.shape}")

print("Done")



Processed: H:\Datasetsinterpolated_torch_tensors\0_1.pt -> New shape: torch.Size([1, 180, 500, 500])
Processed: H:\Datasetsinterpolated_torch_tensors\0_2.pt -> New shape: torch.Size([1, 180, 500, 500])
Processed: H:\Datasetsinterpolated_torch_tensors\0_3.pt -> New shape: torch.Size([1, 180, 500, 500])
Processed: H:\Datasetsinterpolated_torch_tensors\0_4.pt -> New shape: torch.Size([1, 180, 500, 500])
Processed: H:\Datasetsinterpolated_torch_tensors\0_5.pt -> New shape: torch.Size([1, 180, 500, 500])
Processed: H:\Datasetsinterpolated_torch_tensors\0_6.pt -> New shape: torch.Size([1, 180, 500, 500])
Processed: H:\Datasetsinterpolated_torch_tensors\1000_1.pt -> New shape: torch.Size([1, 180, 500, 500])
Processed: H:\Datasetsinterpolated_torch_tensors\1000_2.pt -> New shape: torch.Size([1, 180, 500, 500])
Processed: H:\Datasetsinterpolated_torch_tensors\1000_3.pt -> New shape: torch.Size([1, 180, 500, 500])
Processed: H:\Datasetsinterpolated_torch_tensors\1000_4.pt -> New shape: torch.Siz

In [None]:
# # Repadding to make them square ([1,180,499,500] currently)
# input_folder = r'../Datasets/torch_tensors/'
# output_folder = input_folder  # Change if you want to save elsewhere

# # os.makedirs(output_folder, exist_ok=True)

# # Desired shape
# target_shape = (1, 180, 500, 500)

# # Process each tensor file
# for file_name in os.listdir(input_folder):
#     if file_name.endswith('.pt'):
#         file_path = os.path.join(input_folder, file_name)
        
#         tensor = torch.load(file_path)

#         _, _, current_height, current_width = tensor.shape
        
#         # Calculate padding amounts (last two dimensions: width and height)
#         pad_width = target_shape[-1] - current_width  # Padding for width (500 - 499)
#         pad_height = target_shape[-2] - current_height  # Padding for height (should be 0)

#         # Apply padding (pad format: last dimension first, so width then height)
#         padded_tensor = torch.nn.functional.pad(tensor, (0, pad_width, 0, pad_height), "constant", 0)

#         output_path = os.path.join(output_folder, file_name)
#         torch.save(padded_tensor, output_path)

#         print(f"Processed: {file_name} -> New shape: {padded_tensor.shape}")

# print("Done")


In [3]:
source_dir = 'H:/Datasets/interpolated_torch_tensors'
output_dir = 'H:/Datasets/int_Split'

random.seed(53)

train_dir = os.path.join(output_dir, 'Training')
test_dir = os.path.join(output_dir, 'Testing')
val_dir = os.path.join(output_dir, 'Validation')

os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

all_files = [f for f in os.listdir(source_dir) if f.endswith('.pt')]

random.shuffle(all_files)
print("Shuffle done")
num_files = len(all_files)
train_split = int(num_files * 0.60)
test_split = int(num_files * 0.20)

train_files = all_files[:train_split]
test_files = all_files[train_split:train_split + test_split]
val_files = all_files[train_split + test_split:]  # Remainder goes here

def copy_files(file_list, dest_dir):
    for file in file_list:
        shutil.copy(os.path.join(source_dir, file), os.path.join(dest_dir, file))

copy_files(train_files, train_dir)
print("Train copy done")

copy_files(test_files, test_dir)
print("Test copy done")

copy_files(val_files, val_dir)
print("Validation copy done")


print(f"Copied {len(train_files)} files to Training")
print(f"Copied {len(test_files)} files to Testing")
print(f"Copied {len(val_files)} files to Validation")


Shuffle done
Train copy done
Test copy done
Validation copy done
Copied 141 files to Training
Copied 47 files to Testing
Copied 48 files to Validation


In [4]:
# Define training directory and output category folders
Paths = ['H:/Datasets/int_Split/Testing/', 'H:/Datasets/int_Split/Training/',
  'H:/Datasets/int_Split/Validation/']

for Path in Paths:
    train_dir = Path
    category_dirs = {
        'undetectable': os.path.join(train_dir, 'undetectable'),
        'low': os.path.join(train_dir, 'low'),
        'medium': os.path.join(train_dir, 'medium'),
        'high': os.path.join(train_dir, 'high')
    }

    # Create category directories if they don't exist
    for dir_path in category_dirs.values():
        os.makedirs(dir_path, exist_ok=True)

    # Function to categorize based on label value
    def categorize_file(filename):
        try:
            label = int(filename.split('_')[0])  # Extract numeric label before underscore
            if label < 200:
                return 'undetectable'
            elif 200 <= label <= 1000:
                return 'low'
            elif 1000 < label <= 10000:
                return 'medium'
            else:
                return 'high'
        except ValueError:
            print(f"Skipping {filename}: Invalid format")
            return None

    # Process each file in the training directory
    for file in os.listdir(train_dir):
        if file.endswith('.pt'):
            category = categorize_file(file)
            if category:
                src_path = os.path.join(train_dir, file)
                dest_path = os.path.join(category_dirs[category], file)
                shutil.copy(src_path, dest_path)

    print("Files have been copied to respective categories:")
    for category, path in category_dirs.items():
        print(f"{category}: {len(os.listdir(path))} files")


Files have been copied to respective categories:
undetectable: 7 files
low: 11 files
medium: 16 files
high: 13 files
Files have been copied to respective categories:
undetectable: 37 files
low: 27 files
medium: 22 files
high: 55 files
Files have been copied to respective categories:
undetectable: 9 files
low: 14 files
medium: 8 files
high: 17 files
