In [2]:
import numpy as np
import os

import pandas as pd

import matplotlib.pyplot as plt
from PIL import Image

# Cropping images

In [37]:
def split_npz_file_into_16_parts(npz_file_path, output_dir):
    """Split the image stored in an .npz archive into a 4 x 4 grid of tiles.

    The archive must contain the keys 'sample', 'channels' and 'filenames'.
    'sample' must be a 3-D array (height, width, channels); 'channels' and
    'filenames' are unwrapped with ``.item()`` and written unchanged into
    every tile archive.

    Each tile is saved as ``<base_name>_part_<i>_<j>.npz`` inside
    ``output_dir``, where ``i`` is the tile row and ``j`` the tile column
    (both 0..3). ``output_dir`` is created if it does not exist.

    Tile size is ``height // 4`` by ``width // 4``; if the image size is not
    divisible by 4, the remaining rows/columns at the bottom/right edge are
    not included in any tile.

    Args:
        npz_file_path: Path of the source .npz file.
        output_dir: Directory that receives the 16 tile archives.

    Raises:
        KeyError: If one of the expected keys is missing from the archive.
        ValueError: If 'sample' is not a 3-D array.
    """
    grid = 4  # 4 x 4 = 16 tiles

    # NOTE(review): allow_pickle=True executes pickled content on load;
    # only use this on archives from a trusted source.
    # The context manager closes the underlying file handle once the arrays
    # have been read into memory.
    with np.load(npz_file_path, allow_pickle=True) as npz_data:
        sample = npz_data['sample']                   # image data
        channels = npz_data['channels'].item()        # metadata, passed through
        filenames = npz_data['filenames'].item()      # metadata, passed through

    # Unpacking into three names enforces a 3-D array.
    img_height, img_width, _ = sample.shape
    tile_height = img_height // grid
    tile_width = img_width // grid

    os.makedirs(output_dir, exist_ok=True)
    base_name = os.path.splitext(os.path.basename(npz_file_path))[0]

    for i in range(grid):
        rows = slice(i * tile_height, (i + 1) * tile_height)
        for j in range(grid):
            cols = slice(j * tile_width, (j + 1) * tile_width)
            # One slice covers all channels; copy so the tile is a compact,
            # independent array with the source dtype.
            split_sample = np.ascontiguousarray(sample[rows, cols, :])

            split_filename = f"{base_name}_part_{i}_{j}.npz"
            split_file_path = os.path.join(output_dir, split_filename)

            np.savez(split_file_path, sample=split_sample, channels=channels, filenames=filenames)

    print(f"Splitting and saving completed for {npz_file_path}.")

In [38]:
# Source directory holding the full-size .npz images, and the destination
# directory that receives the cropped tiles.
input_dir = '/share/data/analyses/silvija/RT/data_cloome/our_images/preprocessing_crop/channels_tiff_npz_full'
output_dir = '/share/data/analyses/silvija/RT/data_cloome/our_images/preprocessing_crop/channels_tiff_npz'
os.makedirs(output_dir, exist_ok=True)

# Collect every .npz archive in the input directory (order as returned by os.listdir).
npz_files = []
for entry in os.listdir(input_dir):
    if entry.endswith('.npz'):
        npz_files.append(entry)

# Tile each archive into 16 parts written to output_dir.
for npz_file in npz_files:
    path = f"{input_dir}/{npz_file}"
    split_npz_file_into_16_parts(path, output_dir)

Splitting and saving completed for /share/data/analyses/silvija/RT/data_cloome/our_images/preprocessing_crop/channels_tiff_npz_full/P102785-A02-1.npz.
Splitting and saving completed for /share/data/analyses/silvija/RT/data_cloome/our_images/preprocessing_crop/channels_tiff_npz_full/P102785-A02-2.npz.
Splitting and saving completed for /share/data/analyses/silvija/RT/data_cloome/our_images/preprocessing_crop/channels_tiff_npz_full/P102785-A02-3.npz.
Splitting and saving completed for /share/data/analyses/silvija/RT/data_cloome/our_images/preprocessing_crop/channels_tiff_npz_full/P102785-A02-4.npz.
Splitting and saving completed for /share/data/analyses/silvija/RT/data_cloome/our_images/preprocessing_crop/channels_tiff_npz_full/P102785-A02-5.npz.
Splitting and saving completed for /share/data/analyses/silvija/RT/data_cloome/our_images/preprocessing_crop/channels_tiff_npz_full/P102785-A02-6.npz.
Splitting and saving completed for /share/data/analyses/silvija/RT/data_cloome/our_images/prep

In [35]:
# Sanity check: load one of the generated tiles and print its dimensions.
tile_path = "/share/data/analyses/silvija/RT/data_cloome/our_images/preprocessing_crop/channels_tiff_npz/P102785-A02-1_part_0_0.npz"
npz_data = np.load(tile_path, allow_pickle=True)

# Same three keys that the splitting step writes into every tile archive.
sample = npz_data['sample']
channels = npz_data['channels'].item()
filenames = npz_data['filenames'].item()

# A tile should be a quarter of the full image along each spatial axis
# (height // 4, width // 4) with the channel count unchanged.
img_height, img_width, num_channels = sample.shape
print(f"(img_height, img_width, num_channels)=({img_height}, {img_width}, {num_channels})")

(img_height, img_width, num_channels)=(625, 625, 5)


# Manipulating metadata file

In [46]:
# Expand the per-image metadata so that every original row appears once per
# crop position, with the position stored in a new 'CROP_POS' column placed
# directly after 'SITE'.
metadata_file = '/share/data/analyses/silvija/RT/data_cloome/our_images/preprocessing_all/metadata_P102785_ALL.csv'
metadata_df = pd.read_csv(metadata_file)

# Crop positions in row-major order: (0, 0), (0, 1), ..., (3, 3).
positions = [(i, j) for i in range(4) for j in range(4)]
crop_labels = [f"{i}-{j}" for i, j in positions]

# Repeat every metadata row once per crop position. Selecting whole rows by
# position keeps the column dtypes intact (row-by-row iteration passes each
# row through a Series, which can change dtypes) and avoids a Python-level
# loop over all rows. The index is reset so the result has unique labels.
row_indexer = np.repeat(np.arange(len(metadata_df)), len(crop_labels))
expanded_metadata_df = metadata_df.iloc[row_indexer].reset_index(drop=True)

# A pre-existing 'CROP_POS' column is replaced rather than duplicated.
expanded_metadata_df = expanded_metadata_df.drop(columns='CROP_POS', errors='ignore')

# Cycle through the crop labels for each repeated block of rows and insert
# the column right after 'SITE' (raises KeyError if 'SITE' is missing).
site_index = expanded_metadata_df.columns.get_loc('SITE')
expanded_metadata_df.insert(site_index + 1, 'CROP_POS', np.tile(crop_labels, len(metadata_df)))

expanded_metadata_df.to_csv("/share/data/analyses/silvija/RT/data_cloome/our_images/preprocessing_crop/metadata_P102785_ALL.csv", index=False)

In [22]:
# Rename files for a test run that uses only one crop per image: the two
# trailing "-"-separated fields are removed from every .npz file name in odir.

odir = '/share/data/analyses/silvija/RT/data_cloome/our_images/preprocessing_all_singleCrop/channels_tiff_npz'
os.makedirs(odir, exist_ok=True)

# The renamed files have three "-"-separated fields (e.g. "P102785-A10-6.npz"),
# so inputs are expected to have five. Presumably the two extra fields are the
# crop position - TODO confirm against the step that produced these files.
n_fields_expected = 5

# Sorted for a deterministic processing order.
npz_files = sorted(f for f in os.listdir(odir) if f.endswith('.npz'))

for npz_file in npz_files:
    parts = npz_file.split("-")

    # Guard against re-running this cell: stripping two more fields from an
    # already renamed file would map every file onto the same short name and
    # os.rename would silently overwrite them one after another.
    if len(parts) != n_fields_expected:
        print(f"Skipped (unexpected name) : {npz_file}")
        continue

    name = "-".join(parts[:-2]) + ".npz"

    old_path = os.path.join(odir, npz_file)
    new_path = os.path.join(odir, name)

    # Never replace an existing file (e.g. several crops of the same image).
    if os.path.exists(new_path):
        print(f"Skipped (target exists) : {name}")
        continue

    os.rename(old_path, new_path)
    print(f"Renamed : {name}")


Renamed : P102785-A10-6.npz
Renamed : P102785-A19-4.npz
Renamed : P102785-C05-7.npz
Renamed : P102785-C10-2.npz
Renamed : P102785-C10-8.npz
Renamed : P102785-C15-6.npz
Renamed : P102785-C24-1.npz
Renamed : P102785-C24-9.npz
Renamed : P102785-D06-4.npz
Renamed : P102785-D19-7.npz
Renamed : P102785-D19-9.npz
Renamed : P102785-D22-9.npz
Renamed : P102785-D23-8.npz
Renamed : P102785-E03-1.npz
Renamed : P102785-E03-2.npz
Renamed : P102785-E06-7.npz
Renamed : P102785-E06-8.npz
Renamed : P102785-E06-9.npz
Renamed : P102785-E12-1.npz
Renamed : P102785-F10-9.npz
Renamed : P102785-F12-4.npz
Renamed : P102785-F14-3.npz
Renamed : P102785-F17-6.npz
Renamed : P102785-G08-3.npz
Renamed : P102785-G11-4.npz
Renamed : P102785-G20-2.npz
Renamed : P102785-G20-6.npz
Renamed : P102785-G20-8.npz
Renamed : P102785-G24-7.npz
Renamed : P102785-H18-5.npz
Renamed : P102785-H18-8.npz
Renamed : P102785-H22-2.npz
Renamed : P102785-H22-5.npz
Renamed : P102785-H22-6.npz
Renamed : P102785-I06-3.npz
Renamed : P102785-I0