### Necessary Imports

In [1]:
import os
import numpy as np
import pandas as pd
import shutil

# Folder that holds the OpenSlide Windows binaries (the DLLs).
# Set the OPENSLIDE_PATH environment variable to point somewhere else without
# editing the notebook; the literal below is only the fallback default.
OPENSLIDE_PATH = os.environ.get(
    'OPENSLIDE_PATH',
    r'E:\KSA Project\data_preprocessing\openslide-bin-4.0.0.3-windows-x64\bin',
)

if hasattr(os, 'add_dll_directory') and os.path.isdir(OPENSLIDE_PATH):
    # Windows: the DLL folder must be on the search path while openslide is imported.
    with os.add_dll_directory(OPENSLIDE_PATH):
        import openslide
else:
    # Non-Windows platforms, or the binaries folder is absent on this machine:
    # rely on whatever OpenSlide installation the environment already provides.
    import openslide

from WSI_Stiching_Code.wsi_core.WholeSlideImage import WholeSlideImage

### First, Keep Only the Slides (WSIs) That Have Labels in the Ground Truth File
We downloaded around 600 slides from the TCGA portal, but we have labels for only around 425 of them. We obtained these labels from the supplementary material of IDARS. Moreover, the data is downloaded in zip format; we save the slides that have labels into a new folder.

In [None]:
# Paths -- raw strings, so the Windows backslashes are never parsed as escape sequences
wsi_dir = r'E:\Aamir Gulzar\WSI_data_619x458'
labels_file = r'E:\Aamir Gulzar\dataset\labels.csv'
output_dir = r'E:\Aamir Gulzar\dataset\svs_files'

# Load labels
labels_df = pd.read_csv(labels_file)
# Map each patient ID to its label (a later duplicate PATIENT row overrides an earlier one)
patient_labels_dict = dict(zip(labels_df['PATIENT'], labels_df['label']))

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Function to copy images with matching labels
def copy_images_with_labels(src_dir, dst_dir, patient_labels):
    """
    Copy every labelled .svs slide found under src_dir into dst_dir.

    The first 12 characters of each file name are used as the patient ID.
    Slides whose patient ID has a label are copied to
    ``<dst_dir>/<patient_id>_<label>.svs``; all other files are ignored.

    Args:
        src_dir: Directory that is walked recursively for .svs files.
        dst_dir: Directory receiving the copies (created if it is missing).
        patient_labels: Mapping of patient ID -> label.

    Note:
        The destination name depends only on patient ID and label, so several
        slides of the same patient end up in the same file (the last one copied
        wins). A warning is printed whenever that happens within one call.
    """
    os.makedirs(dst_dir, exist_ok=True)
    written = set()  # destination paths produced during this call

    for root, _dirs, files in os.walk(src_dir):
        for file in files:
            # Case-insensitive so that '.SVS' files are not skipped
            if not file.lower().endswith('.svs'):
                continue

            # Take the first 12 characters of the file name as the patient ID
            patient_id = file[:12]
            label = patient_labels.get(patient_id)
            # Only a missing/empty label means "unlabelled"; a label of 0 is valid
            if label is None or label == '':
                continue

            src_path = os.path.join(root, file)
            # Add label to the file name
            dst_path = os.path.join(dst_dir, f"{patient_id}_{label}.svs")
            if dst_path in written:
                print(f'Warning: {dst_path} was already written in this run; overwriting it with {src_path}')
            shutil.copy(src_path, dst_path)
            written.add(dst_path)
            print(f'Copied {src_path} to {dst_path}')

# Run the copy: walk the downloaded slides and keep only those of labelled patients
copy_images_with_labels(
    src_dir=wsi_dir,
    dst_dir=output_dir,
    patient_labels=patient_labels_dict,
)
print('Finished copying images with labels.')

### Checking WSI Magnification and Dimensions

In [7]:
def get_wsi_magnification(slide):
    """
    Return the objective power stored in the slide's properties as a float.

    Args:
        slide: Object exposing a ``properties`` mapping (an opened slide).

    Returns:
        float: Value of the objective-power property.

    Raises:
        ValueError: If the objective-power property is absent.
    """
    try:
        raw_power = slide.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER]
    except KeyError:
        raise ValueError("Magnification information not available in the WSI properties.")
    return float(raw_power)
	

def get_wsi_magnification_dimensions(slide, WSI_object, target_magnification=5.0):
    """
    Print the magnification, pyramid levels, dimensions and microns-per-pixel of a WSI.

    Args:
        slide: Opened slide exposing ``properties``, ``dimensions``,
            ``level_downsamples`` and ``level_dimensions``.
        WSI_object: WholeSlideImage wrapper of the slide; its ``getOpenSlide()``,
            ``level_downsamples`` and ``level_dim`` are used.
        target_magnification: Magnification the slide would be downsampled to;
            used to derive the scale factor and best pyramid level that are
            printed. Defaults to 5.0.

    Returns:
        int | None: The objective power read from ``slide``, or None when the
        slide does not carry that property.
    """
    objective_power = slide.properties.get('openslide.objective-power')

    wsi = WSI_object.getOpenSlide()
    try:
        mag = get_wsi_magnification(wsi)
    except ValueError:
        # No usable magnification: still report everything else instead of
        # aborting before the "no objective power" message below can be shown.
        mag = None

    if mag is None:
        scale_factor = None
        best_level = None
    else:
        # Downsample factor needed to go from the native to the target magnification
        scale_factor = mag / target_magnification
        best_level = wsi.get_best_level_for_downsample(scale_factor)

    # Get microns per pixel (x and y)
    mpp_x = slide.properties.get('openslide.mpp-x', 'Unknown')
    mpp_y = slide.properties.get('openslide.mpp-y', 'Unknown')

    print(f'WSI Objective power/Magnification: {objective_power} of {slide}')
    print(f'WSI Objective power/Magnification using WSI_object: {mag} scale factor {scale_factor} and best level {best_level}')
    print(f"WSI dimensions: {slide.dimensions}")
    print(f'wsi level downsample: {slide.level_downsamples} and wsi level dimensions: {slide.level_dimensions}')
    print(f'wsi levels WSI_object: {WSI_object.level_downsamples} and wsi level dimensions: {WSI_object.level_dim}')
    print(f"Microns per Pixel (X): {mpp_x}")
    print(f"Microns per Pixel (Y): {mpp_y}")

    if objective_power:
        # The property is a string; go through float so '40.0' parses as well as '40'
        return int(float(objective_power))

    print('This WSI does not contain objective power information.')
    return None

def _report_wsi(wsi_path):
    """Open one slide, print its details, and always release the handles opened for it."""
    slide = openslide.OpenSlide(wsi_path)
    try:
        WSI_object = WholeSlideImage(wsi_path)
        try:
            get_wsi_magnification_dimensions(slide, WSI_object)
        finally:
            # WholeSlideImage is built from the path, so the slide it exposes
            # through getOpenSlide() is closed separately from `slide`.
            WSI_object.getOpenSlide().close()
    finally:
        slide.close()


def load_wsi_images(input_dir):
    """
    Read all WSIs in the input directory to print their magnification and dimensions.

    Walks ``input_dir`` recursively and processes every ``.svs`` / ``.tiff``
    file (extension matched case-insensitively). A slide that fails to load or
    to be inspected is reported with a printed message and skipped, so one bad
    file does not stop the scan. Slide handles are closed after each file.

    Args:
        input_dir: Root directory to search for slides.
    """
    for root, _dirs, files in os.walk(input_dir):
        for wsi_file in files:
            if not wsi_file.lower().endswith(('.svs', '.tiff')):
                continue

            wsi_path = os.path.join(root, wsi_file)
            # Ensure the file exists
            if not os.path.isfile(wsi_path):
                print(f"File not found: {wsi_path}")
                continue

            try:
                _report_wsi(wsi_path)
            except Exception as e:
                # Best effort: report the problem and move on to the next slide
                print(f"Error loading WSI: {wsi_path} - {e}")

# Example usage: report magnification and dimensions for every slide under the folder
input_dir = 'E:\\KSA Project\\dataset\\svs_files'
load_wsi_images(input_dir=input_dir)

WSI Objective power/Magnification: 40 of OpenSlide('E:\\KSA Project\\dataset\\svs_files\\TCGA-3L-AA1B_nonMSIH.svs')
WSI Objective power/Magnification using WSI_object: 40.0 scale factor 8.0 and best level 1
WSI dimensions: (95615, 74462)
wsi level downsample: (1.0, 4.000116473747436, 16.002759635885248, 32.01163799220526) and wsi level dimensions: ((95615, 74462), (23903, 18615), (5975, 4653), (2987, 2326))
wsi levels WSI_object: [(1.0, 1.0), (4.000125507258503, 4.000107440236368), (16.002510460251045, 16.00300881151945), (32.01037830599263, 32.012897678417886)] and wsi level dimensions: ((95615, 74462), (23903, 18615), (5975, 4653), (2987, 2326))
Microns per Pixel (X): 0.25269999999999998
Microns per Pixel (Y): 0.25269999999999998
WSI Objective power/Magnification: 40 of OpenSlide('E:\\KSA Project\\dataset\\svs_files\\TCGA-A6-2671_nonMSIH.svs')
WSI Objective power/Magnification using WSI_object: 40.0 scale factor 8.0 and best level 1
WSI dimensions: (101680, 36748)
wsi level downsampl