#### Similarity visualization: informative tiles vs. all tiles within a slide

In [168]:
import cv2
import h5py
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from PIL import ImageDraw
from sklearn.metrics.pairwise import cosine_similarity
# Turn off Pillow's pixel-count limit (its decompression-bomb guard) so that
# very large slide images can be opened. Only appropriate for trusted files.
Image.MAX_IMAGE_PIXELS = None  
# ---------- CONFIG ----------

  # Color map for similarity
def visualization_bg_filtered(slide, base_image_path, features_h5_path, important_h5_path, output_image_path, tile_size=224, alpha=0.5, sim_threshold=0.7, downsample=2, top_k=3):
    """Tint tiles that resemble a slide's most important tiles and save a JPEG.

    For one slide this loads every tile's coordinates and features, looks up
    the slide's important tile coordinates, computes each tile's mean cosine
    similarity to those important tiles, blends red into the tissue pixels of
    every tile whose similarity exceeds ``sim_threshold``, outlines the
    important tiles in black and writes the result to ``output_image_path``.

    Args:
        slide: Name of the dataset inside ``important_h5_path`` that holds this
            slide's important tile coordinates. Also printed as progress.
        base_image_path: Image to draw on. Tile coordinates are divided by
            ``downsample`` before being used as pixel positions in it.
        features_h5_path: HDF5 file with a ``"coords"`` dataset (one ``(x, y)``
            row per tile) and a ``"feats"`` dataset (one feature row per tile).
        important_h5_path: HDF5 file with one dataset per slide containing
            ``(x, y)`` rows; the first ``top_k`` rows are used.
            # NOTE(review): presumably sorted by decreasing importance - confirm.
        output_image_path: Destination file; always written in JPEG format.
        tile_size: Edge length, in base-image pixels, of the square painted
            and outlined for each tile.
        alpha: Blend weight of the red tint (0 = untouched, 1 = solid red).
        sim_threshold: A tile is tinted only if its mean similarity is
            strictly greater than this value.
        downsample: Factor between tile coordinates and base-image pixels.
        top_k: Number of important tiles to compare against.

    Raises:
        ValueError: If none of the slide's important coordinates occur in the
            ``"coords"`` dataset, so no similarity can be computed.
    """
    # Red-channel value above which a pixel is treated as white background.
    background_cutoff = 220
    tint = np.array([255, 0, 0])

    # ---------- 1. LOAD BASE IMAGE ----------
    base_np = np.array(Image.open(base_image_path).convert("RGBA"))
    height, width = base_np.shape[:2]

    # ---------- 2. LOAD COORDS & FEATURES ----------
    # Use the parameter, not a same-looking global leaked from the calling cell.
    with h5py.File(features_h5_path, "r") as f:
        coords = np.array(f["coords"])
        features = np.array(f["feats"])

    with h5py.File(important_h5_path, "r") as f:
        important_coords = np.array(f[slide])[:top_k]

    # Row index in `coords` of every important tile that is actually present.
    important_indices = []
    for coord in important_coords:
        idx = np.where((coords == coord).all(axis=1))[0]
        if len(idx) > 0:
            important_indices.append(idx[0])
    print(slide)
    if not important_indices:
        raise ValueError(
            f"None of the important tiles of slide {slide!r} were found in {features_h5_path!r}"
        )

    # ---------- 3. SIMILARITY CALCULATION ----------
    # Mean over the important tiles that were found, so a slide with fewer
    # matches than `top_k` is not biased towards low similarity.
    similarities = cosine_similarity(features, features[important_indices])
    mean_sim = similarities.mean(axis=1)

    # ---------- 4. APPLY COLOR OVERLAY ----------
    overlay_np = base_np.copy()
    # Tissue = pixels whose red channel is not near-white (same rule as an
    # inverse binary threshold at `background_cutoff`).
    tissue = base_np[..., 0] <= background_cutoff

    for (x, y), sim_val in zip(coords, mean_sim):
        # `not >` (rather than `<=`) also skips NaN similarities.
        if not sim_val > sim_threshold:
            continue
        x0, y0 = int(x / downsample), int(y / downsample)
        x1, y1 = x0 + tile_size, y0 + tile_size
        if x0 < 0 or y0 < 0 or x1 > width or y1 > height:
            continue  # Skip tiles that do not fit entirely inside the image

        tile_tissue = tissue[y0:y1, x0:x1]
        # Always blend from the untouched base image so overlapping tiles are
        # not tinted twice.
        source_rgb = base_np[y0:y1, x0:x1, :3][tile_tissue]
        blended = ((1 - alpha) * source_rgb + alpha * tint).astype(np.uint8)
        overlay_np[y0:y1, x0:x1, :3][tile_tissue] = blended

    # ---------- 5. SAVE OUTPUT ----------
    overlay_img = Image.fromarray(overlay_np).convert("RGB")  # JPEG has no alpha
    draw = ImageDraw.Draw(overlay_img)
    for (x, y) in important_coords:
        left, top = x / downsample, y / downsample
        draw.rectangle([left, top, left + tile_size, top + tile_size], outline="black", width=12)
    overlay_img.save(output_image_path, format="jpeg")

# ---------- 6. DISPLAY LOW-RES INLINE ----------
#low_res = overlay_img.resize((overlay_img.width // 4, overlay_img.height // 4))
#plt.imshow(low_res)
#plt.axis("off")
#plt.show()


In [169]:
 # Base WSI (or downsampled version)
# The paths are left empty so the notebook can be pushed to GitHub: the data is
# private. Fill them in (and populate `targets`) before running this cell.
from tqdm import tqdm
important_h5_path = " " # h5 files with important tiles for all the images [image_id: [important tiles]]
targets = []  # A list of all the slide names
for slide in targets:
    # Per-slide inputs/outputs; typically derived from `slide`.
    base_image_path = ""
    feature_h5_path = ""
    output_image_path = ""
    visualization_bg_filtered(slide, base_image_path, feature_h5_path, important_h5_path, output_image_path, tile_size = 224, alpha = 0.5)

BT9-102937-00-R00_105005
BT9-102938-00-R00_105057
BT9-102939-00-R00_105302
BT9-102940-00-R00_105446
BT9-102941-00-R00_105701
BT9-102942-00-R00_105811
BT9-102943-00-R00_105902
BT9-102944-00-R00_110106
BT9-102946-00-R00_110412
BT9-102947-00-R00_110545
BT9-102949-00-R00_110917
BT9-102950-00-R00_111100
BT9-102951-00-R00_111310
BT9-102953-00-R00_111642
BT9-102954-00-R00_111859
BT9-102956-00-R00_112336
BT9-102957-00-R00_112620
BT9-102958-00-R00_112828
BT9-102959-00-R00_113003
BT9-102963-00-R00_113639
BT9-102964-00-R00_113757
BT9-102965-00-R00_114341
BT9-102966-00-R00_114553
BT9-102969-00-R00_115038
BT9-102970-00-R00_115255
BT9-102974-00-R00_115917
BT9-102975-00-R00_120135
BT9-102976-00-R00_120359
BT9-102978-00-R00_120727
BT9-102979-00-R00_120941
BT9-102980-00-R00_121232
BT9-102981-00-R00_121456
BT9-102982-00-R00_121742
BT9-102985-00-R00_122358
BT9-102986-00-R00_122612
BT9-102987-00-R00_122815
BT9-102988-00-R00_123041
BT9-102989-00-R00_123253
BT9-102990-00-R00_123456
BT9-102991-00-R00_123719


#### Similarity visualization across slides

In [183]:


def visualization_across_slides_no_bg(features_dir, target_slide, important_coords_path, reference_slides, base_images_dir, output_dir, tile_size=224, alpha =0.5, sim_threshold=0.6, downsample=2, top_k=3) :
    """Highlight target-slide tiles that resemble important tiles of other slides.

    Every tile of ``target_slide`` is compared (cosine similarity) with the
    first ``top_k`` important tiles of each reference slide. The similarity is
    averaged per reference and then over all references that contributed at
    least one tile. Tissue pixels of tiles whose average exceeds
    ``sim_threshold`` are tinted green, the target slide's own important tiles
    are outlined in black, and the result is saved as
    ``<output_dir>/<target_slide>.jpg``.

    Args:
        features_dir: Directory containing ``<slide>.h5`` files, each with a
            ``"coords"`` dataset (``(x, y)`` rows) and a ``"feats"`` dataset.
        target_slide: Name of the slide to visualise.
        important_coords_path: HDF5 file with one dataset of ``(x, y)`` rows
            per slide name (needed for every reference and the target).
        reference_slides: Sequence of slide names to compare against.
        base_images_dir: Despite the name, this is passed straight to
            ``Image.open`` and must therefore be the path of the target
            slide's image file.
        output_dir: Directory for the result; created if missing.
        tile_size: Edge length, in base-image pixels, of each tile square.
        alpha: Opacity of the green tint (0 = invisible, 1 = opaque).
        sim_threshold: A tile is tinted only if its averaged similarity is
            strictly greater than this value.
        downsample: Factor between tile coordinates and base-image pixels.
        top_k: Number of important tiles taken from each slide.
    """
    # Red-channel value above which a pixel is treated as white background.
    background_cutoff = 220

    # ----------------------------
    # LOAD TARGET SLIDE FEATURES & COORDS
    # ----------------------------
    target_h5_path = os.path.join(features_dir, f"{target_slide}.h5")
    with h5py.File(target_h5_path, "r") as f:
        target_coords = np.array(f["coords"])
        target_features = np.array(f["feats"])

    # ----------------------------
    # LOAD IMPORTANT TILE COORDS FOR ALL REFERENCES AND THE TARGET
    # ----------------------------
    with h5py.File(important_coords_path, "r") as f:
        important_tiles_dict = {name: np.array(f[name]) for name in list(reference_slides) + [target_slide]}

    # ----------------------------
    # AGGREGATE SIMILARITY ACROSS THE REFERENCE SLIDES
    # ----------------------------
    summed_sim = np.zeros(len(target_coords))
    contributing_refs = 0

    for ref_slide in reference_slides:
        ref_h5_path = os.path.join(features_dir, f"{ref_slide}.h5")
        with h5py.File(ref_h5_path, "r") as ref_f:
            ref_coords = np.array(ref_f["coords"])
            ref_features = np.array(ref_f["feats"])

        # Row index of each of this reference's important tiles, if present.
        important_indices = []
        for coord in important_tiles_dict[ref_slide][:top_k]:
            idx = np.where((ref_coords == coord).all(axis=1))[0]
            if len(idx) > 0:
                important_indices.append(idx[0])

        if not important_indices:
            continue  # nothing to compare against for this reference

        sim_matrix = cosine_similarity(target_features, ref_features[important_indices])
        # Mean (not sum / top_k) so references with fewer matched tiles are
        # weighted the same as fully matched ones.
        summed_sim += sim_matrix.mean(axis=1)
        contributing_refs += 1

    # Average only over references that contributed; with none, similarity
    # stays at zero and nothing is highlighted (instead of dividing by zero).
    avg_sim = summed_sim / contributing_refs if contributing_refs else summed_sim

    # ----------------------------
    # LOAD BASE IMAGE AND BUILD TISSUE MASK
    # ----------------------------
    base_img = Image.open(base_images_dir).convert("RGBA")
    base_np = np.array(base_img)
    # Tissue = pixels whose red channel is not near-white (same rule as an
    # inverse binary threshold at `background_cutoff`).
    tissue_mask = base_np[..., 0] <= background_cutoff

    # ----------------------------
    # APPLY HIGHLIGHT ON TISSUE REGIONS ONLY
    # ----------------------------
    # Mark all selected tile squares first, then colour in one masked
    # assignment instead of setting pixels one at a time.
    selected = np.zeros(tissue_mask.shape, dtype=bool)
    for (x, y), sim_val in zip(target_coords, avg_sim):
        # `not >` (rather than `<=`) also skips NaN similarities.
        if not sim_val > sim_threshold:
            continue
        left, top = int(x / downsample), int(y / downsample)
        right, bottom = left + tile_size, top + tile_size
        # Slicing clips at the far image edges; clamp at 0 so a negative
        # start cannot wrap around to the opposite edge.
        selected[max(top, 0):max(bottom, 0), max(left, 0):max(right, 0)] = True

    overlay_np = np.zeros_like(base_np)  # fully transparent RGBA layer
    overlay_np[selected & tissue_mask] = (0, 255, 0, int(255 * alpha))
    overlay = Image.fromarray(overlay_np)

    # ----------------------------
    # SAVE RESULT
    # ----------------------------
    result = Image.alpha_composite(base_img, overlay).convert('RGB')  # JPEG has no alpha
    draw = ImageDraw.Draw(result)
    for (x, y) in important_tiles_dict[target_slide][:top_k]:
        left, top = x / downsample, y / downsample
        draw.rectangle([left, top, left + tile_size, top + tile_size], outline="black", width=12)

    os.makedirs(output_dir, exist_ok=True)

    output_path = os.path.join(output_dir, f"{target_slide}.jpg")
    result.save(output_path, format="jpeg")
    print(f"Visualization saved at {output_path}")

In [184]:
import h5py
import numpy as np
import random
from sklearn.metrics.pairwise import cosine_similarity
from PIL import Image, ImageDraw
import cv2
import os

# ----------------------------
# CONFIGURATION
# ----------------------------
class_of_interest = 0
num_refs = 50
random_seed = 0  # fixed so the same reference slides are drawn on every run
# NOTE(review): `labels` (slide name -> class) and `slides` (all slide names)
# are not defined in this cell; they must come from an earlier cell.
slide_dict = labels.copy()

# The paths are left empty because the data is private; fill in before running.
features_dir = ""
important_coords_path = ""
output_root = ""  # parent directory of the "<class>comparison" output folders

# ----------------------------
# STEP 1: SELECT UP TO `num_refs` RANDOM REFERENCE SLIDES
# ----------------------------
candidate_slides = [s for s, cls in slide_dict.items() if cls == class_of_interest]
# A private Random instance keeps the draw reproducible without reseeding
# the global generator.
reference_slides = random.Random(random_seed).sample(candidate_slides, min(len(candidate_slides), num_refs))
print(f"Selected {len(reference_slides)} reference slides for comparison.")

# ----------------------------
# STEP 2: VISUALIZE EVERY NON-REFERENCE SLIDE
# ----------------------------
reference_set = set(reference_slides)  # O(1) membership tests
target_slides = [s for s in slides if s not in reference_set]
for target_slide in target_slides:
    base_images_dir = f""  # path of this slide's base image (typically built from target_slide)
    class_folder = "class_0" if slide_dict[target_slide] == 0 else "class_1"
    # Both classes go under the same root; previously only the class_1 path
    # carried a leading "/", which pointed at the filesystem root.
    output_dir = os.path.join(output_root, f"{class_of_interest}comparison", class_folder)
    visualization_across_slides_no_bg(features_dir, target_slide, important_coords_path, reference_slides, base_images_dir, output_dir, tile_size=224, alpha =0.5)

Selected 50 reference slides for comparison.
Visualization saved at /home/mxn477/m07/eagle/intrepretive_visualizations/background_filtered/0comparison/class_0/BT9-102937-00-R00_105005.jpg
Visualization saved at /home/mxn477/m07/eagle/intrepretive_visualizations/background_filtered/0comparison/class_1/BT9-102938-00-R00_105057.jpg
Visualization saved at /home/mxn477/m07/eagle/intrepretive_visualizations/background_filtered/0comparison/class_0/BT9-102940-00-R00_105446.jpg
Visualization saved at /home/mxn477/m07/eagle/intrepretive_visualizations/background_filtered/0comparison/class_1/BT9-102942-00-R00_105811.jpg
Visualization saved at /home/mxn477/m07/eagle/intrepretive_visualizations/background_filtered/0comparison/class_0/BT9-102943-00-R00_105902.jpg
Visualization saved at /home/mxn477/m07/eagle/intrepretive_visualizations/background_filtered/0comparison/class_0/BT9-102944-00-R00_110106.jpg
Visualization saved at /home/mxn477/m07/eagle/intrepretive_visualizations/background_filtered/0co