<h1 style="text-align: center; font-family: Verdana; font-size: 32px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; font-variant: small-caps; letter-spacing: 3px; color: #000000; background-color: #ffffff;">CONVERT SLIDES TO TILES</h1>

<h2 style="text-align: center; font-family: Verdana; font-size: 20px; font-style: normal; font-weight: bold; text-decoration: underline; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;">Notebook to Convert Slide (.svs & .qptiff) Images to Tiled Images</h2>

<br><br>

<h2 style="font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; color: navy; background-color: #ffffff;">TABLE OF CONTENTS</h2>

---

<h3 style="text-indent: 10vw; font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;"><a href="#imports">0&nbsp;&nbsp;&nbsp;&nbsp;IMPORTS</a></h3>

---

<h3 style="text-indent: 10vw; font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;"><a href="#setup">1&nbsp;&nbsp;&nbsp;&nbsp;SETUP & PREPROCESSING</a></h3>

---

<h3 style="text-indent: 10vw; font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;"><a href="#tile_slides">2&nbsp;&nbsp;&nbsp;&nbsp;TILE THE SLIDES</a></h3>

---

<h3 style="text-indent: 10vw; font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;"><a href="#automl_csv">3&nbsp;&nbsp;&nbsp;&nbsp;GENERATE THE AUTOML CSV</a></h3>

---

<br>

<a style="font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; background-color: #ffffff; color: navy;" id="imports">0&nbsp;&nbsp;IMPORTS</a>

In [2]:
# General imports
import os
import numpy as np
import pandas as pd

# Utility functions
from utils import get_paths, get_slide_region
from utils import tile_tiff, tile_svs
from utils import determine_label, tile_from_path, create_automl_preannotated_csv

<br>

<a style="font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; background-color: #ffffff; color: navy;" id="setup">1&nbsp;&nbsp;SETUP</a>

<h3 style="font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff; font-variant: small-caps;">1.1  DEFINE BASE PATHS</h3>

---

In [3]:
# Root directories holding the slides of each image class
path_to_focus = "/home/jupyter/data/Focus/Out_Focus"
path_to_tissue = "/home/jupyter/data/Tissue/No_Tissue"
path_to_stained = "/home/jupyter/data/Stained"
path_to_he = "/home/jupyter/data/H&E"

# The stained slides are further split into one sub-directory per stain grade
path_to_dim, path_to_no, path_to_yes = (
    os.path.join(path_to_stained, stain_grade)
    for stain_grade in ("Dim", "No", "Yes")
)

<h3 style="font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff; font-variant: small-caps;">1.2  CLASS SPECIFIC FULL PATHS</h3>

---

We also segment the master list of paths into the first subset (which will be trained on/annotated during the bulk of this project) and the remaining subset (which may be trained on/annotated afterwards, time permitting).

In [4]:
# Get full paths to various image classes
oof_paths = get_paths(path_to_focus)
stn_yes_paths = get_paths(path_to_yes)
stn_dim_paths = get_paths(path_to_dim)
stn_no_paths = get_paths(path_to_no)
no_tis_paths = get_paths(path_to_tissue)
he_tis_paths = get_paths(path_to_he)

# Number of stained slides (per stain grade) that go into the initial subset;
# every stained slide past these cut-offs lands in the remaining subset.
N_FIRST_YES, N_FIRST_DIM, N_FIRST_NO = 20, 10, 10

first_set = (
    oof_paths
    + stn_yes_paths[:N_FIRST_YES]
    + stn_dim_paths[:N_FIRST_DIM]
    + stn_no_paths[:N_FIRST_NO]
    + no_tis_paths
    + he_tis_paths
)
last_set = (
    stn_yes_paths[N_FIRST_YES:]
    + stn_dim_paths[N_FIRST_DIM:]
    + stn_no_paths[N_FIRST_NO:]
)

# One (heading, paths, file-type noun) entry per image class, in print order:
# out of focus, strong positive stain, weak positive stain, negative stain,
# no tissue on slide, H&E tissue.
class_summaries = [
    ("OOF", oof_paths, "Out of Focus"),
    ("NORMAL POSITIVE STAIN", stn_yes_paths, "Positive Stain"),
    ("DIM POSITIVE STAIN", stn_dim_paths, "Dim-Positive Stain"),
    ("NEGATIVE STAIN", stn_no_paths, "Negative Stain"),
    ("NO TISSUE", no_tis_paths, "No-Tissue"),
    ("H&E TISSUE", he_tis_paths, "H&E Tissue"),
]

# Print the file count and the first two paths of every class
for class_idx, (heading, class_paths, noun) in enumerate(class_summaries):
    # The first section is preceded by one blank line, later sections by two
    lead = "\n" if class_idx == 0 else "\n\n"
    print(f"{lead}{heading} IMAGE PATHS   –   There are {len(class_paths)} {noun} Files")
    print("\t– ", class_paths[:2]+["etc..."])

print("\n----------------------------------------------------")

print(f"\nThere are {len(first_set)} slides in the initial subset")
print(f"There are {len(last_set)} slides in the remaining subset\n")


OOF IMAGE PATHS   –   There are 13 Out of Focus Files
	–  ['/home/jupyter/data/Focus/Out_Focus/41752.svs', '/home/jupyter/data/Focus/Out_Focus/42684.svs', 'etc...']


NORMAL POSITIVE STAIN IMAGE PATHS   –   There are 131 Positive Stain Files
	–  ['/home/jupyter/data/Stained/Yes/SGN010B_APD ML1407166A 20191107 CD228 ^1132440.svs', '/home/jupyter/data/Stained/Yes/SGN010B_APD ML0903048C 20191107 CD228 ^1132151.svs', 'etc...']


DIM POSITIVE STAIN IMAGE PATHS   –   There are 40 Dim-Positive Stain Files
	–  ['/home/jupyter/data/Stained/Dim/SGN010B_APD ML0906097A 20191108 CD228 ^1133078.svs', '/home/jupyter/data/Stained/Dim/SGN010B_APD MPB03103A 20191111 CD228 ^1131843.svs', 'etc...']


NEGATIE STAIN IMAGE PATHS   –   There are 42 Negative Stain Files
	–  ['/home/jupyter/data/Stained/No/SGN010B_APD ML1703166 20191105 CD228 ^1120968.svs', '/home/jupyter/data/Stained/No/SGN023_APD ML1809102D 20200406 avb6 DF4000^1174437.svs', 'etc...']


NO TISSUE IMAGE PATHS   –   There are 7 No-Tissue Files

<h3 style="font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff; font-variant: small-caps;">1.3 (optional) MAKE ALIAS MAPPING TO DECREASE NAME COMPLEXITY</h3>

---

In [5]:
NAME_MAP_DIR = "/home/jupyter/files/"

# Sorted so the concatenation order below does not depend on the arbitrary
# order in which `os.listdir` returns directory entries.
NAME_MAP_FILES = sorted(
    os.path.join(NAME_MAP_DIR, f_name)
    for f_name in os.listdir(NAME_MAP_DIR)
    if f_name.startswith("name_map")
)

if not NAME_MAP_FILES:
    # NOTE(review): file names are kept as-is for compatibility with existing
    # files on disk; they no longer describe the number of slides they hold.
    first_map_path = os.path.join(NAME_MAP_DIR, "name_map_first_60.csv")
    remaining_map_path = os.path.join(NAME_MAP_DIR, "name_map_200_to_206_he.csv")

    # Create mapping: original file name -> short alias (extension preserved).
    # The initial subset is numbered from 0, the remaining subset from 200.
    first_name_map = {
        path.rsplit("/", 1)[1]: f"img_{i:03}.{path.rsplit('.', 1)[1]}"
        for i, path in enumerate(first_set)
    }
    # The remaining subset must be built from `last_set`; enumerating
    # `first_set` again would give every original name two aliases.
    remaining_name_map = {
        path.rsplit("/", 1)[1]: f"img_{i+200:03}.{path.rsplit('.', 1)[1]}"
        for i, path in enumerate(last_set)
    }

    # Save mappings to .CSV for later use
    for csv_path, name_map in (
        (first_map_path, first_name_map),
        (remaining_map_path, remaining_name_map),
    ):
        pd.DataFrame(
            {"full_path": list(name_map.keys()), "alias": list(name_map.values())}
        ).to_csv(csv_path, index=False)
    NAME_MAP_FILES = sorted([first_map_path, remaining_map_path])

# Read everything as text so purely numeric slide names stay strings
name_map_df = pd.concat(
    [pd.read_csv(path, dtype=str) for path in NAME_MAP_FILES]
).reset_index(drop=True)

# Strip only the final extension; `tile_set` looks names up with
# `rsplit(".", 1)`, so the keys must be derived the same way.
for column in ("full_path", "alias"):
    name_map_df[column] = name_map_df[column].apply(lambda x: x.rsplit(".", 1)[0])

# DISPLAY
display(name_map_df)

# Create Mapping Dictionaries for File Names
original_2_alias = dict(zip(name_map_df["full_path"].values, name_map_df["alias"].values))
alias_2_original = {v:k for k,v in original_2_alias.items()}

Unnamed: 0,full_path,alias
0,11606,img_200
1,7373,img_201
2,AVD-B1VOV-2579A,img_202
3,AVD-B1VOV-3267A,img_203
4,AVD-B1VOV-3450A,img_204
...,...,...
62,29888,img_055
63,29877,img_056
64,29890,img_057
65,29841,img_058


<br>

<a style="font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; background-color: #ffffff; color: navy;" id="tile_slides">2&nbsp;&nbsp;TILE THE SLIDES</a>

In [10]:
def tile_set(path_set, output_tile_dir="/tmp", use_name_mapping=False):
    """ Tile every slide in `path_set` via `tile_from_path`

    Args:
        path_set (iterable of str): Paths to the slide files to be tiled
        output_tile_dir (str): Forwarded to `tile_from_path` as `output_dir_root`
        use_name_mapping (bool): If True, the slide name handed to
            `tile_from_path` has its file stem replaced by the alias found
            in the module-level `original_2_alias` dictionary

    Returns:
        None
    """
    for original_path in path_set:
        new_path = original_path
        if use_name_mapping:
            # Swap the file stem for its alias; directory and extension are kept
            dir_and_file = original_path.rsplit("/", 1)
            parent_dir = dir_and_file[0]
            alias = original_2_alias[dir_and_file[1].rsplit(".", 1)[0]]
            extension = original_path.rsplit(".", 1)[1]
            new_path = os.path.join(parent_dir, alias + "." + extension)

        print(f"\n\n\n\t... Starting Slide `{original_path}` ...\n\n\n")

        tiling_options = {
            "slide_name": new_path,
            "output_dir_root": output_tile_dir,
            "divert_blanks": True,
            "verbose": 1,
        }
        # Anything that is not an .svs file is tiled with the qptiff style
        if not original_path.endswith(".svs"):
            tiling_options["style"] = "qptiff"
        tile_from_path(original_path, **tiling_options)

# tile_set(first_set, output_tile_dir="/home/jupyter/tiles")
# tile_set(last_set, output_tile_dir="/home/jupyter/tiles")

<br>

<a style="font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; background-color: #ffffff; color: navy;" id="automl_csv">3&nbsp;&nbsp;GENERATE THE AUTOML CSV</a>

In [None]:
def create_automl_preannotated_csv(tile_dir, gcs_pre_path="gs://seagen-quantiphi/to-be-annotated"):
    """ Create the AUTOML CSV to be used to update annotations 
    
    Args: 
        tile_dir (str): Path to the directory containing the sub-directories
            named after the respective labels of the images/slides contained there-within
        gcs_pre_path (str): Path to the gcs bucket the images will be stored
    
    Returns: 
        None; 
            A header-less .CSV file (`automl.csv`) will be generated into the 
            `tile_dir` directory with one `<gcs image path>,<label>` row per 
            .png tile, sorted by label and then by file name
    
    Note:
        This definition shadows the function of the same name imported 
        from `utils` at the top of the notebook.
    
    """
    # GCS object names always use forward slashes, regardless of the local OS
    import posixpath
    
    tile_map = {"image_paths":[], "labels":[]}
    for label in sorted(os.listdir(tile_dir)):
        label_dir = os.path.join(tile_dir, label)
        # Only real label directories count: skip .csv entries (e.g. a previously
        # generated automl.csv), hidden entries such as `.ipynb_checkpoints`,
        # and any stray plain files.
        if label.endswith(".csv") or label.startswith(".") or not os.path.isdir(label_dir):
            continue
        full_paths = [
            posixpath.join(gcs_pre_path, label, f_name) 
            for f_name in sorted(os.listdir(label_dir)) 
            if f_name.endswith(".png")
        ]
        tile_map["labels"].extend([label,]*len(full_paths))
        tile_map["image_paths"].extend(full_paths)
    pd.DataFrame(tile_map).to_csv(os.path.join(tile_dir, "automl.csv"), 
                                  index=False, 
                                  header=False, 
                                  encoding="utf-8")

# ################################################################################# #
# The call below assumes all images will be moved from `tile_dir` to `gcs_pre_path` #
# ################################################################################# #
#                                                                                   #
# --> See below for basic command to move files from local to GCS                   #
#                                                                                   #
# >>> gsutil -m cp -r "{tile_dir}/*" "{gcs_pre_path}"                               #
#                                                                                   #
#                                ----- OR -----                                     #
#                                                                                   #
# >>> gsutil -m cp -r /home/jupyter/tiles/* gs://seagen-quantiphi/to-be-annotated   #
#                                                                                   #
# ################################################################################# #
# Write `automl.csv` into the local tile directory, pointing at the GCS copies
create_automl_preannotated_csv(
    tile_dir="/home/jupyter/tiles",
    gcs_pre_path="gs://seagen-quantiphi/to-be-annotated",
)