<h1 style="text-align: center; font-family: Verdana; font-size: 32px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; font-variant: small-caps; letter-spacing: 3px; color: #000000; background-color: #ffffff;">UPDATE TILE LEVEL METADATA</h1>

<h2 style="text-align: center; font-family: Verdana; font-size: 14px; font-style: normal; font-weight: bold; text-decoration: underline; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;">Notebook to Assist in Preserving Information Lost When Annotating Using the AutoML Tool</h2>

<br><br>

<h2 style="font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; color: navy; background-color: #ffffff;">TABLE OF CONTENTS</h2>

---

<h3 style="text-indent: 10vw; font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;"><a href="#imports">0&nbsp;&nbsp;&nbsp;&nbsp;IMPORTS</a></h3>

---

<h3 style="text-indent: 10vw; font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;"><a href="#setup">1&nbsp;&nbsp;&nbsp;&nbsp;SETUP & PREPROCESSING</a></h3>

---

<h3 style="text-indent: 10vw; font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff;"><a href="#update">2&nbsp;&nbsp;&nbsp;&nbsp;UPDATE BLOB METADATA</a></h3>

---

<br>

<a style="font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; background-color: #ffffff; color: navy;" id="imports">0&nbsp;&nbsp;IMPORTS</a>

In [24]:
# Data Science Imports
from sklearn.tree import _tree
from sklearn import tree
import pandas as pd; pd.options.mode.chained_assignment=None;
import numpy as np

# Domain Related Imports
from google.cloud import storage
import openslide
import tifffile

# Built-In Imports
import math
import PIL
import cv2
import os
import gc

# Utility Functions
from utils import get_paths, get_slide_region
from utils import tile_tiff, tile_svs
from utils import determine_label, tile_from_path, create_automl_preannotated_csv

# Visualization imports
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import seaborn as sns
from PIL import Image

<br>

<a style="font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; background-color: #ffffff; color: navy;" id="setup">1&nbsp;&nbsp;SETUP</a>

<h3 style="font-family: Verdana; font-size: 20px; font-style: normal; font-weight: normal; text-decoration: none; text-transform: none; letter-spacing: 2px; color: navy; background-color: #ffffff; font-variant: small-caps;">1.1  GET GCS PATHS</h3>

---

In [27]:
# GCS prefix under which the tile images are listed
# (presumably the tiles still awaiting annotation, going by the folder name).
GCS_TILE_DIR = "gs://seagen-quantiphi/to-be-annotated"
# IPython shell capture: run `gsutil ls` and keep each line of its output as one
# entry of GCS_FILE_PATHS. `**` is gsutil's recursive wildcard, so .png objects in
# every sub-folder of GCS_TILE_DIR are listed.
# NOTE(review): any non-path text gsutil prints (e.g. an error message) may also be
# captured as list entries -- confirm the listing succeeded before using the paths.
GCS_FILE_PATHS = !gsutil ls {GCS_TILE_DIR}/**/*.png
# Report how many tile paths were found.
print(f"NUMBER OF GCS PATHS TO TILES: {len(GCS_FILE_PATHS)}")

NUMBER OF GCS PATHS TO TILES: 22594


<br>

<a style="font-family: Verdana; font-size: 24px; font-style: normal; font-weight: bold; text-decoration: none; text-transform: none; letter-spacing: 3px; background-color: #ffffff; color: navy;" id="update">2&nbsp;&nbsp;UPDATE BLOB METADATA</a>

In [31]:
def _parse_tile_gcs_path(gcs_path):
    """Split a tile's GCS URI into ``(bucket_name, blob_name, metadata)``.

    The tile file name is expected to look like ``<slide_name>_<x>_<y>.<ext>``;
    ``slide_name`` itself may contain underscores because only the last two
    underscore-separated fields are treated as coordinates.

    Raises:
        ValueError: If the URI does not start with ``gs://``, has no object
            name, or the file name does not contain the two coordinate fields.
    """
    scheme = "gs://"
    if not gcs_path.startswith(scheme):
        raise ValueError(f"Expected a GCS path starting with 'gs://', got: {gcs_path!r}")

    # gs://bucket/blob/blob/blob.png --> bucket, blob/blob/blob.png
    bucket_name, _, blob_name = gcs_path[len(scheme):].partition("/")
    if not bucket_name or not blob_name:
        raise ValueError(f"GCS path must contain a bucket and an object name: {gcs_path!r}")

    # [-1] (rather than [1]) also handles objects stored at the bucket root.
    file_name = blob_name.rsplit("/", 1)[-1]
    # Drop the extension whatever its length instead of assuming ".png".
    stem = os.path.splitext(file_name)[0]

    fields = stem.rsplit("_", 2)
    if len(fields) != 3:
        raise ValueError(
            f"Tile file name must look like '<slide_name>_<x>_<y>.<ext>', got: {file_name!r}"
        )
    slide_name, tl_x, tl_y = fields

    # Coordinates are kept as the strings found in the file name (zero padding included).
    metadata = {"slide_name": slide_name, "x_coord": tl_x, "y_coord": tl_y}
    return bucket_name, blob_name, metadata


def set_blob_metadata(gcs_path, verbose=False, client=None):
    """Set a blob's custom metadata from the information encoded in its file name.

    The slide name and the tile's two coordinates are parsed out of the file
    name (``<slide_name>_<x>_<y>.<ext>``) and written to the blob as the
    ``slide_name``, ``x_coord`` and ``y_coord`` metadata keys.

    Args:
        gcs_path (str): Full GCS URI of the tile, e.g.
            ``gs://bucket/folder/img_033_01024_04096.png``.
        verbose (bool, optional): Whether to print the blob's metadata after
            the update. Defaults to False.
        client (optional): An existing ``google.cloud.storage.Client`` to
            reuse. When omitted, a new client is created for this call; pass
            one in when updating many blobs to avoid rebuilding it each time.

    Returns:
        None

    Raises:
        ValueError: If ``gcs_path`` is not a ``gs://`` URI or the file name
            does not follow the expected pattern.
        FileNotFoundError: If no blob exists at ``gcs_path``.
    """
    bucket_name, blob_name, metadata = _parse_tile_gcs_path(gcs_path)

    # Get the connection to the appropriate GCS path
    storage_client = client if client is not None else storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.get_blob(blob_name)

    # get_blob() returns None for a missing object; fail with a clear message
    # instead of an AttributeError on the assignment below.
    if blob is None:
        raise FileNotFoundError(f"No blob found at {gcs_path!r}")

    # Update blob
    # NOTE(review): the recorded output for this cell also shows a 'slide_label'
    # key that is never set here -- presumably left over from an earlier update
    # and retained by patch(); confirm if that key is still required.
    blob.metadata = metadata
    blob.patch()

    if verbose:
        print(f"\t– The metadata for the blob {blob.name} is:\n\t\t––> {blob.metadata}")


# Smoke test: tag only the first two tiles and echo the metadata each one ends up with
for gcs_path in GCS_FILE_PATHS[:2]:
    set_blob_metadata(gcs_path=gcs_path, verbose=True)

# ----------------------------------------------------------- #
# To tag EVERY tile listed in GCS_FILE_PATHS, uncomment the   #
# one-line loop below (it issues one update per listed path): #
# ----------------------------------------------------------- #
# for gcs_path in GCS_FILE_PATHS: set_blob_metadata(gcs_path) #
# ----------------------------------------------------------- #

	– The metadata for the blob to-be-annotated/dim_positive/img_033_01024_04096.png is:
		––> {'x_coord': '01024', 'slide_label': 'dim_positive', 'slide_name': 'img_033', 'y_coord': '04096'}
	– The metadata for the blob to-be-annotated/dim_positive/img_033_01024_05120.png is:
		––> {'x_coord': '01024', 'slide_label': 'dim_positive', 'slide_name': 'img_033', 'y_coord': '05120'}
