### Using *fusion-tools* to better understand and annotate spatial --omics data

In [1]:
# This cell only needs to be executed once, just making sure that the package is correctly installed 
# and is accessible by your Python kernel
#%pip install --upgrade setuptools
#%pip install fusion-tools[interactive]==3.5.71
#%pip install large-image[common]==1.29.11

#### **Make sure you restart the kernel before proceeding!**

##### Check the output of the following to make sure that the correct package versions were installed:
- fusion-tools==3.5.40
- large-image==1.29.11

In [2]:
%pip list

Package                        VersionNote: you may need to restart the kernel to use updated packages.

------------------------------ ------------
affine                         2.4.0
aiobotocore                    2.5.4
aiohappyeyeballs               2.4.0
aiohttp                        3.9.3
aioitertools                   0.12.0
aiosignal                      1.3.1
alabaster                      0.7.16
anndata                        0.10.6
annotated-types                0.7.0
ansi2html                      1.8.0
anyio                          4.4.0
app-model                      0.2.6
appdirs                        1.4.4
argcomplete                    3.5.3
array_api_compat               1.8
asciitree                      0.3.3
asttokens                      2.4.1
attrs                          24.2.0
babel                          2.16.0
backcall                       0.2.0
backports.tarfile              1.2.0
beautifulsoup4                 4.12.3
bleach                         6.

#### Importing packages as needed

In this case, the *Visualization* object and interactive components.

In [None]:
import os
import anndata as ad
import pandas as pd
from tqdm import tqdm
import json

from fusion_tools.visualization import Visualization
from fusion_tools.components import HybridSlideMap, OverlayOptions, PropertyPlotter, BulkLabels, HRAViewer

from fusion_tools.utils.omics import get_gene_info


  from .autonotebook import tqdm as notebook_tqdm


#### Pointing to your downloaded files

Edit the following lines to indicate where the data you downloaded from the HuBMAP Portal is located on your computer.

In [8]:
# In the bottom two lines, os.getcwd() gets the folder that this script is stored in and then that is combined with the names below
# to point to the image and annotations
downloaded_image = os.path.join(os.getcwd(),'Portal_Data','visium_histology_hires_pyramid.ome.tif')
downloaded_analysis = os.path.join(os.getcwd(),'Portal_Data','secondary_analysis.h5ad')

#### If the following lines run successfully then your data paths are valid, if not then check the paths to make sure you're pointing at the right location.

In [9]:
print(downloaded_image)

print(os.path.exists(downloaded_image))
import large_image

test = large_image.open(downloaded_image)
print(test)

c:\Users\samuelborder\Desktop\HIVE_Stuff\FUSION\Canvas Course Folder\Portal_Data\visium_histology_hires_pyramid.ome.tif
True
TifffileFileTileSource ('c:\\Users\\samuelborder\\Desktop\\HIVE_Stuff\\FUSION\\Canvas Course Folder\\Portal_Data\\visium_histology_hires_pyramid.ome.tif', 'JPEG', 95, 0, 'raw', False, '__STYLESTART__', None, '__STYLEEND__')


#### Extracting information to add to annotations

The following cells just define some functions for getting the genes with the greatest "dispersion" value


In [10]:
def extract_high_dispersion(analysis_data, top_k = 25):

    # Finding the "var" associated with normalized dispersion and sorting values from high to low
    highest_norm_dispersions = analysis_data.var['dispersions_norm'].sort_values(ascending=False)
    # Listing just the names and not the values
    top_k_names = list(highest_norm_dispersions.index)[:top_k]

    return top_k_names

def translate_ensg_names(names_list):
    # Storing the information in a dictionary
    n_info_dict = {}
    # Iterating through the provided list of names (tqdm provides a nice progress bar)
    for n in tqdm(names_list):
        # Using the mygene.info API to extract information on this particular gene ID
        n_info = get_gene_info(n)
        # Adding information to the dictionary
        n_info_dict[n] = n_info

    return n_info_dict

#### Loading the *secondary_analysis.h5ad* file and pulling out relevant info per-spot

In [11]:
analysis_data = ad.read_h5ad(downloaded_analysis)

In [12]:
top_k_variable = extract_high_dispersion(analysis_data)
top_k_info = translate_ensg_names(top_k_variable)

# This is a nice way of visualizing the extracted gene information. 
# Any that don't have an "alias" field just aren't well defined compared to the others that have a summary and alias
print(json.dumps(top_k_info,indent=4))

100%|██████████| 25/25 [00:17<00:00,  1.44it/s]

{
    "ENSG00000211899.10": {
        "HGNC": "5541",
        "_id": "3507",
        "_version": 1,
        "alias": [
            "AGM1",
            "MU",
            "VH"
        ],
        "summary": "Immunoglobulins (Ig) are the antigen recognition molecules of B cells. An Ig molecule is made up of 2 identical heavy chains and 2 identical light chains (see MIM 147200) joined by disulfide bonds so that each heavy chain is linked to a light chain and the 2 heavy chains are linked together. Each Ig heavy chain has an N-terminal variable (V) region containing the antigen-binding site and a C-terminal constant (C) region, encoded by an individual C region gene, that determines the isotype of the antibody and provides effector or signaling functions. The heavy chain V region is encoded by 1 each of 3 types of genes: V genes (see MIM 147070), joining (J) genes (see MIM 147010), and diversity (D) genes (see MIM 146910). The C region genes are clustered downstream of the V region genes wit




In [None]:
print(json.dumps(top_k_info[list(top_k_info.keys())[0]],indent=4))

#### Now creating the Spot annotations and adding the most variable gene info

In [None]:
# Loaded annotations with the gene names found in the previous step
from fusion_tools.utils.shapes import load_visium
downloaded_annotations = load_visium(downloaded_analysis,include_var_names = top_k_variable)

#### Setting up the visualization 
In this next section, we'll define a *Visualization* object including a *SlideMap*, *OverlayOptions*, *PropertyPlotter*, *HRAViewer*, and *BulkLabels*

In [None]:
# Defining a Visualization object with images, annotations, and components
vis_object = Visualization(
    local_slides = [downloaded_image],
    local_annotations = [downloaded_annotations],
    linkage = 'row',
    components = [
        [
            HybridSlideMap(),
            [
                OverlayOptions(),
                PropertyPlotter(),
                HRAViewer(),
                BulkLabels()
            ]
        ]
    ],
    app_options = {
        'jupyter': True,
        'default_page': 'main'
    }
)

# This line starts the visualization 
vis_object.start()