In [1]:
from miso.hist_features import get_features
from miso.utils import calculate_affinity
from miso.utils import preprocess
from miso import Miso
from PIL import Image
import pandas as pd
import numpy as np
import scanpy as sc
import os
from sklearn.decomposition import PCA
#Disable Pillow's pixel-count safety limit (decompression-bomb check) so that very large images can be opened
Image.MAX_IMAGE_PIXELS = None

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#install gdown package for downloading tutorial data
!python -m pip install gdown



In [3]:
#download the miso_tutorial_data folder 
!gdown --folder https://drive.google.com/drive/folders/1G4sMqK4kinJ81rTWJkMgWt_mfZEZyaSZ?usp=drive_link
#set working directory to miso_tutorial_data
os.chdir('miso_tutorial_data')

Retrieving folder contents
Processing file 1253InghpAya2vL9SoFGiwqTT2Y2y_H0Z he-raw.tif
Processing file 1dOXTMkqqi_GQgMKUCz1jRONHgXwmGyWv locs.csv
Processing file 1zue4elVDvQztzK1VfaSKunRw-0KWoyPt protein.h5ad
Processing file 1X_uc2bLUS7wYcAL_ICns6a5U-q7TgCu6 rna.h5ad
Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From (original): https://drive.google.com/uc?id=1253InghpAya2vL9SoFGiwqTT2Y2y_H0Z
From (redirected): https://drive.google.com/uc?id=1253InghpAya2vL9SoFGiwqTT2Y2y_H0Z&confirm=t&uuid=15b29fa2-24ec-4bd0-8255-ac15b544623d
To: /Users/kylecoleman/data/miso/tutorial/miso_tutorial_data/he-raw.tif
100%|████████████████████████████████████████| 938M/938M [00:37<00:00, 25.1MB/s]
Downloading...
From: https://drive.google.com/uc?id=1dOXTMkqqi_GQgMKUCz1jRONHgXwmGyWv
To: /Users/kylecoleman/data/miso/tutorial/miso_tutorial_data/locs.csv
100%|████████████████████████████████████████| 158k/158k [00:00<00:00, 3.26MB/s]
Dow

In [5]:
#Spot-level histology features; this cell is only needed when an H&E image is available
im = Image.open('he-raw.tif')

#resolution of the raw H&E image, in microns per pixel
pixel_size_raw = 65 / 255.54640512302527

#spot radius in pixels
#NOTE(review): 55 is presumably the spot diameter in microns - confirm for your platform
rad = 55 / (2 * pixel_size_raw)

#target resolution, in microns per pixel
pixel_size = 0.5

#spot spatial locations in pixels; the columns are relabelled '1'..'5'
locs = pd.read_csv('locs.csv', index_col=0)
locs.columns = list(map(str, range(1, 6)))

image_emb = get_features(
    im,
    locs,
    rad,
    pixel_size_raw,
    pixel_size,
    pretrained=True,
    device='cpu',
)
#keep a copy of the extracted embeddings on disk
np.save('image_emb.npy', image_emb)

shift 0/256, 0/256
Extracting embeddings...
Take key teacher in provided checkpoint dict
Pretrained weights loaded from /Users/kylecoleman/data/miso/miso/checkpoints/vit256_small_dino.pth
# of Patches: 196
Take key teacher in provided checkpoint dict
Pretrained weights loaded from /Users/kylecoleman/data/miso/miso/checkpoints/vit4k_xs_dino.pth
tile 0 / 6
209 sec
210 sec
shift 0/256, 64/256
Extracting embeddings...
Take key teacher in provided checkpoint dict
Pretrained weights loaded from /Users/kylecoleman/data/miso/miso/checkpoints/vit256_small_dino.pth
# of Patches: 196
Take key teacher in provided checkpoint dict
Pretrained weights loaded from /Users/kylecoleman/data/miso/miso/checkpoints/vit4k_xs_dino.pth
tile 0 / 6
208 sec
208 sec
shift 0/256, 128/256
Extracting embeddings...
Take key teacher in provided checkpoint dict
Pretrained weights loaded from /Users/kylecoleman/data/miso/miso/checkpoints/vit256_small_dino.pth
# of Patches: 196
Take key teacher in provided checkpoint dict


In [6]:
#Load data and perform necessary preprocessing
#random_state is fixed so that the PCA projections are identical on every run
#(scikit-learn may select a randomized SVD solver, which is otherwise unseeded)
rna = sc.read('rna.h5ad')
rna = preprocess(rna,modality='rna')
rna_pcs = PCA(128, random_state=0).fit_transform(rna.X)

protein = sc.read('protein.h5ad')
protein = preprocess(protein,modality='protein')
#PCA is left disabled for the protein modality; uncomment to reduce it as well
#protein_pcs = PCA(128, random_state=0).fit_transform(protein.X)

image_pcs = PCA(128, random_state=0).fit_transform(image_emb)

  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


In [7]:
#Build one adjacency matrix per modality, in the order RNA, protein, image
#For datasets with more than 10,000 spots/cells, sparse=True is recommended
A1, A2, A3 = (
    calculate_affinity(modality_features, sparse=False)
    for modality_features in (rna.X, protein.X, image_emb)
)
A = [A1, A2, A3]

In [8]:
#ind_views: to use only a subset of the modality-specific terms, pass a list of the indices of the
#modalities to include, e.g. ind_views=[0,2] to include the RNA and image features
#combs: to use only a subset of the interaction terms, pass a list of tuples holding the indices of the
#modalities in each interaction, e.g. combs=[(0,1),(0,2)] for the RNA-protein and RNA-image interactions
model = Miso(
    [rna_pcs, protein.X, image_pcs],
    A,
    ind_views='all',
    combs='all',
    sparse=False,
    device='cpu',
)
model.train()
#write the embedding held by the model to disk
np.save('emb.npy', model.emb)

Training network for modality 1


100%|██████████| 1000/1000 [03:33<00:00,  4.67it/s]


Training network for modality 2


100%|██████████| 1000/1000 [03:28<00:00,  4.80it/s]


Training network for modality 3


100%|██████████| 1000/1000 [03:33<00:00,  4.69it/s]


In [None]:
#Run the model's clustering step, requesting 10 clusters
#NOTE(review): the return value is not stored; as the last expression of the cell it is only displayed.
#Presumably these are per-spot cluster labels - confirm, and assign them to a variable if they are needed later
model.cluster(n_clusters=10)
