# Image cropping function in action
## load data

In [23]:
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import time

from spatial_tools.image._utils import *
from spatial_tools.image.manipulate import crop_img
from spatial_tools.image.tools import read_tif

import skimage.feature as sk_image

# path to "raw" dataset folder
BASE_PATH = "/storage/groups/ml01/datasets/raw/20200909_PublicVisium_giovanni.palla"
dataset_name = "V1_Adult_Mouse_Brain"
dataset_folder = os.path.join(
    BASE_PATH, "20191205_10XVisium_MouseBrainCoronal_giovanni.palla"
)

#### get feature abt dict method

In [61]:
@timing
def get_features_abt(adata, dataset_folder, dataset_name, features=["hog"]):
    """
    Compute an image-feature table with one row per spot of `adata`.

    adata: annotated data object; reads obsm["spatial"] (column 0 as x,
        column 1 as y), obs.index and
        uns['spatial'][dataset_name]['scalefactors']['spot_diameter_fullres']
    dataset_folder, dataset_name: forwarded to read_tif to load the image
    features: list of feature names forwarded to get_features_statistics,
        defaults to hog
    output: pandas DataFrame indexed by "cell_name", one row per entry of
        adata.obs.index, columns taken from the per-spot feature dicts
    """
    img = read_tif(dataset_folder, dataset_name)

    spatial = adata.obsm["spatial"]
    xs = spatial[:, 0]
    ys = spatial[:, 1]
    diameter = adata.uns['spatial'][dataset_name]['scalefactors']['spot_diameter_fullres']

    names = adata.obs.index.tolist()

    # One feature dict per spot; the position of a name in `names` is also
    # the row of its coordinates in xs / ys.
    records = [
        get_features_statistics(
            crop_img(img, xs[pos], ys[pos], scalef=1, sizef=1, spot_diameter=diameter),
            name,
            features=features,
        )
        for pos, name in enumerate(names)
    ]

    table = pd.DataFrame(records)
    table["cell_name"] = names
    return table.set_index(["cell_name"])

def get_features_statistics(im, cell_name, features=["hog"]):
    """
    Collect the requested features of one image into a flat dict.

    im: image or image crop (numpy array)
    cell_name: name of the spot the image belongs to (not used in the computation)
    features: list of feature names (str) to calculate; only "hog" is
        handled, any other name is skipped
    output: dict mapping feature column name to value
    """
    collected = {}
    for name in features:
        if name != "hog":
            continue
        collected.update(get_hog_features(im, name))
    return collected

def get_hog_features(im, feature_name):
    """
    Compute the HOG descriptor of an image as a dict of named entries.

    im: image or image crop, numpy array
    feature_name: prefix used for the keys of the returned dict
    output: dict with one "<feature_name>_<k>" entry per element k of the
        array returned by sk_image.hog(im)
    """
    descriptor = sk_image.hog(im)
    return {f"{feature_name}_{idx}": value for idx, value in enumerate(descriptor)}

In [None]:
## Example: calculating the feature table (currently only HOG features are supported)

In [62]:
# Load the Visium dataset; counts are read from the filtered feature-barcode matrix file.
adata = sc.read_visium(
    dataset_folder, count_file=f"{dataset_name}_filtered_feature_bc_matrix.h5"
)
# read_visium warns that variable names are not unique; deduplicate them
# as the warning itself suggests.
adata.var_names_make_unique()

# Feature table for the first 100 spots only.
test = get_features_abt(adata[0:100], dataset_folder, dataset_name, features=["hog"])

Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


func:get_features_abt took: 3.7893130779266357 sec


In [None]:
# Image and spot geometry; these names are reused by the cropping cells further down.
img = read_tif(dataset_folder, dataset_name)

xcoord = adata.obsm["spatial"][:, 0]
ycoord = adata.obsm["spatial"][:, 1]
spot_diameter = adata.uns['spatial'][dataset_name]['scalefactors']['spot_diameter_fullres']

# Crop a single spot and compute its HOG features.
spot_id = 1
crop_1 = crop_img(img, xcoord[spot_id], ycoord[spot_id], scalef=1, sizef=1, spot_diameter=spot_diameter)
# Label the features with the spot that was actually cropped, not with the first spot.
cell_name = adata.obs.index[spot_id]
hog_dict = get_features_statistics(crop_1, cell_name, features=["hog"])

In [None]:
# Show the features of the single crop as a one-row table indexed by its cell name.
pd.DataFrame(hog_dict, index=[cell_name])

In [None]:
## Get the feature table with multithreading

In [None]:
from tqdm import tqdm
from joblib import Parallel, delayed

def process(andata_feature, spot_id, cell_name):
    """
    Crop one spot out of the image and compute its features.

    andata_feature: object exposing image, xcoord, ycoord, spot_diameter
        and features attributes
    spot_id: integer position of the spot in andata_feature.xcoord / ycoord
    cell_name: name of the spot, forwarded to get_features_statistics
    output: whatever get_features_statistics returns for the crop
    """
    crop_ = crop_img(
        andata_feature.image,
        andata_feature.xcoord[spot_id],
        andata_feature.ycoord[spot_id],
        scalef=1,
        sizef=1,
        spot_diameter=andata_feature.spot_diameter,
    )
    # The feature set is taken from andata_feature, not hard-coded here.
    return get_features_statistics(crop_, cell_name, features=andata_feature.features)

@timing
def get_features_abt_multithread(cell_names, num_cores = 1):
    """
    Compute the feature table for `cell_names` using joblib.

    Reads the notebook-level `andata_feature` object, which must exist
    before this function is called.

    cell_names: spot names; the position of a name in this sequence is used
        as its row in andata_feature.xcoord / ycoord, so the sequence must
        follow the order of the data andata_feature was built from
    num_cores: passed to joblib.Parallel as n_jobs
    output: pandas DataFrame indexed by "cell_name", one row per name
    """
    cell_names = list(cell_names)
    records = Parallel(n_jobs=num_cores)(
        delayed(process)(andata_feature, spot_id, cell_name)
        for spot_id, cell_name in enumerate(cell_names)
    )

    # process returns one feature dict per spot. pd.concat only accepts
    # Series/DataFrame objects, so assemble the table from the records and
    # attach the names explicitly.
    features_abt = pd.DataFrame(records)
    features_abt["cell_name"] = cell_names
    return features_abt.set_index("cell_name")
    
    
class AndataFeature():
    """
    Bundle the image and spot geometry needed to compute per-spot features.

    Attributes set by the constructor: image, xcoord, ycoord,
    spot_diameter, features.
    """
    def __init__(self, andata, dataset_folder, dataset_name, features=["hog"]):
        # Image is loaded once here so it can be shared by all crops.
        self.image = read_tif(dataset_folder, dataset_name)
        spatial = andata.obsm["spatial"]
        self.xcoord, self.ycoord = spatial[:, 0], spatial[:, 1]
        scalefactors = andata.uns['spatial'][dataset_name]['scalefactors']
        self.spot_diameter = scalefactors['spot_diameter_fullres']
        self.features = features

In [None]:
## Example: how to retrieve the feature table with multithreading

In [9]:
from tqdm import tqdm
from joblib import Parallel, delayed
    
class AndataFeature():
    """
    Image, spot geometry and feature configuration for one dataset, plus
    helpers to compute the per-spot feature table with joblib threads.
    """
    def __init__(self, andata, dataset_folder, dataset_name, features=["hog"]):
        """
        andata: annotated data object; reads obsm["spatial"], obs.index and
            uns['spatial'][dataset_name]['scalefactors']['spot_diameter_fullres']
        dataset_folder, dataset_name: forwarded to read_tif to load the image
        features: list of feature names, defaults to hog
        """
        self.image = read_tif(dataset_folder, dataset_name)
        self.xcoord = andata.obsm["spatial"][:, 0]
        self.ycoord = andata.obsm["spatial"][:, 1]
        self.spot_diameter = andata.uns['spatial'][dataset_name]['scalefactors']['spot_diameter_fullres']
        self.features = features
        # Keep the observation index so process() can map a cell name to its
        # coordinate row without depending on a notebook-level variable.
        self.obs_names = andata.obs.index

    def process(self, cell_name):
        """
        Crop the spot called `cell_name` and compute its features.

        cell_name: entry of the obs index of the data passed to the constructor
        output: whatever get_features_statistics_df returns for the crop
        """
        spot_id = self.obs_names.get_loc(cell_name)
        crop_ = crop_img(self.image, self.xcoord[spot_id], self.ycoord[spot_id], scalef=1,
                         sizef=1, spot_diameter=self.spot_diameter)

        # NOTE(review): get_features_statistics_df is not defined in this
        # notebook; presumably it comes from the star import of
        # spatial_tools.image._utils or from earlier kernel state - confirm.
        features_pd = get_features_statistics_df(crop_, cell_name, features=self.features)
        return features_pd

    @timing
    def get_features_abt_multithread(self, cell_names, num_cores = 1):
        """
        Run process() for every name in `cell_names` on joblib threads and
        combine the results.

        cell_names: iterable of spot names
        num_cores: passed to joblib.Parallel as n_jobs
        output: concatenated results indexed by "cell_name"; assumes each
            process() result is a pandas object with a "cell_name" column
            - TODO confirm against get_features_statistics_df
        """
        processed_list = Parallel(n_jobs = num_cores, backend="threading")(delayed(self.process)(i) for i in cell_names)

        features_abt = pd.concat(processed_list)
        features_abt.set_index(["cell_name"], inplace=True)
        return features_abt

In [10]:
if __name__ == '__main__':
    adata = sc.read_visium(
        dataset_folder, count_file=f"{dataset_name}_filtered_feature_bc_matrix.h5"
    )
    # read_visium warns that variable names are not unique; deduplicate them
    # as the warning itself suggests.
    adata.var_names_make_unique()

    andata_feature = AndataFeature(andata = adata, dataset_folder=dataset_folder,
                               dataset_name=dataset_name, features=["hog"])

    # Threaded feature extraction for the first 2000 spots with 5 workers.
    # Bind the result so the computed table is not thrown away.
    cell_names = adata[0:2000].obs.index.tolist()
    features_abt_threaded = andata_feature.get_features_abt_multithread(cell_names , num_cores=5)

Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


func:get_features_abt_multithread took: 115.83613753318787 sec


In [None]:
# get cell names of the first 10 spots
cell_names = adata[0:10].obs.index.tolist()
# This calls the module-level get_features_abt_multithread defined earlier in
# the notebook (not the AndataFeature method); that function reads the
# notebook-level `andata_feature` object.
get_features_abt_multithread(cell_names, num_cores = 1)

## crop image
- use different sizefactors and scalefactors
- try masking

location of the spot that we are cropping

In [None]:
# Plot the position of spot 100 (green) together with the image.
# img, xcoord and ycoord come from the single-spot example cell earlier in the notebook.
spot_id = 100
plt.scatter(xcoord[spot_id], ycoord[spot_id], c='green')
plt.imshow(img)

Crop with different neighborhood sizes (`sizef`). Note that the function also works when the requested range extends outside the image.

In [None]:
# Same spot and scalef=1, with increasing sizef (1, 2, 10, 100).
crop_1 = crop_img(img, xcoord[spot_id], ycoord[spot_id], scalef=1, sizef=1, spot_diameter=spot_diameter)
crop_2 = crop_img(img, xcoord[spot_id], ycoord[spot_id], scalef=1, sizef=2, spot_diameter=spot_diameter)
crop_10 = crop_img(img, xcoord[spot_id], ycoord[spot_id], scalef=1, sizef=10, spot_diameter=spot_diameter)
crop_100 = crop_img(img, xcoord[spot_id], ycoord[spot_id], scalef=1, sizef=100, spot_diameter=spot_diameter)

# Show the four crops in a 2x2 grid, in order of increasing sizef.
fig, axes = plt.subplots(2,2)
axes[0][0].imshow(crop_1)
axes[0][1].imshow(crop_2)
axes[1][0].imshow(crop_10)
axes[1][1].imshow(crop_100)

In [None]:
# Scratch example: a one-row DataFrame built from a list holding a single dict,
# with a constant "cell_name" column added afterwards.
dict_ = {"1_hog":1,"2_hog":2, "stat_4":4,"stat_5": 5}
log = pd.DataFrame([dict_])
log["cell_name"] = "test"
log

Crop with different scales (`scalef`). Note how the crops get smaller with smaller `scalef`.

In [None]:
# Same spot and sizef=10, with decreasing scalef (1, 0.5, 0.25, 0.1).
# Note: this rebinds crop_1, which earlier cells also assign.
crop_1 = crop_img(img, xcoord[spot_id], ycoord[spot_id], scalef=1, sizef=10, spot_diameter=spot_diameter)
crop_05 = crop_img(img, xcoord[spot_id], ycoord[spot_id], scalef=0.5, sizef=10, spot_diameter=spot_diameter)
crop_025 = crop_img(img, xcoord[spot_id], ycoord[spot_id], scalef=0.25, sizef=10, spot_diameter=spot_diameter)
crop_01 = crop_img(img, xcoord[spot_id], ycoord[spot_id], scalef=0.1, sizef=10, spot_diameter=spot_diameter)

# Show the four crops in a 2x2 grid, in order of decreasing scalef.
fig, axes = plt.subplots(2,2)
axes[0][0].imshow(crop_1)
axes[0][1].imshow(crop_05)
axes[1][0].imshow(crop_025)
axes[1][1].imshow(crop_01)