In [1]:
import os
import scanpy as sc
import pandas as pd
import numpy as np
from src.functions import get_matrices_from_dfs

In [2]:
# Root folder of the processed data; later cells join a sample name and
# "analysis" onto this path.
project_path = '/Volumes/alexandr/smenon/2022-07-13_Glioblastoma/processed_files'

# os.listdir() returns entries in arbitrary order and also lists plain files
# and hidden entries (e.g. .DS_Store on macOS volumes). Keep only visible
# directories and sort them so that samples[0] is the same on every run.
samples = sorted(
    entry
    for entry in os.listdir(project_path)
    if not entry.startswith('.')
    and os.path.isdir(os.path.join(project_path, entry))
)

In [3]:
# Analysis folder of the first sample in the listing.
sample = os.path.join(project_path, samples[0], "analysis")

# Paths of the input files, relative to the sample's analysis folder.
files = dict(
    sm_matrix='ablation_mark_analysis/spatiomolecular_adata.h5ad',
    overlap_regions='overlap_analysis1/overlap.regions.csv',
    mark_regions='overlap_analysis1/ablation_mark.regions.csv',
    cell_regions='overlap_analysis1/cell.regions.csv',
    cell_sm_matrix='single_cell_analysis/spatiomolecular_adata.h5ad',
)

# Same keys as `files`, with each relative path joined onto the sample folder.
project_files = {
    name: os.path.join(sample, relative_path)
    for name, relative_path in files.items()
}

project_files

{'sm_matrix': '/Volumes/alexandr/smenon/2022-07-13_Glioblastoma/processed_files/B1/analysis/ablation_mark_analysis/spatiomolecular_adata.h5ad',
 'overlap_regions': '/Volumes/alexandr/smenon/2022-07-13_Glioblastoma/processed_files/B1/analysis/overlap_analysis1/overlap.regions.csv',
 'mark_regions': '/Volumes/alexandr/smenon/2022-07-13_Glioblastoma/processed_files/B1/analysis/overlap_analysis1/ablation_mark.regions.csv',
 'cell_regions': '/Volumes/alexandr/smenon/2022-07-13_Glioblastoma/processed_files/B1/analysis/overlap_analysis1/cell.regions.csv',
 'cell_sm_matrix': '/Volumes/alexandr/smenon/2022-07-13_Glioblastoma/processed_files/B1/analysis/single_cell_analysis/spatiomolecular_adata.h5ad'}

In [4]:
# Load the three region tables (cells, ablation marks, and their overlaps)
# from the CSV files resolved in `project_files`.
cell_regions, mark_regions, overlap_regions = (
    pd.read_csv(project_files[table_name])
    for table_name in ('cell_regions', 'mark_regions', 'overlap_regions')
)

# Display the overlap table as this cell's output.
overlap_regions

Unnamed: 0,overlap_id,cell_id,am_id,bbox_x0,bbox_y0,bbox_x1,bbox_y1,area
0,1,3,17,645,22,667,30,120.0
1,2,4,55,2122,22,2152,46,333.0
2,3,5,70,2729,23,2742,45,220.0
3,4,5,71,2746,21,2778,42,353.0
4,5,6,37,1440,22,1456,53,290.0
...,...,...,...,...,...,...,...,...
1236,1237,321,6269,1104,3092,1132,3110,308.0
1237,1238,321,6348,1085,3114,1087,3115,2.0
1238,1239,321,6349,1107,3114,1139,3131,331.0
1239,1240,321,6350,1143,3114,1178,3146,796.0


In [5]:
# Build the two matrices used by the later normalization steps from the three
# region tables. NOTE(review): the exact layout of the returned matrices is
# defined in src.functions.get_matrices_from_dfs - confirm there.
overlap_matrix, sampling_spec_matrix = get_matrices_from_dfs(
    mark_area=mark_regions,
    cell_area=cell_regions,
    marks_cell_overlap=overlap_regions,
)

In [6]:
# Read the ablation-mark AnnData file via the already-resolved path in
# `project_files` (the same path as joining `sample` with files['sm_matrix']).
sm_matrix = sc.read(project_files['sm_matrix'])

# Show the AnnData summary as this cell's output.
sm_matrix

AnnData object with n_obs × n_vars = 6400 × 2100
    obs: 'center_x', 'center_y', 'am_area', 'am_sampling_area', 'am_sampling_ratio', 'am_nearest_cell_distance', 'centroid-0', 'centroid-1', 'bbox-0', 'bbox-1', 'bbox-2', 'bbox-3', 'local_centroid-0', 'local_centroid-1', 'area', 'filled_area', 'solidity', 'perimeter', 'eccentricity', 'euler_number', 'moments_hu-0', 'moments_hu-1', 'moments_hu-2', 'moments_hu-3', 'moments_hu-4', 'moments_hu-5', 'moments_hu-6', 'moments_central-0-0', 'moments_central-0-1', 'moments_central-0-2', 'moments_central-0-3', 'moments_central-1-0', 'moments_central-1-1', 'moments_central-1-2', 'moments_central-1-3', 'moments_central-2-0', 'moments_central-2-1', 'moments_central-2-2', 'moments_central-2-3', 'moments_central-3-0', 'moments_central-3-1', 'moments_central-3-2', 'moments_central-3-3', 'inertia_tensor_eigvals-0', 'inertia_tensor_eigvals-1', 'major_axis_length', 'minor_axis_length', 'weighted_moments_central-0-0-DAPI', 'weighted_moments_central-0-1-DAPI'

In [7]:
from src.functions import PIXEL_PRE


# Prefix every observation label with PIXEL_PRE. Presumably this makes the
# labels match the pixel labels used by the overlap matrices - TODO confirm.
# The assignment mutates `sm_matrix` in place, so guard it: without the check,
# re-running this cell would prepend the prefix a second time.
if not sm_matrix.obs_names.str.startswith(PIXEL_PRE).all():
    sm_matrix.obs_names = PIXEL_PRE + sm_matrix.obs_names

In [8]:
from src.functions import get_molecule_normalization_factors
import statistics as st


# Compute the two normalization inputs from the pixel intensities and the
# overlap matrix, passing statistics.median as the `method` callable.
# NOTE(review): the precise meaning of the two return values is defined in
# src.functions - confirm there.
total_pixel_overlap, full_pixel_intensities_median = (
    get_molecule_normalization_factors(
        sm_matrix.to_df(),
        overlap_matrix,
        method=st.median,
    )
)

In [9]:
import src.functions
from importlib import reload

# Reload BEFORE importing the function. importlib.reload() re-executes the
# module but does not rebind names that were imported from it earlier, so a
# `from src.functions import ...` placed before the reload would keep pointing
# at the stale, pre-reload function object.
reload(src.functions)
from src.functions import correct_intensities_quantile_regression


# Run the quantile-regression correction on the pixel intensities, using every
# ion in the matrix as a reference ion.
corrected_intensities = correct_intensities_quantile_regression(
    sm_matrix.to_df(),
    total_pixel_overlap,
    full_pixel_intensities_median,
    reference_ions=sm_matrix.var_names,
)

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [10]:
# Work on a copy so the uncorrected `sm_matrix` stays available for comparison.
corr_sm_matrix = sm_matrix.copy()
# Replace the data matrix with the corrected intensities.
# NOTE(review): assumes `corrected_intensities` has the same shape and the same
# row/column order as `sm_matrix` - confirm against
# correct_intensities_quantile_regression.
corr_sm_matrix.X = corrected_intensities

In [11]:
# Side-by-side view for one ion: corrected intensity, original intensity and
# the per-pixel overlap values. Zeros are turned into NaN so that dropna()
# keeps only pixels where all three values are non-zero.
pd.concat(
    [
        corr_sm_matrix.to_df()['C10H20NO8P+K'].replace(0, np.nan),
        sm_matrix.to_df()['C10H20NO8P+K'].replace(0, np.nan),
        total_pixel_overlap.replace(0, np.nan),
    ],
    axis=1,
).dropna()

Unnamed: 0,C10H20NO8P+K,C10H20NO8P+K.1,total_pixel_area
pixel_80,216.316875,406.304199,0.694704
pixel_180,10.714414,103.297676,0.139438
pixel_189,38.253912,144.616760,0.349585
pixel_229,1.212734,117.070709,0.014523
pixel_230,90.328211,117.070709,1.000000
...,...,...,...
pixel_6145,23.144753,254.800949,0.122407
pixel_6162,1.284767,158.389786,0.011423
pixel_6230,172.105505,447.623291,0.504683
pixel_6231,334.563476,482.055847,0.901247


In [12]:
# Save the corrected AnnData object; the path is relative to the notebook's
# working directory.
corr_sm_matrix.write(
    'data/Lx_Glioblastoma/spatiomolecular_adata_corrected.h5ad'
)