In [1]:
!pip install -q SPARQLWrapper

In [2]:
!pip install -e /dss/dsshome1/04/di93zer/git/cellnet --no-deps

Obtaining file:///dss/dsshome1/04/di93zer/git/cellnet
  Preparing metadata (setup.py) ... [?25ldone
[?25hInstalling collected packages: cellnet
  Running setup.py develop for cellnet
Successfully installed cellnet


In [1]:
import os
from os.path import join

import numpy as np
import pandas as pd

from cellnet.utils.cell_ontology import retrieve_child_nodes_from_ubergraph

In [2]:
# Root directory of the dataset this notebook works on. The notebook reads
# `categorical_lookup/cell_type.parquet` from it and writes
# `cell_type_hierarchy/child_matrix.npy` into it.
#
# Set the CELLNET_DATA_PATH environment variable to point the notebook at a
# different dataset without editing this cell, e.g.
#   CELLNET_DATA_PATH=/mnt/dssmcmlfs01/merlin_cxg_raw_split_by_tech_sample
DATA_PATH = os.environ.get(
    'CELLNET_DATA_PATH',
    '/mnt/dssmcmlfs01/merlin_cxg_2023_05_15_sf-log1p',
)

# Compute lookup matrices 

In [3]:
# Lookup table of cell types; it carries a `label` column with the cell-type names.
cell_type_mapping = pd.read_parquet(join(DATA_PATH, 'categorical_lookup/cell_type.parquet'))

# Reverse lookup: indexed by label, with `idx` holding the row position of that
# label inside `cell_type_mapping`.
inverse_mapping = cell_type_mapping.copy()
inverse_mapping['idx'] = range(len(inverse_mapping))
inverse_mapping = inverse_mapping.set_index('label', drop=True)
inverse_mapping.head()

Unnamed: 0_level_0,idx
label,Unnamed: 1_level_1
B cell,0
Bergmann glial cell,1
"CD14-low, CD16-positive monocyte",2
CD14-positive monocyte,3
"CD14-positive, CD16-negative classical monocyte",4


In [4]:
# All cell-type labels, in the row order of the lookup table.
celltypes = cell_type_mapping.label.tolist()
# Set view of the same labels: membership tests below are O(1) instead of
# scanning the list once per candidate child.
celltype_set = set(celltypes)

# NOTE(review): `retrieve_child_nodes_from_ubergraph` presumably returns a dict
# mapping each queried label to the labels of its ontology children -- confirm
# against cellnet. Only children that are themselves present in this dataset's
# lookup table are kept.
child_nodes_dict = {
    parent: [child for child in children if child in celltype_set]
    for parent, children in retrieve_child_nodes_from_ubergraph(celltypes).items()
}

In [5]:
# Series mapping label -> row position, hoisted out of the loop.
label_to_idx = inverse_mapping['idx']

# For every cell type (in lookup-table order) collect the sorted row positions
# of its child cell types.
children_idx = [
    label_to_idx.loc[child_nodes_dict[label]].sort_values().tolist()
    for label in cell_type_mapping['label']
]

# Store the per-row child positions next to the labels they belong to.
cell_type_mapping['children'] = children_idx

In [6]:
# Make sure the output directory for the hierarchy matrices exists
# (no error if it is already there).
hierarchy_dir = join(DATA_PATH, 'cell_type_hierarchy')
os.makedirs(hierarchy_dir, exist_ok=True)

In [7]:
# Square indicator matrix over cell types. Starting from the identity marks
# every cell type as matching itself.
n_cell_types = len(cell_type_mapping)
child_matrix = np.eye(n_cell_types)

# Row r additionally gets a 1 in every column listed in the `children` entry
# of the r-th cell type.
for row, descendant_idx in zip(range(n_cell_types), cell_type_mapping['children']):
    child_matrix[row, descendant_idx] = 1.

# Persist the matrix in NumPy's .npy format.
child_matrix_path = join(DATA_PATH, 'cell_type_hierarchy/child_matrix.npy')
with open(child_matrix_path, 'wb') as out_file:
    np.save(out_file, child_matrix)

# Sanity check lookup matrices

In [8]:
# Sanity check: list the labels flagged in row 0 of `child_matrix`
# (row 0 corresponds to 'B cell' in the lookup table).
# np.where yields *positions*, so the rows are selected with positional .iloc;
# label-based .loc would only be correct while the frame keeps a default
# 0..n-1 index.
flagged_positions = np.where(child_matrix[0, :] == 1.)[0]
cell_type_mapping.iloc[flagged_positions].label.tolist()

['B cell',
 'class switched memory B cell',
 'immature B cell',
 'mature B cell',
 'memory B cell',
 'naive B cell',
 'plasmablast',
 'precursor B cell',
 'transitional stage B cell']