# Annotate clusters

## Import statements

In [1]:
import os,sys
import datetime

In [2]:
import scanpy as sc
# print package versions and current memory usage so the run environment
# is recorded together with the notebook output
sc.logging.print_versions()
sc.logging.print_memory_usage()
# scanpy logging verbosity level
sc.settings.verbosity = 2

scanpy==1.4.4.post1 anndata==0.6.22.post1 umap==0.3.7 numpy==1.15.4 scipy==1.3.1 pandas==0.23.4 scikit-learn==0.20.1 statsmodels==0.10.1 python-igraph==0.7.1 louvain==0.6.1
Memory usage: current 0.20 GB, difference +0.20 GB


In [3]:
## This cell is run once to download my custom functions and import statements from github
#
#!git clone --depth=1 https://github.com/rapolaszilionis/utility_functions
#    
## github doesn't seem to have an option to download a specific version of the repo from the history.
## So I download my utility functions and save the download time by appending it to the directory name.
## These utility functions to be shared together with the notebook.
#
#toappend = datetime.datetime.now().strftime('%y%m%d_%Hh%M')
#newname = "utility_functions_%s"%toappend
#print(newname)
#
#
## rename the py file with utility functions
#os.rename("utility_functions",newname)

In [4]:
# add the utility function folder to the module search path (sys.path)
sys.path.append(os.path.abspath("utility_functions_210525_14h42/"))

# NOTE(review): star import; `np` is used further down without an explicit import
# in this notebook, so it presumably comes from here - confirm
from rz_import_statements import *
import rz_functions as rz
import rz_fig_params # this adjusts mpl.rcParams, almost nothing to import, import after scanpy to overwrite rcParams
import rz_utility_spring as srz

python version: 3.6.7


## Load per-cell metadata (obs)

In [5]:
# load the per-cell metadata table (obs) from a previously saved backup
# NOTE(review): the file name looks like it follows the 'obs_info_<rows>x<cols>_<timestamp>'
# pattern produced by the save cell at the end of this notebook - confirm
obs = rz.load_df('backups/obs_info_32415x13_210821_11h29.npz')

## Load clustering results saved as categorical coloring in spring directory

In [6]:
# root of the SPRING data directory (absolute path on the author's machine)
path1 = "/Users/rapolaszilionis/Google Drive/DG/tmp_for_S3/20210616_RZ_macrophage_fastq_and_counts/quick_analysis_spring_plot/SPRING_dev-master/data/"
# project folder inside the data directory; the trailing slash is relied on
# by the string concatenations further down
project_dir = "%smamito/" % path1
# name of the plot (sub-folder of the project) whose colorings are edited
plot_name = "all_above_900_UMAP_no_cc_2000"

In [7]:
# read the categorical colorings currently stored for this plot
cg0 = srz.read_cell_groupings(project_dir + plot_name + '/categorical_coloring_data.json')

# per-grouping color dictionaries, taken from the 'label_colors' entry of each grouping
cdd = {name: track['label_colors'] for name, track in cg0.items()}

# keep only the 'label_list' entry of each grouping; the color dictionaries,
# which are part of the cell groupings as loaded, are dropped here
cg0 = {name: track['label_list'] for name, track in cg0.items()}

# load the cell index; it is used below as row positions into obs (obs.iloc[cellix, ...])
cellix = np.loadtxt(project_dir + plot_name + '/cell_filter.txt', dtype=int)

In [8]:
# names of the categorical colorings available for this plot
cg0.keys()

dict_keys(['condition', 'leiden_0.3', 'leiden_0.4', 'leiden_0.5', 'leiden_0.6', 'leiden_0.7', 'leiden_0.8', 'leiden_0.9', 'leiden_1.0', 'leiden_1.5', 'leiden_2.0', 'leiden_2.5', 'leiden_3.0', 'library', 'more_than_1000_total_counts', 'more_than_1500_total_counts', 'more_than_2000_total_counts', 'phase', 'population', 'w_sp_cl_all_above_900_UMAP_no_cc_2000_10', 'w_sp_cl_all_above_900_UMAP_no_cc_2000_11', 'w_sp_cl_all_above_900_UMAP_no_cc_2000_12', 'w_sp_cl_all_above_900_UMAP_no_cc_2000_15', 'w_sp_cl_all_above_900_UMAP_no_cc_2000_8', 'w_sp_cl_all_above_900_UMAP_no_cc_2000_9'])

In [9]:
# add a column with cluster label, select the level of granularity

label = 'leiden_0.6'
# Start from an all-NaN column of object dtype: rows not listed in cellix stay NaN,
# and the string labels can be written without a float -> object upcast.
obs[label] = np.full(len(obs), np.nan, dtype=object)
# Address the column by name rather than by position -1. If this cell is re-run
# after further columns have been appended to obs, the label column is no longer
# the last one and a positional -1 would overwrite a different column.
obs.iloc[cellix, obs.columns.get_loc(label)] = cg0[label]

## Dictionary cluster-to-name

In [10]:
# cluster-to-name mapping for leiden resolution 0.6
renamer = {
    # activation 1 (higher in Il1B)
    "0": "AI",
    # metabolically active
    "1": "MA",
    # activation 2
    "2": "AII",
    # resting state
    "3": "RS",
    # high for CD52, LMO4, CHI3L1, COX5B
    "4": "SS",
    # prone to activation
    "5": "PA",
    "6": "Dead",
    # high-activation
    "7": "HA",
    # interferon response (RZ, IFIT1,2,3, CXCL10, CXCL11, ISG15)
    "8": "IR",
    # small cluster (36 cells) made of 3 subclusters, each characterized by the
    # expression of a single gene: AC083862.3, AC087623.2 and AC010542.4.
    # Decision to merge with AI is based on the fact that with leiden resolution 0.5,
    # most of cluster 9 cells end up with cluster 0 cells.
    "9": "AI",
    # another microcluster (16 cells) characterized by the expression of a single
    # gene, C9orf135; also tends to be part of the same cluster as AI ("0") with
    # lower leiden resolutions
    "10": "AI",
}

# translate every per-cell cluster label of the chosen grouping into its population name;
# a label missing from renamer raises KeyError
pop_names = list(map(renamer.__getitem__, cg0[label]))

In [11]:
# add to obs
colname = 'population'
# Start from an all-NaN column of object dtype: rows not listed in cellix stay NaN,
# and the string names can be written without a float -> object upcast.
obs[colname] = np.full(len(obs), np.nan, dtype=object)
# Address the column by name rather than by position -1, so that re-running this
# cell after more columns have been added to obs cannot overwrite another column.
obs.iloc[cellix, obs.columns.get_loc(colname)] = pop_names

In [12]:
# dictionary of new categorical tracks to append to the plot, keyed by track name
cg = {}
cg[colname] = pop_names

## Also rename condition with more descriptive names

In [28]:
# NOTE: this cell carries execution count 28, higher than the renaming cells below (16-18),
# so the saved output already shows the descriptive names rather than the original values
obs['condition'].unique()

array(['Control', 'KIPyV VLPs', 'MCPyV VLPs'], dtype=object)

In [16]:
# original condition code -> descriptive condition name
condrenamer = dict(zip(
    ['1', '2', '3'],
    ['Control', 'KIPyV VLPs', 'MCPyV VLPs'],
))

In [17]:
# Keep the original condition codes in their own column. Do this only once:
# if the cell is re-run after 'condition' has been replaced by descriptive names,
# an unconditional copy would overwrite the codes with those names and the
# renaming step would then fail with a KeyError.
if 'condition_numbered' not in obs.columns:
    obs['condition_numbered'] = obs['condition'].copy()

In [18]:
# replace each condition code by its descriptive name; an unknown code raises KeyError
obs['condition'] = [condrenamer[code] for code in obs['condition_numbered']]

In [29]:
# The other track in cg (pop_names) has one entry per element of cellix, i.e. per
# cell of the plot. Select the same rows of obs, in the same order, so that both
# tracks have matching length and cell order. When cellix covers every row of obs
# in order this is identical to taking the whole column.
cg['condition'] = list(obs['condition'].values[cellix])

## Append colortrack to SPRING explorer

In [32]:
# re-read the categorical colorings currently stored for this plot
# (this replaces the label-list-only cg0 built earlier with the full groupings)
cg0 = srz.read_cell_groupings(project_dir + plot_name + '/categorical_coloring_data.json')

# color dictionary of dictionaries: grouping name -> its 'label_colors' entry
cdd = {name: track['label_colors'] for name, track in cg0.items()}

In [33]:
# append categorical colortrack
# passes the plot directory, the new tracks collected in cg, and the color
# dictionaries read from the plot's current colorings
srz.append_cell_groupings(project_dir+plot_name,cg,colordd=cdd)

## Save obs

In [34]:
# no need to save the entire adata object, counts didn't change

# the file name records the table shape (rows x columns) followed by the value of rz.now()
fname = 'backups/obs_info_%dx%d_%s' % (*obs.shape, rz.now())
print(fname)
rz.save_df(obs, fname)

backups/obs_info_32415x16_211010_11h50
