In [1]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import scanpy as sc
import numpy as np
import pandas as pd
from matplotlib.pyplot import rc_context
# Notebook-wide display / logging configuration.
pd.options.display.max_rows = 200  # show up to 200 rows when displaying pandas objects
sc.logging.print_header()  # print the versions of the core packages used in this run
sc.settings.verbosity = 3
sc.settings.set_figure_params(dpi=120, color_map='viridis')

scanpy==1.9.1 anndata==0.8.0 umap==0.5.3 numpy==1.23.4 scipy==1.9.3 pandas==1.5.1 scikit-learn==1.1.3 statsmodels==0.13.5 python-igraph==0.10.2 pynndescent==0.5.8


In [2]:
# Input dataset: only the wave 2 patients who received steroids, positive cases only.
steroid_pos_only = sc.read(
    '/home/jovyan/scripts/renal_covid_19/scripts_for_publication/'
    'trend_of_cell_with_steroid/steroid_pos_only_all_monocyte.h5ad'
)
steroid_pos_only

AnnData object with n_obs × n_vars = 12802 × 1215
    obs: 'annotation22_ES', 'centre', 'sample_id', 'sample_id_broad', 'sample_date', 'sample_date_yr', 'pool', 'pool_broad', 'haniffa_broad_predLabel', 'orig.ident', 'merged_souporcell_cluster', 'merged_souporcell_status', 'patient_id', 'case_control', 'WHO_severity', 'sex', 'calc_age', 'discharge_date', 'date_positive_swab', 'date_first_symptoms', 'admission_date', 'ethnicity', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden_0.8', '5_subcluster', '9_subcluster', 'annotation', 'annotation23_ES', 'annotation24_ES', 'annot3', 'annot2', 'annot1', 'leiden_0.5', 'annot4', 'wave_case_control', 'steroid_status', 'time_from_first_symptoms', 'time_from_positive_swab', 'time_from_infection', 'steroid_date', 'days_from_steroid', 'steroid_timeline', 'WHO_temp_severity', 'steroid_severity'
    var: 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersi

In [3]:
# Cell-type labels present in `annot4` before relabelling.
steroid_pos_only.obs['annot4'].unique().tolist()

['CD14mono_anti_inflammatory',
 'CD16mono',
 'CD14mono',
 'CD14mono_IFN',
 'CD16mono_IFN',
 'CD16mono_C1',
 'Int.mono',
 'CD14mono_activated']

In [4]:
# Patients contributing cells to this subset.
pd.unique(steroid_pos_only.obs['patient_id'])

['C138', 'C101', 'C145', 'C147', 'C146', 'C33']
Categories (6, object): ['C101', 'C138', 'C145', 'C146', 'C147', 'C33']

In [5]:
# Sanity check: which `case_control` values occur in this subset.
pd.unique(steroid_pos_only.obs['case_control'])

['POSITIVE']
Categories (1, object): ['POSITIVE']

In [6]:
# Relabel two CD14 monocyte states: fold 'CD14mono_activated' into 'CD14mono'
# and rename 'CD14mono_anti_inflammatory' to 'CD14mono_alt_act'.
steroid_pos_only.obs['annot4'] = (
    steroid_pos_only.obs['annot4']
    .str.replace('CD14mono_activated', 'CD14mono')
    .str.replace('CD14mono_anti_inflammatory', 'CD14mono_alt_act')
)

In [7]:
# Confirm the relabelling: list the `annot4` labels now present.
steroid_pos_only.obs['annot4'].unique().tolist()

['CD14mono_alt_act',
 'CD16mono',
 'CD14mono',
 'CD14mono_IFN',
 'CD16mono_IFN',
 'CD16mono_C1',
 'Int.mono']

In [8]:
# Derive `steroid_timeline_new` from `steroid_timeline`: the two known timeline
# values get a 'groupA_'/'groupB_' prefix, and every other value becomes the
# literal string 'nan'.
steroid_pos_only.obs['steroid_timeline_new'] = (
    steroid_pos_only.obs['steroid_timeline']
    .astype(object)
    .map({
        'before_steroid_pos': 'groupA_before_steroid_pos',
        'after_steroid_pos': 'groupB_after_steroid_pos',
    })
    .fillna('nan')
)

In [9]:
# Re-check the `case_control` values after the edits to `.obs`.
pd.unique(steroid_pos_only.obs['case_control'])

['POSITIVE']
Categories (1, object): ['POSITIVE']

In [10]:
# Severity levels recorded in `WHO_temp_severity` for this subset.
pd.unique(steroid_pos_only.obs['WHO_temp_severity'])

['mild', 'severe', 'moderate', 'critical']
Categories (4, object): ['critical', 'mild', 'moderate', 'severe']

In [11]:
# Python <-> R bridge used by the `%%R` cells below.
import anndata2ri
# Turn on the anndata2ri conversion so an AnnData object can be handed to R
# via `%%R -i ...` (see the export cell further down).
anndata2ri.activate()
# Load the rpy2 IPython extension, which provides the `%%R` cell magic.
%load_ext rpy2.ipython

In [12]:
%%R
## Count cells in neighbourhoods
##
## For every neighbourhood stored in `x`, count how many member cells come from
## each sample, and store the resulting (neighbourhood x sample) matrix in the
## nhoodCounts slot of `x`.
##
## Arguments:
##   x         object exposing nhoods() / nhoodCounts<-() accessors, with cells
##             in the rows of nhoods(x) and neighbourhoods in its columns
##             (presumably a miloR Milo object -- confirm against the caller).
##   samples   either the name of a single column of `meta.data` holding the
##             sample ID of every cell, or (when `meta.data` is NULL) a vector
##             with one sample ID per cell, i.e. of length ncol(x).
##   meta.data optional per-cell annotation table; anything accepted by
##             as.data.frame(). Its row names become the row names of the
##             intermediate indicator matrix.
##
## Returns `x` with nhoodCounts(x) populated. Stops with an error when the
## inputs are inconsistent or when no neighbourhoods have been computed.
countCells_ <- function(x, samples, meta.data=NULL){

    has.meta <- !is.null(meta.data)

    # Always cast to a plain data.frame: tibbles pass is.data.frame() but
    # `[, col]` on them does not return a vector, which would break the
    # sample-ID extraction below.
    if(has.meta){
        meta.data <- as.data.frame(meta.data)
    }

    if(has.meta && length(samples) > 1){
        stop("Multiple sample columns provided, please specify a unique column name")
    } else if(!has.meta && length(samples) != ncol(x)){
        stop(paste0("Length of vector does not match dimensions of object. Length:",
                    length(samples), " Dimensions: ", ncol(x)))
    }

    # check the nhoods slot is populated
    if(ncol(nhoods(x)) == 1 && nrow(nhoods(x)) == 1){
        stop("No neighbourhoods found. Please run makeNhoods() first.")
    }

    message("Checking meta.data validity")
    # Resolve one sample ID per cell, whichever way the samples were supplied.
    # Previously the vector form (meta.data = NULL) passed validation but then
    # failed on nrow(meta.data) / meta.data[samples].
    if(has.meta){
        cell.samples <- as.character(meta.data[, samples])
        cell.names <- rownames(meta.data)
    } else {
        cell.samples <- as.character(samples)
        cell.names <- colnames(x)
    }
    samp.ids <- unique(cell.samples)

    # The matrix product below needs one annotation row per row of nhoods(x).
    if(length(cell.samples) != nrow(nhoods(x))){
        stop(paste0("Number of annotated cells (", length(cell.samples),
                    ") does not match the number of cells in nhoods(x) (",
                    nrow(nhoods(x)), ")"))
    }

    num.hoods <- ncol(nhoods(x))

    ## Convert sample IDs to binary dummies in a sparse (cell x sample) matrix
    dummy.meta.data <- Matrix(data=0, nrow=length(cell.samples), ncol=length(samp.ids), sparse=TRUE)
    colnames(dummy.meta.data) <- samp.ids
    rownames(dummy.meta.data) <- cell.names
    for (s in seq_along(samp.ids)){
        # which() drops NA comparisons, so cells without a sample ID are not counted
        s.ixs <- which(cell.samples == samp.ids[s])
        dummy.meta.data[s.ixs, s] <- 1
    }

    message("Counting cells in neighbourhoods")
    # (nhood x cell) %*% (cell x sample) -> cells per sample in each neighbourhood
    count.matrix <- Matrix::t(nhoods(x)) %*% dummy.meta.data

    # add to the object; seq_len() stays correct even for zero neighbourhoods
    rownames(count.matrix) <- seq_len(num.hoods)
    nhoodCounts(x) <- count.matrix

    return(x)
}

In [13]:
# Work on an independent copy so `steroid_pos_only` itself is left untouched:
# in the following cells the copy gets a recomputed neighbour graph, which is
# then extracted and stripped from the copy before the export to R.
steroid_pos_only_no_knnx = steroid_pos_only.copy()


In [14]:
# Recompute the neighbourhood graph on the copy, using the
# "X_pca_harmony_orig.ident" representation (presumably the Harmony-corrected
# PCA, batch-corrected on `orig.ident` -- confirm where it was computed).
# All other parameters are left at scanpy's defaults.
sc.pp.neighbors(steroid_pos_only_no_knnx, use_rep="X_pca_harmony_orig.ident")

computing neighbors
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:19)


In [15]:
# Keep a reference to the connectivities matrix before `.obsp` is cleared in
# the next cell, so the graph can be exported to R as a separate object.
knn_adjacencyx = steroid_pos_only_no_knnx.obsp["connectivities"]

In [16]:
# Remove the graph-related entries from the copy before it is handed to R;
# the connectivities themselves were saved to `knn_adjacencyx` beforehand.
# `pop(key, None)` keeps this cell re-runnable: a plain `pop(key)` raises
# KeyError the second time, once the entry is already gone.
steroid_pos_only_no_knnx.uns.pop("X_pca_harmony_orig.ident", None)
steroid_pos_only_no_knnx.uns.pop("neighbors", None)
# Clear all pairwise (cell x cell) matrices stored on the copy.
steroid_pos_only_no_knnx.obsp = None


In [17]:
# Convert the sparse connectivities to a dense (n_obs x n_obs) array for the
# transfer to R. The guard makes the cell safe to re-run: after the first run
# `knn_adjacencyx` is already a dense array, which has no sparse-to-dense
# conversion method. `.toarray()` is the explicit spelling of the legacy `.A`.
if hasattr(knn_adjacencyx, "toarray"):
    knn_adjacencyx = knn_adjacencyx.toarray()

In [18]:
%%R -i steroid_pos_only_no_knnx -i knn_adjacencyx
# `-i` pushes the two Python objects into the R session (the AnnData object is
# converted on the way in by anndata2ri, activated earlier in the notebook).
# NOTE: setwd() changes the working directory of the R session, so it also
# affects any later %%R cell that uses relative paths.
setwd('/home/jovyan/scripts/renal_covid_19/scripts_for_publication/trend_of_cell_with_steroid/')
getwd()
# Bundle the counting helper, the converted data object and the dense kNN
# adjacency matrix into a single .RData file in the directory set above.
save(countCells_, steroid_pos_only_no_knnx, knn_adjacencyx, file = 'steroid_pos_only.RData')

  return AnnData(exprs, obs, var, uns, obsm or None, layers=layers)


In [19]:
# Check that the relabelled `annot4` values are intact on the exported copy.
pd.unique(steroid_pos_only_no_knnx.obs['annot4'])

array(['CD14mono_alt_act', 'CD16mono', 'CD14mono', 'CD14mono_IFN',
       'CD16mono_IFN', 'CD16mono_C1', 'Int.mono'], dtype=object)

In [20]:
# Is sample 'C141' among the `sample_id_broad` values of the exported copy?
'C141' in set(steroid_pos_only_no_knnx.obs['sample_id_broad'].unique())

False