# Lang Augur

In [1]:
import warnings

# Suppress all warnings for the rest of the session: no category, module or
# message restriction is given, so every warning is ignored.
warnings.filterwarnings("ignore")

In [2]:
from pathlib import Path
import sys

# Make the parent of the current working directory importable (inserted at
# position 1 of sys.path) — presumably where common_config lives.
parent_dir = Path.cwd().parent
sys.path.insert(1, str(parent_dir))
import common_config

In [3]:
# Call the project-local figure setup helper; presumably it applies
# journal-style plotting defaults — confirm in common_config.
common_config.figure_journal_basic()

In [None]:
import pertpy as pt
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
# Read the dataset from an .h5ad file; the path is relative to the
# notebook's working directory.
adata = sc.read_h5ad("230114_hPCLS_perturbation.h5ad")

In [6]:
# Display the AnnData summary (dimensions and the available annotation keys).
adata

AnnData object with n_obs × n_vars = 63581 × 17343
    obs: 'condition', 'modality', 'patient', 'treatment', 'identifier', 'n_counts', 'n_genes', 'percent_mito', 'QC_group', 'doublet_scores', 'size_factors', 'leiden_1', 'leiden_2', 'cell_type', 'cell_type_group', 'sample'
    var: 'n_cells', 'n_counts', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'highly_variable_nbatches', 'highly_variable_intersection'
    uns: 'QC_group_colors', 'cell_type_colors', 'cell_type_group_colors', 'condition_colors', 'dendrogram_cell_type', 'hvg', 'identifier_colors', 'leiden', 'leiden_1_colors', 'leiden_2_colors', 'modality_colors', 'name_colors', 'neighbors', 'patient_colors', 'pca', 'rank_genes_groups', 'sample_colors', 'timepoint_colors', 'treatment_colors', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts', 'unsouped_counts'
    obsp: 'connectivities', 'distances'

In [8]:
# Create the Augur instance, selecting the random-forest classifier estimator.
# NOTE(review): no seed or estimator parameters are passed here — confirm
# whether the scores need to be reproducible from run to run.
ag_rfc = pt.tl.Augur('random_forest_classifier')

In [9]:
# Keep only the cells annotated as one of the five cell types analysed below.
cell_types_of_interest = [
    'Aberrant Basaloid',
    'Myofibroblasts',
    'ectopic EC',
    'Profibrotic Macrophages',
    'Pericytes',
]
adata = adata[adata.obs['cell_type'].isin(cell_types_of_interest)]

In [10]:
# Build one subset per comparison: FC vs FC+Nintedanib (adata1) and
# FC vs FC+CMP4 (adata2). The FC cells are part of both subsets.
nintedanib_mask = adata.obs['treatment'].isin(['FC', 'FC+Nintedanib'])
cmp4_mask = adata.obs['treatment'].isin(['FC', 'FC+CMP4'])
adata1 = adata[nintedanib_mask].copy()
adata2 = adata[cmp4_mask].copy()

In [11]:
# Duplicate the treatment column as 'label' — presumably the column Augur
# reads as the condition label; confirm against ag_rfc.load.
adata1.obs['label'] = adata1.obs['treatment']

In [12]:
# Duplicate the treatment column as 'label' — presumably the column Augur
# reads as the condition label; confirm against ag_rfc.load.
adata2.obs['label'] = adata2.obs['treatment']

In [13]:
# Pass each comparison subset through Augur's load step
# (Nintedanib subset first, then CMP4).
loaded_data1, loaded_data2 = ag_rfc.load(adata1), ag_rfc.load(adata2)

In [14]:
# Mirror the var index (var_names) into a regular 'name' column of .var.
loaded_data1.var['name'] = loaded_data1.var_names

In [15]:
# Mirror the var index (var_names) into a regular 'name' column of .var.
loaded_data2.var['name'] = loaded_data2.var_names

In [16]:
# Augur run for the FC vs FC+Nintedanib comparison:
# 100 subsamples of size 20, spread over 32 threads.
nintedanib_adata, nintedanib_results = ag_rfc.predict(
    loaded_data1,
    subsample_size=20,
    n_subsamples=100,
    select_variance_features=True,
    n_threads=32,
)

# Show the summary metrics of this run.
nintedanib_results["summary_metrics"]

Output()

: 

In [None]:
# Lollipop plot of the Nintedanib results, limited to a fixed x-range
# (the same range the CMP4 plot uses), then written to disk.
ag_rfc.plot_lollipop(nintedanib_results)
plt.xlim(0.5, 0.75)
plt.savefig("figures/nintedanib_augur_lollipop.png", bbox_inches="tight")

In [None]:
# Plot the top 15 important features for the Nintedanib comparison and save
# the current figure. The return value is kept in `important_features`.
important_features = ag_rfc.plot_important_features(nintedanib_results, top_n=15)
plt.savefig("figures/nintedanib_augur_feature_importance.png", bbox_inches="tight")

In [None]:
# NOTE(review): this repeats the identical assignment made right after
# loading loaded_data2; it looks redundant unless .var changed in between.
loaded_data2.var['name'] = loaded_data2.var_names

In [None]:
# Augur run for the FC vs FC+CMP4 comparison, using the same settings as
# the Nintedanib run (100 subsamples of size 20, 32 threads).
cmp4_adata, cmp4_results = ag_rfc.predict(
    loaded_data2,
    subsample_size=20,
    n_subsamples=100,
    select_variance_features=True,
    n_threads=32,
)

# Show the summary metrics of this run.
cmp4_results['summary_metrics']

In [None]:
# Lollipop plot of the CMP4 results, limited to a fixed x-range
# (the same range the Nintedanib plot uses), then written to disk.
ag_rfc.plot_lollipop(cmp4_results)
plt.xlim(0.5, 0.75)
plt.savefig("figures/cmp4_augur_lollipop.png", bbox_inches="tight")

In [None]:
# Plot the top 15 important features for the CMP4 comparison and save the
# current figure. This calls plot_important_features, the same Augur method
# used for the Nintedanib comparison; the unprefixed `important_features`
# name is not an Augur method.
important_features = ag_rfc.plot_important_features(cmp4_results, top_n=15)
plt.savefig("figures/cmp4_augur_feature_importance.png", bbox_inches="tight")

In [None]:
# Scatter the Nintedanib results against the CMP4 results and save the figure.
# Plotting and saving must share one cell: with the inline backend the figure
# is closed when the cell that created it finishes, so a savefig call in a
# later cell would write an empty canvas.
ag_rfc.plot_scatterplot(nintedanib_results, cmp4_results)
plt.savefig("figures/cmp4_vs_nintedanib_augur.png", bbox_inches="tight")

In [None]:
# Pull the 'mean_augur_score' row out of each summary table.
# The trailing comma makes the key a one-element tuple, i.e. a row-only
# selection; left as written.
nintedanib_scores = nintedanib_results['summary_metrics'].loc['mean_augur_score',]
cmp4_scores = cmp4_results['summary_metrics'].loc['mean_augur_score',]

In [None]:
# Turn the Nintedanib scores into a tidy frame: tag the treatment, move the
# index into a regular column and call that column 'cell_type'.
nintedanib_scores = (
    pd.DataFrame(nintedanib_scores)
    .assign(treatment='Nintedanib')
    .reset_index()
    .rename(columns={'index': 'cell_type'})
)
nintedanib_scores

In [None]:
# Turn the CMP4 scores into a tidy frame: tag the treatment, move the
# index into a regular column and call that column 'cell_type'.
cmp4_scores = (
    pd.DataFrame(cmp4_scores)
    .assign(treatment='cmp4')
    .reset_index()
    .rename(columns={'index': 'cell_type'})
)
cmp4_scores

In [None]:
# Stack the two per-treatment score tables (CMP4 first, then Nintedanib).
# Both inputs carry a fresh 0..n-1 index after reset_index, so renumber the
# rows here; otherwise the combined frame has duplicate row labels.
scores = pd.concat([cmp4_scores, nintedanib_scores], axis=0, ignore_index=True)
scores