In [None]:
import pandas as pd
from anndata import AnnData
from scipy.sparse import csr_matrix
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt

## take PDGFRB+ barcodes from initial run and mark them

In [None]:
# Load the clustered AnnData of the initial run; the PDGFRB+ barcodes are picked from this object.
# NOTE(review): absolute, user-specific scratch path — this cell only runs where that file exists.
adata = sc.read('/n/scratch/users/m/meb521/hyprseq/AML1026_merged_rna_all_clustered.h5ad') # 30k: all barcodes with 30 genes

In [None]:
# UMAP of the initial run, one panel for PDGFRB and one for the leiden clusters
# (cluster labels are drawn on the embedding itself).
plt.rcParams['figure.figsize'] = (6, 6)
sc.pl.umap(
    adata,
    color=['PDGFRB', 'leiden'],
    s=10,
    show=False,
    frameon=False,
    legend_loc='on data',
);

In [None]:
#adata = adata[~adata.obs.leiden.isin(['7','4','14','0','1'])].copy() # would remove the clogged clusters
#adata = sc.read('/n/scratch/users/m/meb521/hyprseq/AML1026_merged_rna_all_clustered_withvars_filtered.h5ad') # 16k: clogged filtered

In [None]:
# Barcodes (obs index) of the cells in leiden clusters '10' and '12' of the initial run;
# these are the barcodes treated as PDGFRB+ in the rest of the notebook.
in_pdgfrb_clusters = adata.obs['leiden'].isin(['10', '12'])
PDGFRB = adata.obs.index[in_pdgfrb_clusters]

## load clean data and remove the PDGFRB+ barcodes

In [None]:
# Load the clean dataset (path is relative to the working directory).
# This rebinds `adata`: from here on it no longer refers to the initial run,
# while the `PDGFRB` barcode index captured earlier stays available.
adata = sc.read('STAG_AML_scrublet_clustered.h5ad')

In [None]:
# Same two-panel UMAP (PDGFRB, leiden) as for the initial run, now on the clean data.
plt.rcParams['figure.figsize'] = (6, 6)
sc.pl.umap(
    adata,
    color=['PDGFRB', 'leiden'],
    s=10,
    show=False,
    frameon=False,
    legend_loc='on data',
);

In [None]:
## take the PDGFRB+ barcodes from the initial run and mark them
# in the doublet-removed, clogged-removed, DNA-negative-removed data

In [None]:
# 1 for every barcode of the clean data that is also in the PDGFRB+ barcode set
# collected from the initial run, 0 otherwise.
is_initial_pdgfrb = adata.obs.index.isin(PDGFRB)
adata.obs['weird_PDGFRB'] = is_initial_pdgfrb.astype(int)

In [None]:
# Store the 0/1 flag as a categorical column
# (presumably so the UMAP colors it as discrete groups rather than a continuous scale).
flag_column = adata.obs['weird_PDGFRB']
adata.obs['weird_PDGFRB'] = flag_column.astype('category')

In [None]:
# UMAP of the clean data colored by the initial-run PDGFRB+ flag and by leiden cluster.
plt.rcParams['figure.figsize'] = (6, 6)
# With show=False the call returns the axes; the trailing ';' keeps that repr out of the
# cell output, matching the other plotting cells of this notebook.
sc.pl.umap(adata, color=['weird_PDGFRB','leiden'],s=10,show=False,frameon=False,legend_loc='on data');

In [None]:
# Remove leiden clusters '9', '5' and '16' of the new clustering.
# Not idempotent: `adata` is overwritten with the filtered copy.
clusters_to_drop = ['9', '5', '16']
in_dropped_cluster = adata.obs['leiden'].isin(clusters_to_drop)
adata = adata[~in_dropped_cluster].copy()

In [None]:
# Remove the barcodes flagged as weird_PDGFRB that survived the cluster removal above.
not_flagged = adata.obs['weird_PDGFRB'] != 1
adata = adata[not_flagged].copy()

In [None]:
# Display the AnnData summary after the two filtering steps above.
adata

## run new clustering

In [None]:
# Neighbor graph on the filtered cells (settings noted earlier by the author: n_neighbors=8, n_pcs=40).
# NOTE(review): no PCA is computed in the visible cells; presumably the PCA stored in the
# loaded file is reused — confirm that is intended after removing cells.
sc.pp.neighbors(
    adata,
    n_neighbors=15,
    n_pcs=50,
)

In [None]:
# Recompute the UMAP embedding for the filtered cells with min_dist=1.
sc.tl.umap(adata, min_dist=1)

In [None]:
# Re-cluster the filtered cells with leiden at resolution 1.
sc.tl.leiden(adata, resolution=1) # alternative setting noted by the author: resolution=.9

In [None]:
# UMAP of the re-clustered data: new leiden clusters next to the doublet scores.
# Assumes adata.obs carries a 'doublet_scores' column from the loaded file — TODO confirm.
plt.rcParams['figure.figsize'] = (6, 6)
# With show=False the call returns the axes; the trailing ';' keeps that repr out of the
# cell output, matching the earlier plotting cells of this notebook.
sc.pl.umap(adata, color=['leiden','doublet_scores'],s=10,show=False,frameon=False,legend_loc='on data',ncols=2,legend_fontsize=14,legend_fontoutline=4);


In [None]:
# UMAP of the re-clustered data colored by the genes TOP2A, CD14 and MPO (two panels per row).
plt.rcParams['figure.figsize'] = (6, 6)
# With show=False the call returns the axes; the trailing ';' keeps that repr out of the
# cell output, matching the earlier plotting cells of this notebook.
sc.pl.umap(adata, color=['TOP2A','CD14','MPO'],s=10,show=False,frameon=False,legend_loc='on data',ncols=2,legend_fontsize=14,legend_fontoutline=4);


In [None]:
# Save the filtered, re-clustered object as a gzip-compressed .h5ad (path relative to the working directory).
adata.write_h5ad('STAG_AML_final.h5ad',compression='gzip')