In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import scanpy.external as sce
from matplotlib import pyplot as plt
import scvelo as scv
import anndata as ad
import seaborn as sns

In [None]:
# Session-wide scanpy logging setup: set the verbosity level, then print
# scanpy's logging header.
sc.settings.verbosity = 3
sc.logging.print_header()

In [None]:
# Read the AnnData object stored at the path below; the bare `garcia` at the
# end of the cell displays its summary.
garcia_path = '/home/j87832lw/mounting/endometrium/garciacorrected.h5ad'
garcia = sc.read_h5ad(garcia_path)
garcia

In [None]:
# Plot the 20 highest-expressed genes before any filtering is applied.
sc.pl.highest_expr_genes(garcia, n_top=20)

# Flag genes whose name starts with 'MT-' and pass that flag to the QC-metric
# computation as a qc variable; results are written into `garcia` (inplace=True).
garcia.var['mt'] = garcia.var_names.str.startswith('MT-')
sc.pp.calculate_qc_metrics(
    garcia,
    qc_vars=['mt'],
    percent_top=None,
    log1p=False,
    inplace=True,
)

# Per-cell QC distributions.
qc_columns = ['n_genes_by_counts', 'total_counts', 'pct_counts_mt']
sc.pl.violin(garcia, qc_columns, jitter=0.4, multi_panel=True)

# Total counts against the mitochondrial percentage, then against gene count.
for y_metric in ('pct_counts_mt', 'n_genes_by_counts'):
    sc.pl.scatter(garcia, x='total_counts', y=y_metric)

In [None]:
# Lower bounds: drop cells with fewer than 200 detected genes and genes
# detected in fewer than 3 cells.
sc.pp.filter_cells(garcia, min_genes=200)
sc.pp.filter_genes(garcia, min_cells=3)

# Upper bounds on the QC metrics, combined into a single boolean mask so the
# subset is taken once instead of stacking three views on top of each other.
qc_pass = (
    (garcia.obs['n_genes_by_counts'] < 7000)
    & (garcia.obs['total_counts'] < 50000)
    & (garcia.obs['pct_counts_mt'] < 30)
)

# Slicing an AnnData returns a view; take an explicit copy so that the
# in-place steps in later cells operate on a real object rather than
# triggering an implicit view-to-copy conversion.
garcia = garcia[qc_pass, :].copy()

In [None]:
# Normalise every cell to a total of 1e4 counts, then apply log1p.
# Both calls modify `garcia` in place; the trailing expression displays it.
sc.pp.normalize_total(
    garcia,
    target_sum=1e4,
)
sc.pp.log1p(garcia)
garcia

In [None]:
# Select highly variable genes, using the 'Patient' column of .obs as the
# batch key, and plot the selection.
sc.pp.highly_variable_genes(
    garcia,
    min_mean=0.0125,
    max_mean=3,
    min_disp=0.5,
    batch_key='Patient',
)
sc.pl.highly_variable_genes(garcia)

# Keep the current (all-gene) state in .raw before subsetting the genes.
garcia.raw = garcia

# Subsetting returns a view; copy it explicitly because the next cell scales
# the matrix in place, which would otherwise force an implicit copy of the view.
garcia = garcia[:, garcia.var['highly_variable']].copy()
garcia

In [None]:
# Scale the expression matrix, compute 150 principal components with the
# ARPACK solver, and plot the variance ratio of the components.
sc.pp.scale(garcia)
sc.tl.pca(
    garcia,
    svd_solver='arpack',
    n_comps=150,
)
sc.pl.pca_variance_ratio(garcia)

In [None]:
# Build the neighbour graph from the first 15 PCs with 300 neighbours per cell.
sc.pp.neighbors(
    garcia,
    n_neighbors=300,
    n_pcs=15,
)

# Leiden clustering at resolution 1, followed by the UMAP embedding.
sc.tl.leiden(garcia, resolution=1)
sc.tl.umap(garcia)

In [None]:
# Keys to colour the UMAP by: two .obs columns followed by marker genes.
# The original list repeated several genes (e.g. EPCAM, SOX9 and PAEP three
# times each), which only produced duplicate panels; every key now appears
# once, in order of first appearance.
umap_color_keys = [
    'Author', 'leiden',
    'SOX9', 'LGR5', 'PAEP', 'SCGB2A2', 'EPCAM', 'CAPS', 'FOXJ1', 'PAX8',
    'LYZ', 'PTPRC', 'CCL14', 'DCN', 'JCHAIN',
    'SCGB1D2', 'PTGS1', 'PIFO', 'CCNO', 'THBS1', 'PLAU',
    'GPX3', 'CXCL14', 'KRT17', 'KRT8', 'ESR1', 'CPM', 'RUNX3',
]

sc.pl.umap(garcia, color=umap_color_keys, save='garciamarkers.pdf')


In [None]:
# Rank genes per leiden cluster with the Wilcoxon method.
sc.tl.rank_genes_groups(garcia, 'leiden', method='wilcoxon')

# Flatten the ranking result into one table with two columns per cluster:
# '<cluster>_n' (gene names) and '<cluster>_p' (p-values).
topmarkers = garcia.uns['rank_genes_groups']
groups = topmarkers['names'].dtype.names

marker_columns = {}
for cluster in groups:
    marker_columns[cluster + '_n'] = topmarkers['names'][cluster]
    marker_columns[cluster + '_p'] = topmarkers['pvals'][cluster]
garciamarkers = pd.DataFrame(marker_columns)

# Dendrogram over the leiden clusters, then a heatmap of the top 25 ranked
# genes per cluster with the colour scale clipped to [-3, 3].
sc.tl.dendrogram(garcia, groupby='leiden')

sc.pl.rank_genes_groups_heatmap(
    garcia,
    n_genes=25,
    swap_axes=True,
    show_gene_labels=False,
    vmin=-3,
    vmax=3,
)

In [None]:
# Manual annotation: cell-type label -> list of leiden cluster ids (as strings).
cell_dict = {
    'T cell': ['10'],
    'Myeloid': ['25'],
    'Ciliated': ['16'],
    'Sox9+ Epithelial': ['2', '14', '19', '24', '29'],
    'Lumenal Epithelial': ['4', '5', '11', '13', '26'],
    'Glandular Epithelial': ['9', '12', '17'],
    'Smooth Muscle': ['3', '6', '23'],
    'Non-decidualised Stroma': ['1', '28', '22'],
    'Decidualised Stroma': ['15', '0', '7', '18', '27'],
    'Endothelial': ['8', '20'],
    'Natural Killer Cell': ['21'],
}

# Invert to cluster id -> label so every cell can be annotated in one pass.
cluster_to_cell_type = {
    cluster: cell_type
    for cell_type, clusters in cell_dict.items()
    for cluster in clusters
}

# Map each cell's leiden cluster to its label. The previous approach created
# a float NaN column and then wrote strings into it via .loc, which is an
# incompatible-dtype assignment that recent pandas versions warn about.
# Cells in a cluster that is not listed above still end up as NaN.
garcia.obs['Cell_Types'] = (
    garcia.obs['leiden']
    .astype(str)
    .map(cluster_to_cell_type)
    .astype('category')
)

In [None]:
# UMAP coloured by the manual cell-type annotation, with the labels drawn on
# the embedding; the figure is also saved under the given name.
sc.pl.umap(
    garcia,
    color=['Cell_Types'],
    legend_loc='on data',
    save='garciaannotatedcellmap.png',
)

In [None]:
# Persist the clustered and annotated object to disk at the path below.
garciaclustered = '/home/j87832lw/oviduct/oviduct.all/garciallclustered.h5ad'
garcia.write(filename=garciaclustered)