In [None]:
## Concatenate all fallopian tube datasets

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import scanpy.external as sce
from matplotlib import pyplot as plt
import scvelo as scv
import anndata as ad
import seaborn as sns

In [None]:
# Raise scanpy's logging verbosity to level 3 and print its logging header.
sc.settings.verbosity = 3
sc.logging.print_header()

In [None]:
# Load the Dinh dataset. The file carries an .h5ad extension, so it is read
# with read_h5ad like the other datasets in this notebook (read_loom is the
# reader for .loom files).
dinh = sc.read_h5ad('/home/j87832lw/mounting/oviduct/data/Dinh.h5ad')
dinh.var_names_make_unique()

# Store the patient label as a categorical column.
dinh.obs['Patient'] = dinh.obs['Patient'].astype('category')
dinh.obs['Patient']

In [None]:
# Load the Ulrich dataset and keep only the cells whose 'Source' is 'Surgical'.
ulrich = sc.read_h5ad('/home/j87832lw/mounting/oviduct/data/ulrich_all.h5ad')
ulrich = ulrich[ulrich.obs['Source'].eq('Surgical'), :].copy()
ulrich.var_names_make_unique()

# Make 'Patient' categorical and relabel the two listed patient IDs in one pass;
# categories not listed in the mapping keep their existing names.
ulrich.obs['Patient'] = (
    ulrich.obs['Patient']
    .astype('category')
    .cat.rename_categories({'FT3': 'patient 9', 'FT1': 'patient 10'})
)
ulrich.obs['Patient']

In [None]:
# Load the Hu dataset.
hu = sc.read_h5ad('/home/j87832lw/mounting/oviduct/data/hu_raw.h5ad')
hu.var_names_make_unique()

# Make 'Patient' categorical and relabel the numeric patient IDs in one pass.
# NOTE(review): the keys are floats, so they only match if the stored
# categories are floats too -- confirm against the data.
hu.obs['Patient'] = (
    hu.obs['Patient']
    .astype('category')
    .cat.rename_categories({
        34350.0: 'patient 11',
        34659.0: 'patient 12',
        33572.0: 'patient 13',
        35773.0: 'patient 14',
        33778.0: 'patient 15',
    })
)

# Keep patients 11, 12, 13 and 15 only; 'patient 14' is relabelled above but
# is not part of the retained set.
hu = hu[
    hu.obs['Patient'].isin(['patient 11', 'patient 12', 'patient 13', 'patient 15']),
    :,
].copy()
hu.obs['Patient']

In [None]:
# Stack the three datasets along the cell axis (axis=0); join='inner' keeps
# only the variables present in every dataset.
oviduct = ad.concat([ulrich, dinh, hu], axis=0, join='inner')

# Cell names can repeat across independently processed datasets; make them
# unique so that later label-based operations on .obs are unambiguous.
oviduct.obs_names_make_unique()

# Re-cast the patient label as categorical on the combined object.
oviduct.obs['Patient'] = oviduct.obs['Patient'].astype('category')

# Persist the concatenated object as a checkpoint.
oviduct_concat = '/home/j87832lw/mounting/oviduct/oviduct.all/oviduct_concat.h5ad' 
oviduct.write(oviduct_concat)

# Display last: a bare expression is only rendered when it ends the cell.
oviduct.obs['Patient']

In [None]:
# Reload the concatenated checkpoint.
oviduct = sc.read_h5ad('/home/j87832lw/mounting/oviduct/oviduct.all/oviduct_concat.h5ad')

# Flag genes whose name starts with 'MT-' and compute QC metrics against them.
oviduct.var['mt'] = oviduct.var_names.str.startswith('MT-')
sc.pp.calculate_qc_metrics(
    oviduct,
    qc_vars=['mt'],
    percent_top=None,
    log1p=False,
    inplace=True,
)

# Violin plots of the three QC metrics used for filtering.
sc.pl.violin(
    oviduct,
    ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
    jitter=0.4,
    multi_panel=True,
)

In [None]:
# Drop cells with fewer than 200 detected genes and genes seen in fewer than 3 cells.
sc.pp.filter_cells(oviduct, min_genes=200)
sc.pp.filter_genes(oviduct, min_cells=3)

# Apply the three upper-bound QC thresholds in a single boolean mask.
keep_cells = (
    (oviduct.obs['n_genes_by_counts'] < 8000)
    & (oviduct.obs['total_counts'] < 8500000)
    & (oviduct.obs['pct_counts_mt'] < 30)
)

# Subsetting an AnnData yields a view; copy it so that the in-place
# preprocessing steps that follow operate on a real object instead of
# triggering an implicit copy-on-modify of the view.
oviduct = oviduct[keep_cells.to_numpy(), :].copy()

In [None]:
# Normalize each cell to a total of 1e4 counts, then log1p-transform.
sc.pp.normalize_total(oviduct, target_sum=1e4)
sc.pp.log1p(oviduct)

# Flag highly variable genes, using 'Patient' as the batch key.
sc.pp.highly_variable_genes(
    oviduct,
    min_mean=0.0125,
    max_mean=3,
    min_disp=0.5,
    batch_key='Patient',
)
sc.pl.highly_variable_genes(oviduct)

In [None]:
# Keep the current full-gene object available as .raw before subsetting.
oviduct.raw = oviduct

# Restrict to the highly variable genes. The subset is copied because
# sc.pp.scale modifies the data in place, and the subset would otherwise
# be a view of the full object.
oviduct = oviduct[:, oviduct.var.highly_variable].copy()
sc.pp.scale(oviduct)

In [None]:
# PCA and the variance explained per component.
sc.tl.pca(oviduct, svd_solver='arpack')
sc.pl.pca_variance_ratio(oviduct)

# Neighbourhood graph on the first 8 PCs, followed by Leiden clustering.
sc.pp.neighbors(oviduct, n_pcs=8)
sc.tl.leiden(oviduct, resolution=1.6)

# PAGA is computed and plotted first because the UMAP below is initialised
# from it (init_pos='paga').
sc.tl.paga(oviduct)
sc.pl.paga(oviduct)
sc.tl.umap(oviduct, init_pos='paga')

# UMAP coloured by 'Author', by cluster, and by a handful of genes.
sc.pl.umap(
    oviduct,
    color=['Author', 'leiden', 'OVGP1', 'CAPS', 'FOXJ1', 'PAX8', 'EPCAM'],
)


In [None]:
# Manual annotation: cell type -> list of Leiden cluster labels (as strings).
cell_dict = {
    'Unspecified epithelial': ['13', '38', '15'],
    'T cell': ['0', '1', '2', '3', '5', '9', '10', '19', '22', '33', '40'],
    'Myeloid': ['25', '27', '30', '37', '41'],
    'Ciliated': ['20', '21', '23', '24'],
    'Secretory': ['12', '28', '29'],
    'Smooth Muscle': ['8', '18', '43'],
    'Fibroblast': ['4', '6', '7', '11', '14', '16', '32'],
    'Endothelial': ['17', '31', '34', '35', '39', '42'],
    'Mast': ['26'],
    'Plasma/B cell': ['36'],
}

# Invert to cluster label -> cell type so every cell is annotated with one
# vectorized lookup. If a cluster were listed under two types, the later
# entry would win, as it did with the original per-type loop.
cluster_to_type = {
    cluster: cell_type
    for cell_type, clusters in cell_dict.items()
    for cluster in clusters
}

# Build the column directly from the lookup instead of pre-filling a float
# NaN column and writing strings into it (an incompatible-dtype assignment
# in pandas). Clusters absent from cell_dict stay missing (NaN). The result
# is stored as categorical, which grouping/plotting by 'Cell_Types' expects.
oviduct.obs['Cell_Types'] = (
    oviduct.obs['leiden']
    .astype(str)
    .map(cluster_to_type)
    .astype('category')
)

In [None]:
# UMAP coloured by the manual cell-type annotation.
sc.pl.umap(oviduct, color=['Cell_Types'])

# UMAP for a short gene panel.
sc.pl.umap(oviduct, color=['OVGP1', 'FOXJ1', 'CCL14', 'DCN'])

# UMAP for the extended gene panel.
sc.pl.umap(
    oviduct,
    color=[
        'JCHAIN', 'TRAC', 'PTPRC', 'TPSAB1', 'LYZ', 'FOXJ1', 'PIFO',
        'EPCAM', 'OVGP1', 'PAX8', 'CCL14', 'DCN', 'ACTA2',
    ],
)

In [None]:
# Genes shown in the dot plot, in display order.
marker_genes = [
    'JCHAIN', 'TRAC', 'PTPRC', 'TPSAB1', 'LYZ', 'DCN',
    'ACTA2', 'FOXJ1', 'PIFO', 'CCL14', 'OVGP1', 'EPCAM', 'PAX8',
]

# Compute the dendrogram over 'Cell_Types' first; the dot plot below is drawn
# with dendrogram=True.
sc.tl.dendrogram(oviduct, groupby='Cell_Types')
dp = sc.pl.dotplot(
    oviduct,
    marker_genes,
    groupby='Cell_Types',
    dendrogram=True,
)

In [None]:
# Write the annotated object to disk.
# NOTE(review): unlike the other paths in this notebook, this one has no
# 'mounting' component -- confirm the destination is intended.
results_file = '/home/j87832lw/oviduct/oviduct.all/version2oviduct_concat.h5ad'
oviduct.write(results_file)