In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import scanpy.external as sce
from matplotlib import pyplot as plt
import scvelo as scv
import anndata as ad
import seaborn as sns

In [None]:
# Raise scanpy's logging verbosity to level 3 for the rest of the notebook.
sc.settings.verbosity = 3

# Print the scanpy header (package versions) into the cell output so the
# software environment is recorded alongside the results.
sc.logging.print_header()

In [None]:
# Load the concatenated oviduct dataset and de-duplicate gene names in place.
oviduct = sc.read_h5ad('/home/j87832lw/oviduct/oviduct.all/oviduct_concat.h5ad')
oviduct.var_names_make_unique()

# Keep only cells that have a patient recorded.
# `.copy()` turns the subset into a real AnnData object: later cells add columns
# to `oviduct.obs`, and writing to a view would otherwise trigger an implicit
# copy (with a warning) at an arbitrary later point.
has_patient = oviduct.obs['Patient'].notna()
oviduct = oviduct[has_patient].copy()
oviduct

In [None]:
# Load the endometrium dataset and make its gene names unique in place,
# mirroring what is done for the oviduct dataset.
endo_path = '/home/j87832lw/oviduct/endometrium/garciacorrected.h5ad'
endo = sc.read_h5ad(endo_path)
endo.var_names_make_unique()

In [None]:
# Menstrual / menopausal status -> patients with that status.
menopausal_dict = {
    'Proliferative': ['patient 1', 'patient 4', 'patient 6'],
    'Secretory': ['patient 3', 'patient 5', 'patient 7', 'patient 9', 'patient 12'],
    'Peri-menopausal': ['patient 4', 'patient 10', 'patient 14', 'patient 11'],
    'Post-menopausal': ['patient 8', 'patient 13', 'patient 15'],
}

# Invert to patient -> status.
# NOTE(review): 'patient 4' is listed under both 'Proliferative' and
# 'Peri-menopausal'. The later entry wins (same outcome as the original loop),
# so patient 4 is labelled 'Peri-menopausal' -- confirm which status is intended
# and remove the other entry.
patient_to_status = {}
for status, patients in menopausal_dict.items():
    for patient in patients:
        patient_to_status[patient] = status

# Build the column with a single lookup instead of pre-filling it with float NaN
# and then writing strings into it: setting strings into a float64 column is
# deprecated upcasting in recent pandas. Patients missing from the mapping stay NaN.
oviduct.obs['menstrual_status'] = (
    oviduct.obs['Patient']
    .astype(object)
    .map(patient_to_status)
    .astype('category')
)
oviduct.obs['menstrual_status']

In [None]:
# Age -> patients of that age.
age_dict = {
    47: ['patient 1', 'patient 3'],
    41: ['patient 4'],
    33: ['patient 5'],
    31: ['patient 6'],
    46: ['patient 7', 'patient 9'],
    62: ['patient 8'],
    52: ['patient 10'],
    50: ['patient 11'],
    45: ['patient 12'],
    64: ['patient 13'],
    53: ['patient 14'],
    55: ['patient 15'],
}

# Invert to patient -> age; each patient appears exactly once above.
patient_to_age = {
    patient: age
    for age, patients in age_dict.items()
    for patient in patients
}

# Look up each cell's age from its patient. The values are cast to float before
# becoming categories so the result matches a NaN-initialised column
# (float-valued categories, NaN for patients not listed).
oviduct.obs['Age'] = (
    oviduct.obs['Patient']
    .astype(object)
    .map(patient_to_age)
    .astype(float)
    .astype('category')
)
oviduct.obs['Age']

In [None]:
# Cycle-day label -> patients with that label.
cycleday_dict = {
    'day 3': ['patient 1'],
    'day 17': ['patient 3'],
    'day 7': ['patient 4'],
    'day 31': ['patient 5'],
    'day 13': ['patient 6'],
    'day 23': ['patient 7'],
    'day 28': ['patient 9'],
    'day 37': ['patient 10'],
    'unknown': ['patient 11', 'patient 12', 'patient 14'],
    'post-menopausal': ['patient 8', 'patient 13', 'patient 15'],
}

# Invert to patient -> cycle-day label; each patient appears exactly once above.
patient_to_cycle_day = {
    patient: label
    for label, patients in cycleday_dict.items()
    for patient in patients
}

# Build the column with a single lookup rather than pre-filling with float NaN and
# writing strings into it (deprecated upcasting in recent pandas).
# Patients missing from the mapping stay NaN.
oviduct.obs['cycle_day'] = (
    oviduct.obs['Patient']
    .astype(object)
    .map(patient_to_cycle_day)
    .astype('category')
)
oviduct.obs['cycle_day']

In [None]:
# Stack the oviduct and endometrium cells into one object (axis=0 concatenates
# along observations); join='inner' keeps only what is shared on the other axis.
# NOTE(review): with join='inner', obs columns present in only one of the two
# datasets (e.g. the per-patient annotations added to `oviduct.obs`) are
# presumably dropped as well -- confirm against the anndata.concat documentation.
datasets = [oviduct, endo]
oviendo = ad.concat(datasets, join='inner', axis=0)

In [None]:
# Flag genes whose name starts with 'MT-' so QC metrics are also computed for
# that gene set (this produces the 'pct_counts_mt' column plotted below).
oviendo.var['mt'] = oviendo.var_names.str.startswith('MT-')
sc.pp.calculate_qc_metrics(
    oviendo,
    qc_vars=['mt'],
    percent_top=None,
    log1p=False,
    inplace=True,
)

# Distribution of the three per-cell QC metrics.
qc_columns = ['n_genes_by_counts', 'total_counts', 'pct_counts_mt']
sc.pl.violin(oviendo, qc_columns, jitter=0.4, multi_panel=True)

# total_counts against mitochondrial percentage, then against detected genes.
for y_metric in ('pct_counts_mt', 'n_genes_by_counts'):
    sc.pl.scatter(oviendo, x='total_counts', y=y_metric)

In [None]:
# Remove cells with fewer than 200 detected genes and genes seen in fewer than 3 cells.
sc.pp.filter_cells(oviendo, min_genes=200)
sc.pp.filter_genes(oviendo, min_cells=3)

# Upper QC cut-offs, all strict '<' comparisons on the per-cell QC columns:
# detected genes, total counts and mitochondrial percentage.
keep_cell = (
    (oviendo.obs['n_genes_by_counts'] < 6000)
    & (oviendo.obs['total_counts'] < 8500000)
    & (oviendo.obs['pct_counts_mt'] < 30)
)

# Apply all cut-offs in one subset and materialise it with `.copy()`, so the
# in-place preprocessing that follows operates on a real AnnData object rather
# than on a view of a view of a view.
oviendo = oviendo[keep_cell, :].copy()

In [None]:
# Normalise each cell to 10,000 total counts, then log1p-transform.
sc.pp.normalize_total(oviendo, target_sum=1e4)
sc.pp.log1p(oviendo)

# Highly variable genes, selected with 'Patient' as the batch key.
sc.pp.highly_variable_genes(
    oviendo,
    min_mean=0.0125,
    max_mean=3,
    min_disp=0.5,
    batch_key='Patient',
)
sc.pl.highly_variable_genes(oviendo)

# Keep the normalised, log-transformed data for all genes in `.raw` before
# restricting to the highly variable genes.
oviendo.raw = oviendo

# `.copy()` materialises the gene subset so that `sc.pp.scale`, which modifies the
# data in place, works on a real AnnData object instead of a view.
oviendo = oviendo[:, oviendo.var.highly_variable].copy()
sc.pp.scale(oviendo)

In [None]:
# PCA, plus the variance-ratio plot used to judge how many components to keep.
sc.tl.pca(oviendo, svd_solver='arpack')
sc.pl.pca_variance_ratio(oviendo)

# Neighbourhood graph on the first 25 PCs with 100 neighbours per cell,
# followed by Leiden clustering and the UMAP embedding built on that graph.
sc.pp.neighbors(oviendo, n_pcs=25, n_neighbors=100)
sc.tl.leiden(oviendo, resolution=1.7)
sc.tl.umap(oviendo)

# Colour the UMAP by dataset author, Leiden cluster and a panel of marker keys.
umap_colour_keys = ['Author', 'leiden', 'OVGP1', 'CAPS', 'FOXJ1', 'PAX8', 'EPCAM']
sc.pl.umap(oviendo, color=umap_colour_keys)


In [None]:
# Cell-type label -> Leiden cluster ids (as strings) carrying that label.
# Cluster '22' was previously listed under both 'Decidualised stroma' and 'Stroma';
# because 'Stroma' was applied later it overwrote the first label, so
# 'Decidualised stroma' was never assigned. '22' now appears only under
# 'Decidualised stroma'.
# NOTE(review): confirm that cluster 22 is meant to be 'Decidualised stroma'.
cell_types = {
    'Ciliated': ['10', '26'],
    'OVGP1+ Secretory': ['25'],
    'Glandular Secretory': ['19', '15'],
    'SOX9+ Epithelial': ['24', '31'],
    'SOX9+, LGR5+': ['30'],
    'Lumenal Epithelial': ['8'],
    'Glandular Epithelial': ['18', '5'],
    'Smooth Muscle': ['3', '1', '39'],
    'Endothelial': ['37', '36', '7', '45'],
    'Decidualised stroma': ['22'],
    'Stroma': ['0', '13', '6', '32', '29', '11', '9', '35', '4', '38', '33', '20', '42', '44', '40'],
    'Leukocytes': ['17', '16', '34', '12', '21', '27', '28', '2'],
    'Myeloid': ['23'],
    'Mast': ['41'],
    'Plasma/B cell': ['43'],
    'EPCAM+': ['14'],
}

# Invert to cluster -> label, refusing duplicates so that one label can never
# silently overwrite another again.
cluster_to_cell_type = {}
for cell_type, clusters in cell_types.items():
    for cluster in clusters:
        if cluster in cluster_to_cell_type:
            raise ValueError(
                f"Leiden cluster {cluster!r} is assigned to both "
                f"{cluster_to_cell_type[cluster]!r} and {cell_type!r}"
            )
        cluster_to_cell_type[cluster] = cell_type

# Single lookup instead of pre-filling with float NaN and writing strings into the
# column (deprecated upcasting in recent pandas). Clusters not listed stay NaN.
oviendo.obs['Cell_Types'] = (
    oviendo.obs['leiden'].astype(object).map(cluster_to_cell_type)
)

oviendo

In [None]:
# Tissue of origin -> dataset authors whose samples come from that tissue.
tissue = {
    'Uterine': ['GA', 'Wang'],
    'Fallopian Tube': ['Dinh_2021', 'Hu_2020', 'Ulrich_2022'],
}

# Invert to author -> tissue; each author appears exactly once above.
author_to_tissue = {
    author: tissue_name
    for tissue_name, authors in tissue.items()
    for author in authors
}

# Single lookup instead of pre-filling with float NaN and writing strings into the
# column (deprecated upcasting in recent pandas). Authors not listed stay NaN.
oviendo.obs['Tissue_type'] = (
    oviendo.obs['Author'].astype(object).map(author_to_tissue)
)

# The original call was missing its closing parenthesis (SyntaxError).
sc.pl.umap(oviendo, color=['Tissue_type'])

In [None]:
# Persist the clustered, annotated object to disk.
# Only the obs columns that exist at this point are included in the file.
results_file = '/home/j87832lw/oviduct/oviduct.all/alldatasetclustering.h5ad'
oviendo.write(filename=results_file)

In [None]:
# Coarse secretory grouping -> the Cell_Types labels that belong to it.
tissuetype_dict = {
    'FT secretory': ['OVGP1+ Secretory'],
    'Endometrial secretory': ['SOX9+ Epithelial', 'Glandular Secretory', 'Glandular Epithelial'],
}

# Invert to Cell_Types label -> grouping; each label appears exactly once above.
cell_type_to_group = {
    cell_type: group
    for group, members in tissuetype_dict.items()
    for cell_type in members
}

# Single lookup instead of pre-filling with float NaN and writing strings into the
# column (deprecated upcasting in recent pandas). Cells whose Cell_Types label is
# not listed stay NaN.
# Note: this column is created after the results file is written in the preceding
# cell, so it is not in that file unless the object is saved again.
oviendo.obs['tissuetype'] = (
    oviendo.obs['Cell_Types'].astype(object).map(cell_type_to_group)
)