# Supplementary Notebook 2: Correlation between PTR and abundance
## Paper: Novel Approach for Microbiome Analysis Using Bacterial Replication Rates and Causal Inference to Determine Resistome Potential
### Vitalii Stebliankin, Musfiqur Sazal, Camilo Valdes, Kalai Mathee, and Giri Narasimhan

#### Dataset: Gibson et al. (BioProject ID: PRJNA301903)

### Get correlation between PTR and abundance
(refers to Fig. 2A in the main manuscript)

In [1]:
import pandas as pd
from statsmodels.stats.multitest import multipletests
from scipy.stats import spearmanr
import os

# Directory that receives this notebook's results.
out_dir = "analysis-out/2-CorrelationPTR-Abundance"
intermediate_dir = "{}/intermediate_files".format(out_dir)
# makedirs(..., exist_ok=True) creates any missing parent directories and does
# not fail when the directory already exists, so the cell can be re-run safely.
os.makedirs(out_dir, exist_ok=True)
# if not os.path.exists(intermediate_dir):
#     os.mkdir(intermediate_dir)


# Input table; the first CSV column is used as the row index.
PTR_file = "analysis-out/1-FilteringPTR/PTR_species_filtered_metadata_major.csv"
ptr_df = pd.read_csv(PTR_file, index_col=0)
# Destination of the per-species correlation table.
out_file = out_dir+"/PTR_abundance_corr.csv"

def get_species_list(ptr_df, suffix="#PTR"):
    """Return the species names that have a PTR column in ``ptr_df``.

    A column counts as a PTR column only when its name ends with ``suffix``;
    the species name is the column name with that trailing suffix removed.
    Matching the full suffix (rather than the bare substring "PTR") keeps
    columns whose names merely contain "PTR" out of the result.

    Parameters
    ----------
    ptr_df : pandas.DataFrame
        Table whose ``columns`` are scanned for PTR columns.
    suffix : str, optional
        Trailing marker that identifies a PTR column (default ``"#PTR"``).

    Returns
    -------
    list of str
        Species names in column order.

    The number of species found is also printed.
    """
    species_list = [
        # Slice by explicit length so that an empty suffix leaves the name intact.
        col[:len(col) - len(suffix)]
        for col in ptr_df.columns
        if col.endswith(suffix)
    ]
    print("Total of {} species in the dataset.".format(len(species_list)))
    return species_list

species_list = get_species_list(ptr_df)
species_list_PTR = [name+"#PTR" for name in species_list]
# ptr_only_df = ptr_df[species_list_PTR]
# ptr_only_df.columns = ptr_only_df.columns.str.replace("#PTR","")


def _ptr_abundance_stats(species):
    """Return (spearman r, p-value, mean abundance) for one species.

    The correlation uses only the rows of ``ptr_df`` where the species' PTR
    value is present; the mean abundance is taken over all rows of ``ptr_df``.
    When no row has a PTR value the placeholders (0, 1, 0) are returned.
    """
    ptr_col = species+"#PTR"
    abundance_col = species+"#abundance"
    # Keep only the samples with a PTR value for this species.
    observed = ptr_df[ptr_df[ptr_col].notnull()]
    if len(observed) == 0:
        return 0, 1, 0
    mean_abundance = ptr_df[abundance_col].mean()
    rho, p_value = spearmanr(observed[ptr_col], observed[abundance_col])
    return rho, p_value, mean_abundance


# One (r, p-value, mean abundance) triple per species, in species_list order.
species_stats = [_ptr_abundance_stats(name) for name in species_list]
ptr_dict = {
    "species": list(species_list),
    "spearmanr": [stats[0] for stats in species_stats],
    "pval": [stats[1] for stats in species_stats],
    "averageAbundance": [stats[2] for stats in species_stats],
}
# Fix the column order explicitly when building the result table.
df = pd.DataFrame(ptr_dict, columns=["species", "spearmanr", "pval", "averageAbundance"])
df.to_csv(out_file)
# Show the per-species correlation and average abundance.
print(df)

Total of 25 species in the dataset.
                             species  spearmanr          pval  \
0              Enterococcus faecalis  -0.086650  2.501121e-01   
1           Klebsiella michiganensis   0.377790  2.684905e-03   
2                 Klebsiella oxytoca   0.015802  8.997858e-01   
3                   Escherichia coli  -0.190983  2.280365e-02   
4                Klebsiella sp. M5al  -0.191991  3.094435e-01   
5               Klebsiella aerogenes  -0.166470  1.255469e-01   
6               Enterobacter cloacae  -0.482832  4.162081e-07   
7            Enterobacter hormaechei  -0.477694  5.673820e-06   
8                Veillonella parvula  -0.193368  1.612232e-01   
9         Enterobacter sp. DKU_NT_01  -0.294388  4.004715e-02   
10        Enterobacter sp. CRENT-193  -0.324914  7.771976e-03   
11        Staphylococcus epidermidis   0.186319  1.171019e-01   
12              Enterococcus faecium   0.565128  2.627672e-03   
13              Bacteroides fragilis  -0.407792  6.650