In [200]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import warnings
import seaborn as sns

from scipy.stats import pearsonr

# Plot styling and a fixed NumPy seed for the session.
sns.set_style("darkgrid")
np.random.seed(930525)
# pandas display limits: at most 20 columns and 200 rows are shown.
pd.set_option('display.max_columns', 20)
pd.set_option('display.max_rows', 200)

# Show each distinct warning only the first time it is raised.
warnings.simplefilter('once')

%matplotlib inline
%load_ext watermark
%watermark --iversions

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
numpy   1.18.4
seaborn 0.10.1
pandas  1.0.4



In [201]:
# Root folder of the project data. NOTE(review): absolute, machine-specific path.
data_folder = "/mnt/btrfs/data/italy"

In [202]:
# Metadata CSV, given relative to the notebook's working directory.
metadata_file = "data/metadata.csv"

In [203]:
# Load the sample metadata table (comma-separated).
df_metadata_file = pd.read_csv(metadata_file, sep=",")

In [204]:
# List the available metadata columns.
df_metadata_file.columns

Index(['sample_id', 'Sample_Number', 'Sample_Code', 'Shotgun', '16s',
       'viral_load', 'nCoVN1', 'nCoVN2', 'RNase P', 'ConcN1', 'ConcN2', 'N1',
       'Sex', 'Age', 'Age_range', 'Age_average', 'Blood_sampling_date',
       'Swab date', 'Severe_Case', 'REUMA', 'PNEUMO', 'ONCO', 'HEMATO',
       'DIABETES', 'METABOLIC', 'CARDIO', 'HYPERTENSION', 'NEPHRO', 'TB',
       'HIV', 'IMMUNOSUPPRESSION', 'HEPATOPATHY', 'HCQ', 'LPV', 'HCQ_LPV',
       'HCQ_LPV_01', 'HCQ_LPV_02', 'RDV', 'TCZ', 'BCT', 'Steroide',
       'SYMPTOMS_ONSET', 'SYMPTOMS_TO_SWAB_DAYS', 'Swab_day_quartile',
       'Set_severe', 'swab_days_avg', 'SD_q1_severe', 'SD_q1_HB02', 'swab_out',
       'OUTCOME', 'Hospitalization_Days', 'CRP', 'CRP_range', 'LDH',
       'LDH_range', 'Ferritin', 'CPK', 'CPK_range', 'WBC', 'WBC_range',
       'Neutrophil_Count', 'AST', 'ALT', 'Albumin', 'D.Dimer', 'Lowest_SpO2',
       'Hospital_bin', 'Hospital_bin_01', 'Hospital_bin_02', 'q1', 'bin_02',
       'HCQ.1', 'LPV.1', 'HB_02_HCQ', 'HB_02

In [205]:
# Number of metadata rows per RDV value.
df_metadata_file["RDV"].value_counts()

No     68
Yes    32
Name: RDV, dtype: int64

In [206]:
# Path of the QC-filtered taxa table. Built from data_folder so the absolute data
# root is spelled out in one place only; the resulting string is unchanged.
taxatable = f"{data_folder}/taxatable/filter_qc_taxatable.txt"

In [207]:
# Load the tab-separated taxa table; the first column becomes the row index.
df_taxa_raw = pd.read_csv(taxatable, sep="\t", index_col=0)

In [208]:
# Column labels of the raw table (parsed into sample ids in the next cell).
columns = df_taxa_raw.columns

In [209]:
# Shorten each column label to a sample id: the first two dot-separated fields
# joined by "_" (the first three fields for labels starting with "pos").
sample_ids = [
    "_".join(file_name.split(".")[: 3 if file_name.startswith("pos") else 2])
    for file_name in columns
]

In [210]:
# Transposed copy of the raw table: rows are the sample files, columns are the taxa.
df_taxa = df_taxa_raw.copy().T

In [211]:
# Relative abundance per sample: divide every row by that row's total.
row_totals = df_taxa.sum(axis=1)
df_taxa_ra = df_taxa.div(row_totals, axis=0)

# Sanity check: each row of relative abundances should sum to 1.
print(df_taxa_ra.sum(axis=1))

pos.dna.02.S35.001.fa    1.0
CAPMA.83.S12.001.fa      1.0
FOGI.63.S13.001.fa       1.0
COCL.11.S7.001.fa        1.0
WACH.38.S1.001.fa        1.0
PRPI.42.S31.001.fa       1.0
RODE.80.S8.001.fa        1.0
pos.dna.01.S17.001.fa    1.0
MEEN.93.S11.001.fa       1.0
DETH.41.S27.001.fa       1.0
TECA.45.S15.001.fa       1.0
BODA.52.S9.001.fa        1.0
CHMA.61.S3.001.fa        1.0
ASTER.02.S34.001.fa      1.0
BIRO.74.S19.001.fa       1.0
CAAU.07.S2.001.fa        1.0
DIGMA.71.S21.001.fa      1.0
FAFE.16.S10.001.fa       1.0
OPSE.53.S5.001.fa        1.0
LAES.91.S29.001.fa       1.0
DOEC.81.S30.001.fa       1.0
EKJO.64.S32.001.fa       1.0
DIAL.15.S23.001.fa       1.0
GIFI.21.S22.001.fa       1.0
TAOT.65.S24.001.fa       1.0
DEFI.14.S6.001.fa        1.0
MUNA.98.S28.001.fa       1.0
ROAL.75.S33.001.fa       1.0
MERI.28.S20.001.fa       1.0
BUMA.05.S4.001.fa        1.0
MOEM.48.S16.001.fa       1.0
PAMA.46.S18.001.fa       1.0
VUGE.37.S14.001.fa       1.0
TOSA.76.S25.001.fa       1.0
DEIS.70.S26.00

In [212]:
# `genera` used to be defined only in a later cell, so this cell raised a NameError
# on a fresh kernel (Restart & Run All). Define the selection before using it.
genus = "g__Veillonella"
genera = [col for col in df_taxa.columns if genus in col]

# Each selected taxon's mean count as a share of the summed per-taxon mean counts.
taxon_means = df_taxa.mean(axis=0)
(taxon_means / taxon_means.sum())[genera]

#OTU ID
k__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Veillonellaceae;g__Veillonella;s__Veillonella_atypica             2.851332e-03
k__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Veillonellaceae;g__Veillonella;s__Veillonella_rogosae             3.755483e-03
k__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Veillonellaceae;g__Veillonella;s__Veillonella_parvula             1.260326e-03
k__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Veillonellaceae;g__Veillonella;s__Veillonella_dispar              2.185924e-04
k__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Veillonellaceae;g__Veillonella;s__Veillonella_tobetsuensis        2.696719e-04
k__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Veillonellaceae;g__Veillonella;s__Veillonella_parvula_A           2.443783e-03
k__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Veillonellaceae;g__Veillonella;s__Veillonella_sp90055244

In [213]:
# Prevalence of each selected taxon: fraction of rows (samples) with a count above zero.
n_samples = len(df_taxa)
df_taxa.gt(0).sum(axis=0).div(n_samples)[genera]

#OTU ID
k__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Veillonellaceae;g__Veillonella;s__Veillonella_atypica             0.971429
k__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Veillonellaceae;g__Veillonella;s__Veillonella_rogosae             0.971429
k__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Veillonellaceae;g__Veillonella;s__Veillonella_parvula             0.914286
k__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Veillonellaceae;g__Veillonella;s__Veillonella_dispar              0.914286
k__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Veillonellaceae;g__Veillonella;s__Veillonella_tobetsuensis        0.771429
k__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Veillonellaceae;g__Veillonella;s__Veillonella_parvula_A           0.942857
k__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Veillonellaceae;g__Veillonella;s__Veillonella_sp900552445         0.857143
k__Ba

In [214]:
# Label to search for in the taxon lineage strings.
genus = "g__Veillonella"

# Every taxa column whose label contains that text (plain substring match).
genera = df_taxa.columns[df_taxa.columns.str.contains(genus, regex=False)].tolist()

# Print the mean relative abundance across samples, one line per selected taxon.
for species in genera:
    print(df_taxa_ra[species].mean())

0.0036772251058606437
0.0056389236160887075
0.002186069515610472
0.00041927854251735307
0.0003574163793665351
0.0040247657932257945
0.0005146253983254719
6.105916470448266e-05
0.0011684249338513872
0.00020648962167528102
4.72115014996214e-06
0.00029510066075474765
0.00015088308544970556
0.0004143575493339903
0.0007157524851392719
0.00015809464732849724
3.595941007504857e-05
2.2662311391491095e-05
0.00021912281026804502
4.14592475827635e-07
7.859542155777695e-07
0.00011046211409347521
7.075057341688369e-06
9.283664667603348e-08
1.7363778223646576e-05
6.744169158907973e-05
1.5316721072936311e-09
3.123461795789539e-08
1.5660990319765224e-06


In [215]:
# (rows, columns) of the taxa table, i.e. (sample files, taxa).
df_taxa.shape

(35, 7513)

In [216]:
# Keep only taxa detected (count > 0) in at least 80% of the samples.
# (The previous comment said 95%, but the threshold applied has always been .8;
# presumably the "p8" tag in the exported file name refers to it.)
# NOTE: despite its name, this mask is True for the taxa that are KEPT
# (sufficiently prevalent), not for the low-prevalence ones.
MIN_PREVALENCE = .8
mask_low_prevalence = ((df_taxa > 0).sum(axis=0)) / df_taxa.shape[0] >= MIN_PREVALENCE

In [217]:
# Abundance filter: True where a taxon's median count is at least .0001 (0.01%)
# of the summed per-taxon medians. As with the prevalence mask, True means "keep".
taxon_medians = df_taxa.median(axis=0)
mask_low_abundance = taxon_medians.div(taxon_medians.sum()) >= .0001

# Number of taxa passing the abundance filter.
mask_low_abundance.sum()

455

In [218]:
# Restrict the table to taxa that pass both the prevalence and the abundance filter.
keep_taxa = mask_low_prevalence & mask_low_abundance
df_taxa = df_taxa.loc[:, keep_taxa]

In [219]:
# Export copy with the row labels moved out of the index into a regular "index" column.
df_taxa_export = df_taxa.reset_index().copy()

In [221]:
# Label of the last column of the export table.
list(df_taxa_export.columns)[-1]

'k__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Clostridium_Q;s__Clostridium_Q_saccharolyticum_A'

In [222]:
# Replace the raw file names with short sample ids: the first two dot-separated
# fields joined by "_" (the first three fields for names starting with "pos").
sample_ids = [
    "_".join(raw_name.split(".")[: 3 if raw_name.startswith("pos") else 2])
    for raw_name in df_taxa_export["index"]
]
df_taxa_export["index"] = sample_ids

# Rename MERI_28 to MEIR_28 in the export table only; `sample_ids` keeps MERI_28.
is_meri_28 = df_taxa_export["index"] == "MERI_28"
df_taxa_export.loc[is_meri_28, "index"] = "MEIR_28"

In [223]:
# Relabel the first column (the sample ids) as "SampleID"; all other labels are kept.
cols = ["SampleID", *df_taxa_export.columns[1:]]
df_taxa_export.columns = cols

In [230]:
# Write the table transposed (the SampleID row first, then one row per taxon).
# header=False drops the column labels of the transposed frame; index=True keeps
# the row labels as the first CSV field.
df_taxa_export.T.to_csv("./data/shogun.98.p8.la0001.csv", index=True, header=False)

In [59]:
from skbio.stats.composition import clr

  collections.Sequence, SkbioObject):
  from collections import Iterable


In [60]:
# NOTE(review): this copy is overwritten by the assignment in the next cell and
# looks redundant.
df_taxa_clr = df_taxa.copy()

In [61]:
# CLR-transform the counts after adding 1 to every entry, then wrap the result in a
# new DataFrame (row/column labels are restored in the next cell).
clr_values = clr(df_taxa + 1)
df_taxa_clr = pd.DataFrame(clr_values)

In [62]:
# Give the CLR table the same row and column labels as the source table.
df_taxa_clr.index, df_taxa_clr.columns = df_taxa.index, df_taxa.columns

In [63]:
# Inspect the CLR-transformed table.
df_taxa_clr

#OTU ID,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola_vulgatus,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides_fragilis,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli_D,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_flexneri,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides_bouchesdurhonensis,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides_distasonis_A,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides_uniformis,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides_faecis,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides_thetaiotaomicron,...,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Porphyromonadaceae;g__Porphyromonas;s__Porphyromonas_uenonis_A,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella_sp001553265,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella_oris,k__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Lawsonibacter;s__Lawsonibacter_sp900066825,k__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Faecalimonas;s__Faecalimonas_sp900556835,k__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Hungatella_A;s__Hungatella_A_sp003478355,k__Bacteria;p__F
irmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Lawsonibacter;s__Lawsonibacter_sp000177015,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Paraprevotella;s__Paraprevotella_sp003477995,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides_oleiciplenus,k__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-81;s__CAG-81_sp900066785
pos.dna.02.S35.001.fa,12.564805,12.596202,10.432703,10.38242,10.717464,-0.529343,-0.683494,5.178716,1.714401,2.002083,...,-2.475253,-2.475253,-2.475253,-2.475253,-2.475253,-2.475253,-2.475253,-2.475253,-2.475253,-2.475253
CAPMA.83.S12.001.fa,1.502473,0.1089,8.026002,6.006419,4.688253,-3.136293,0.814951,1.198817,3.197282,2.198838,...,3.13894,4.841704,3.443421,-2.730828,-2.124692,-0.929852,-4.522588,-2.443146,-3.67529,-3.82944
FOGI.63.S13.001.fa,3.594296,-0.61124,0.322862,-0.505445,-2.583584,-0.913521,-0.274768,2.71489,0.887994,2.404863,...,2.407628,1.353132,-0.121505,-2.117494,-3.004797,-1.820056,-3.537014,-2.626143,-1.696893,-2.913825
COCL.11.S7.001.fa,1.536088,1.074962,4.6631,3.188711,1.774582,-2.081677,-2.133863,1.997295,2.537797,4.726931,...,2.444156,4.645267,1.707124,-2.901118,-3.214776,-2.330573,-4.213305,-3.594266,-0.489334,-3.85663
WACH.38.S1.001.fa,4.240597,-0.421227,2.999156,1.341303,0.023436,-1.574201,-1.208627,2.68513,0.050759,1.814724,...,-2.117196,3.141126,1.670049,-2.016654,-1.179972,-1.957565,-4.815676,-1.953475,-2.093441,-1.995148
PRPI.42.S31.001.fa,3.169797,-1.43864,7.172793,4.708191,3.824696,-0.532085,0.522833,2.376027,1.803052,1.17211,...,2.369289,-5.137255,-6.075525,0.473331,-1.156355,-0.917108,-1.554945,-1.227844,-0.051808,-1.882509
RODE.80.S8.001.fa,5.918568,-1.343871,4.211689,4.242758,4.215763,-0.235021,1.030012,2.403992,1.124451,2.924393,...,3.704593,1.527456,-1.924759,-0.191832,-1.928109,-1.326253,-1.318927,-1.467165,-2.030491,-1.091414
pos.dna.01.S17.001.fa,10.758325,10.466699,8.427199,8.411471,8.758813,-1.616304,-0.923157,3.4861,2.323658,1.673341,...,-0.923157,-1.010168,-1.798626,-0.667223,-0.635475,-1.010168,-2.309451,-2.714916,-2.1553,-1.703315
MEEN.93.S11.001.fa,4.110142,6.091029,4.004449,3.807656,2.422215,-0.461099,0.197382,3.913378,1.41049,4.790671,...,-2.942135,-2.301677,-2.368428,0.770305,1.697052,0.457184,-0.315932,-3.596317,-1.389233,-1.218305
DETH.41.S27.001.fa,3.183976,-3.761992,-0.928224,-2.9201,-3.462587,0.307518,-3.401551,1.633435,0.197562,-0.076657,...,1.070765,0.845063,0.288315,1.436088,-1.135138,-0.484491,-1.364955,-0.362683,-2.735072,0.163711


In [64]:
from skbio.stats.composition import ancom

In [65]:
# True for rows whose label does NOT start with "pos" (presumably the positive
# controls), so despite its name this mask selects the non-control rows.
mask_controls = (~df_taxa.index.str.startswith("pos")).tolist()

In [66]:
# Re-label the rows of df_taxa itself with the short sample ids.
df_taxa.index = sample_ids

# Copy with an explicit sample_id column, keeping only the rows selected by mask_controls.
df_taxa_nc = df_taxa.assign(sample_id=sample_ids).loc[mask_controls].copy()

In [67]:
# Shape after keeping only the rows selected by mask_controls (includes the sample_id column).
df_taxa_nc.shape

(33, 182)

In [68]:
# The taxa table spells one sample "MERI_28" while the metadata uses "MEIR_28"
# (the export cell applies the same correction), so the inner merge used to drop
# that sample silently. Map known misspellings onto the metadata spelling, but only
# for ids the metadata does not already contain, so the cell also works if the
# metadata spelling has been changed elsewhere.
sample_id_fixes = {"MERI_28": "MEIR_28"}
metadata_ids = set(df_metadata_file["sample_id"])
harmonized_ids = [
    sample_id if sample_id in metadata_ids else sample_id_fixes.get(sample_id, sample_id)
    for sample_id in df_taxa_nc["sample_id"]
]

# Make any remaining loss of samples visible instead of silent.
unmatched_ids = sorted(set(harmonized_ids) - metadata_ids)
if unmatched_ids:
    warnings.warn(f"Samples without metadata are dropped by the merge: {unmatched_ids}")

# Attach the RDV label to every sample; samples without a metadata row are dropped.
df_taxa_nc = pd.merge(
    df_taxa_nc.assign(sample_id=harmonized_ids),
    df_metadata_file[["RDV", "sample_id"]],
    on="sample_id",
    how="inner",
)

In [69]:
# Run ANCOM on the taxa counts (plus 1 on every entry) grouped by RDV; only the
# first element of ancom's return value is kept.
ancom_counts = df_taxa_nc.drop(columns=["RDV", "sample_id"]) + 1
ancom_results = ancom(ancom_counts, df_taxa_nc["RDV"])[0]

In [70]:
# Number of taxa flagged in the "Reject null hypothesis" column.
ancom_results["Reject null hypothesis"].sum()

0

In [119]:
# Inspect the CLR-transformed table again (rows are still labelled by raw file name).
df_taxa_clr

#OTU ID,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola_vulgatus,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides_fragilis,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli_D,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_flexneri,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides_bouchesdurhonensis,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides_distasonis_A,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides_uniformis,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides_faecis,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides_thetaiotaomicron,...,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Porphyromonadaceae;g__Porphyromonas;s__Porphyromonas_uenonis_A,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella_sp001553265,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella_oris,k__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Lawsonibacter;s__Lawsonibacter_sp900066825,k__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Faecalimonas;s__Faecalimonas_sp900556835,k__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Hungatella_A;s__Hungatella_A_sp003478355,k__Bacteria;p__F
irmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Lawsonibacter;s__Lawsonibacter_sp000177015,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Paraprevotella;s__Paraprevotella_sp003477995,k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides_oleiciplenus,k__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-81;s__CAG-81_sp900066785
pos.dna.02.S35.001.fa,12.564805,12.596202,10.432703,10.38242,10.717464,-0.529343,-0.683494,5.178716,1.714401,2.002083,...,-2.475253,-2.475253,-2.475253,-2.475253,-2.475253,-2.475253,-2.475253,-2.475253,-2.475253,-2.475253
CAPMA.83.S12.001.fa,1.502473,0.1089,8.026002,6.006419,4.688253,-3.136293,0.814951,1.198817,3.197282,2.198838,...,3.13894,4.841704,3.443421,-2.730828,-2.124692,-0.929852,-4.522588,-2.443146,-3.67529,-3.82944
FOGI.63.S13.001.fa,3.594296,-0.61124,0.322862,-0.505445,-2.583584,-0.913521,-0.274768,2.71489,0.887994,2.404863,...,2.407628,1.353132,-0.121505,-2.117494,-3.004797,-1.820056,-3.537014,-2.626143,-1.696893,-2.913825
COCL.11.S7.001.fa,1.536088,1.074962,4.6631,3.188711,1.774582,-2.081677,-2.133863,1.997295,2.537797,4.726931,...,2.444156,4.645267,1.707124,-2.901118,-3.214776,-2.330573,-4.213305,-3.594266,-0.489334,-3.85663
WACH.38.S1.001.fa,4.240597,-0.421227,2.999156,1.341303,0.023436,-1.574201,-1.208627,2.68513,0.050759,1.814724,...,-2.117196,3.141126,1.670049,-2.016654,-1.179972,-1.957565,-4.815676,-1.953475,-2.093441,-1.995148
PRPI.42.S31.001.fa,3.169797,-1.43864,7.172793,4.708191,3.824696,-0.532085,0.522833,2.376027,1.803052,1.17211,...,2.369289,-5.137255,-6.075525,0.473331,-1.156355,-0.917108,-1.554945,-1.227844,-0.051808,-1.882509
RODE.80.S8.001.fa,5.918568,-1.343871,4.211689,4.242758,4.215763,-0.235021,1.030012,2.403992,1.124451,2.924393,...,3.704593,1.527456,-1.924759,-0.191832,-1.928109,-1.326253,-1.318927,-1.467165,-2.030491,-1.091414
pos.dna.01.S17.001.fa,10.758325,10.466699,8.427199,8.411471,8.758813,-1.616304,-0.923157,3.4861,2.323658,1.673341,...,-0.923157,-1.010168,-1.798626,-0.667223,-0.635475,-1.010168,-2.309451,-2.714916,-2.1553,-1.703315
MEEN.93.S11.001.fa,4.110142,6.091029,4.004449,3.807656,2.422215,-0.461099,0.197382,3.913378,1.41049,4.790671,...,-2.942135,-2.301677,-2.368428,0.770305,1.697052,0.457184,-0.315932,-3.596317,-1.389233,-1.218305
DETH.41.S27.001.fa,3.183976,-3.761992,-0.928224,-2.9201,-3.462587,0.307518,-3.401551,1.633435,0.197562,-0.076657,...,1.070765,0.845063,0.288315,1.436088,-1.135138,-0.484491,-1.364955,-0.362683,-2.735072,0.163711


In [None]:
df_taxa_clr.index.name = "sample_id"

# Align the metadata spelling with the one derived from the taxa file names ("MERI_28").
df_metadata_file.loc[df_metadata_file["sample_id"] == "MEIR_28", "sample_id"] = "MERI_28"

# df_taxa_clr is still labelled by raw file names (e.g. "CAPMA.83.S12.001.fa"), which
# never equal the short metadata ids (e.g. "MERI_28"); merging on them directly left
# every metadata column empty. Derive the short ids first: the first two dot-separated
# fields joined by "_" (three fields for names starting with "pos"). Labels that are
# already short ids contain no "." and pass through unchanged.
clr_by_sample = df_taxa_clr.copy()
clr_by_sample.index = pd.Index(
    [
        "_".join(name.split(".")[: 3 if name.startswith("pos") else 2])
        for name in df_taxa_clr.index
    ],
    name="sample_id",
)

# Left merge: every CLR row is kept; rows without metadata (e.g. the "pos" rows) get NaN.
df_merged = pd.merge(clr_by_sample, df_metadata_file, on="sample_id", how="left")

In [None]:
# Sample ids from the taxa table that have no matching metadata row
# (MERI_28 is spelled differently in the two sources).
set(sample_ids) - set(df_metadata_file["sample_id"])

In [None]:
# Labels of the merged columns that contain "RDV".
list(filter(lambda name: "RDV" in name, df_merged.columns))

In [None]:
# Metadata columns of interest:
# Hospitalization_Days
# OUTCOME / Severe_Case
# ConcN1

# RDV

In [None]:
# Veillonella

In [80]:
# RDV column of the merged table. Requires the cell that builds df_merged to have
# been run first; otherwise this raises a NameError.
df_merged["RDV"]

NameError: name 'df_merged' is not defined

In [74]:
# `genera` may list taxa that are not columns of df_merged (for example because the
# prevalence/abundance filters removed them); asking seaborn for a missing column
# raises, so only plot the taxa that are actually present.
plottable = [col for col in genera if col in df_merged.columns]

# One boxplot of the CLR values per taxon, split by RDV.
for col in plottable:
    fig, ax = plt.subplots()
    sns.boxplot(x="RDV", y=col, data=df_merged, ax=ax)
    # The full lineage string is too long for an axis label; show only its last field.
    ax.set_ylabel(f"CLR abundance: {col.split(';')[-1]}")
    plt.show()
    # Release the figure so repeated runs do not accumulate open figures.
    plt.close(fig)

In [75]:
# Look for taxa columns whose label contains the species name.
species = "Enterococcus_faecalis"

list(filter(lambda name: species in name, df_taxa_nc.columns))

[]