In [1]:
!qiime --version

q2cli version 2025.4.0
Run `qiime info` for more version details.


# Metadata

In [2]:
import pandas as pd
import numpy as np

In [43]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [5]:
md = pd.read_csv('../../data/14834_metadata.txt', sep='\t')
hmo_stat = pd.read_csv('../../data/metab_HMO_secretor_status.csv', index_col=0)
dws = pd.read_csv('../../data/other_metadata/metab_observational-metadata-with-secretor.csv',
                 index_col=0)

In [6]:
hmo_stat['hmo_id'] = hmo_stat.index.astype(str)
hmo_stat.head()

Unnamed: 0_level_0,hmo_Secretor,hmo_id
SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1
23486,0,23486
32136,1,32136
40010,1,40010
41095,1,41095
41865,0,41865


In [7]:
hmo_stat.shape

(130, 2)

In [8]:
dws.shape

(533, 22)

In [9]:
ad_md = pd.merge(hmo_stat, dws[['drink_water_safe']], 
                 how='right', left_index=True, right_index=True)
ad_md.head()

Unnamed: 0_level_0,hmo_Secretor,hmo_id,drink_water_safe
SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
32208,,,nothing
23544,,,nothing
22843,,,nothing
51798,,,boil
23208,,,nothing


In [10]:
for index, row in md.iterrows():
        if isinstance(row['tube_id'], str) and row['tube_id'].startswith('14834.'):
            md.loc[index, 'tube_id'] = row['tube_id'][6:]

In [11]:
if 'host_age_infant' in md.columns:
    md['host_age_infant'] = md['host_age_infant'].replace('not applicable', -1)

In [12]:
md['sample_name'] = md['qiita_study_id'].astype(str)+"."+md['tube_id'].astype(str)
md.set_index('sample_name', inplace=True)

In [13]:
md.shape

(2124, 66)

In [14]:
md2 = pd.merge(md.reset_index(), ad_md, left_on='sample_barcode', right_on='hmo_id', how='left')
md2.shape

(2124, 70)

In [15]:
md2.set_index('sample_name', inplace=True)
md2.drop(columns=['hmo_id'], inplace=True)

In [16]:
# Create a dictionary to store the determined status for each host_subject_id
status_dict = {}

# Iterate through the DataFrame to determine the correct status for each host_subject_id
for index, row in md2.iterrows():
    subject_id = row['host_subject_id']
    status = row['hmo_Secretor']

    if status in ['0', '1']:
        if subject_id not in status_dict:
            status_dict[subject_id] = status

# Iterate through the DataFrame again to update the status values
for index, row in md2.iterrows():
    subject_id = row['host_subject_id']
    if subject_id in status_dict:
        md2.loc[index, 'hmo_Secretor'] = status_dict[subject_id]

In [17]:
# Create a dictionary to store the determined 'drink_water_safe' for each family_id
status_dict = {}

# Iterate through the DataFrame to determine the correct 'drink_water_safe' for each family_id
for index, row in md2.iterrows():
    family_id = row['family_id']
    status = row['drink_water_safe']

    if pd.notna(status) and status != 'unknown':
        if family_id not in status_dict:
            status_dict[family_id] = status

# Iterate through the DataFrame again to update the 'drink_water_safe' values
for index, row in md2.iterrows():
    family_id = row['family_id']
    if family_id in status_dict:
        md2.loc[index, 'drink_water_safe'] = status_dict[family_id]

In [18]:
# Create a dictionary to store the determined status for each host_subject_id
status_dict = {}

# Iterate through the DataFrame to determine the correct status for each host_subject_id
for index, row in md2.iterrows():
    subject_id = row['family_id']
    status = row['hmo_Secretor']

    if status in ['0', '1']:
        if subject_id not in status_dict:
            status_dict[subject_id] = status

# Iterate through the DataFrame again to update the status values
for index, row in md2.iterrows():
    subject_id = row['family_id']
    if subject_id in status_dict:
        md2.loc[index, 'hmo_Secretor'] = status_dict[subject_id]

In [19]:
md2.hmo_Secretor.value_counts()

hmo_Secretor
1          1204
0           536
unknown       3
Name: count, dtype: int64

In [20]:
def categorize(value):
        if value == 'nothing':
            return 'Untreated'
        else:
            return 'Treated'

In [21]:
md2['drink_water_safe_simplified'] = md2['drink_water_safe'].astype(str).apply(categorize)
md2.head()

Unnamed: 0_level_0,age_enrollment_study,collection_date,collection_timestamp,country,description,dna_extracted,elevation,empo_1,empo_2,empo_3,...,sample_barcode,sample_type,scientific_name,specimen_name,taxon_id,title,tube_id,hmo_Secretor,drink_water_safe,drink_water_safe_simplified
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
14834.100328,0,2021,2021,Bangladesh,infant stool,True,32,Host-associated,Host-associated (non-saline),Animal (non-saline),...,100328,feces,human gut metagenome,infantstool.193days.100328,408170,sepsis observational cohort schedule a,100328,1,nothing,Untreated
14834.100334,0,2021,2021,Bangladesh,infant stool,True,32,Host-associated,Host-associated (non-saline),Animal (non-saline),...,100334,feces,human gut metagenome,infantstool.180days.100334,408170,sepsis observational cohort schedule a,100334,0,filter,Treated
14834.100707,0,2021,2021,Bangladesh,maternal stool,True,32,Host-associated,Host-associated (non-saline),Animal (non-saline),...,100707,feces,human gut metagenome,maternalstool.180days.100707,408170,sepsis observational cohort schedule a,100707,1,nothing,Untreated
14834.100787,1,2021,2021,Bangladesh,maternal stool,True,32,Host-associated,Host-associated (non-saline),Animal (non-saline),...,100787,feces,human gut metagenome,maternalstool.194days.100787,408170,sepsis observational cohort schedule a,100787,1,nothing,Untreated
14834.100797,1,2021,2021,Bangladesh,maternal stool,True,32,Host-associated,Host-associated (non-saline),Animal (non-saline),...,100797,feces,human gut metagenome,maternalstool.180days.100797,408170,sepsis observational cohort schedule a,100797,1,nothing,Untreated


In [22]:
md2['sample_type_binary'] = md2['sample_type'].apply(lambda x: x if x in ['control blank', 'control positive'] else 'sample')

In [23]:
md2['host_age_infant'] = md2['host_age_infant'].astype(float)

In [24]:
def find_nearest_age(age):
    age_categories = [0, 7, 14, 30, 60, 90, 180]
    nearest_age = min(age_categories, key=lambda x: abs(x - age))
    return nearest_age

In [25]:
md2['host_age_infant_cat'] = md2['host_age_infant'].apply(find_nearest_age).astype(str) # Ensure it is a string

In [26]:
md2.host_age_infant_cat.value_counts()

host_age_infant_cat
0      638
14     342
7      315
60     249
30     233
90     188
180    159
Name: count, dtype: int64

In [27]:
md2['hmo_Secretor_str'] = md2['hmo_Secretor'].map({'0': 'not a secretor', 
                                                    '1': 'secretor', 
                                                    np.nan: 'unknown'})
md2['hmo_Secretor_str'].value_counts()

hmo_Secretor_str
secretor          1204
not a secretor     536
unknown            381
Name: count, dtype: int64

In [28]:
md2['sample_type2'] = md2['host_life_stage'].astype(str)+"_"+md2['sample_type'].astype(str)
md2['sample_type2'].value_counts()

sample_type2
infant_feces                       385
adult_feces                        385
infant_skin of arm                 350
infant_tongue                      350
not applicable_control blank       261
adult_breast milk                  219
not applicable_control positive     88
adult_vaginal mucus                 50
infant_plasma                       34
not applicable_not a specimen        2
Name: count, dtype: int64

In [29]:
md2['delivery_bf-cat'] = md2['mode_delivery'].astype(str)+"_"+md2['fp_long_cat'].astype(str)
md2['delivery_bf-cat'].value_counts()

delivery_bf-cat
Vaginal_nan                      699
C-section_nan                    688
not applicable_not applicable    351
C-section_EBF                    131
Vaginal_EBF                      115
Vaginal_Partial BF                47
C-section_Predominant BF          37
Vaginal_Predominant BF            34
C-section_Partial BF              21
C-section_not applicable           1
Name: count, dtype: int64

In [30]:
ss = {'adult':'Source', 'infant':'Sink', 'not applicable':'not applicable'}
md2['SourceSink'] = md2['host_life_stage'].map(ss)

In [31]:
md2['30d_cat'] = md2['host_age_infant'].apply(lambda x: '30 days of age or less' if x <= 30 else 'older than 30 days')

In [32]:
md2['sample_type']=='feces'

sample_name
14834.100328                         True
14834.100334                         True
14834.100707                         True
14834.100787                         True
14834.100797                         True
                                    ...  
14834.KATHARO.SEP.21.11H            False
14834.donotuse.SEP.15.12E           False
14834.donotuse.SEP.7.8E             False
14834.infantstool.21days.41875       True
14834.maternalstool.21days.41875     True
Name: sample_type, Length: 2124, dtype: bool

In [33]:
family_identifiers = ['10058', '10374', '10571', '10684', '11339', '11494', '12258', '12281']
md2['variability'] = None

# Apply the condition ONLY to rows where 'sample_type' is 'feces'
feces_rows_mask = md2['sample_type']=='feces'

md2.loc[feces_rows_mask, 'variability'] = np.where(
    md2.loc[feces_rows_mask, 'family_id'].isin(family_identifiers),
    'adult more variable than infant',
    'infant more variable than adult'
)

md2.head()

Unnamed: 0_level_0,age_enrollment_study,collection_date,collection_timestamp,country,description,dna_extracted,elevation,empo_1,empo_2,empo_3,...,drink_water_safe,drink_water_safe_simplified,sample_type_binary,host_age_infant_cat,hmo_Secretor_str,sample_type2,delivery_bf-cat,SourceSink,30d_cat,variability
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
14834.100328,0,2021,2021,Bangladesh,infant stool,True,32,Host-associated,Host-associated (non-saline),Animal (non-saline),...,nothing,Untreated,sample,180,secretor,infant_feces,Vaginal_Partial BF,Sink,older than 30 days,infant more variable than adult
14834.100334,0,2021,2021,Bangladesh,infant stool,True,32,Host-associated,Host-associated (non-saline),Animal (non-saline),...,filter,Treated,sample,180,not a secretor,infant_feces,Vaginal_Partial BF,Sink,older than 30 days,infant more variable than adult
14834.100707,0,2021,2021,Bangladesh,maternal stool,True,32,Host-associated,Host-associated (non-saline),Animal (non-saline),...,nothing,Untreated,sample,180,secretor,adult_feces,Vaginal_nan,Source,older than 30 days,infant more variable than adult
14834.100787,1,2021,2021,Bangladesh,maternal stool,True,32,Host-associated,Host-associated (non-saline),Animal (non-saline),...,nothing,Untreated,sample,180,secretor,adult_feces,Vaginal_nan,Source,older than 30 days,infant more variable than adult
14834.100797,1,2021,2021,Bangladesh,maternal stool,True,32,Host-associated,Host-associated (non-saline),Animal (non-saline),...,nothing,Untreated,sample,180,secretor,adult_feces,Vaginal_nan,Source,older than 30 days,infant more variable than adult


In [30]:
md2.to_csv('../data/metadata_combined.txt', sep='\t')

In [34]:
md2.sample_type.value_counts()

sample_type
feces               770
skin of arm         350
tongue              350
control blank       261
breast milk         219
control positive     88
vaginal mucus        50
plasma               34
not a specimen        2
Name: count, dtype: int64

In [35]:
print(md2.host_age_infant.dtype)

float64


#### For SCRuB

In [38]:
smp_prep_other = pd.read_csv('../../data/14834_prep_17069_20240711-165257.txt', sep = '\t', index_col=0)
smp_prep_other.head()

Unnamed: 0_level_0,center_name,center_project_name,experiment_design_description,fraction_non_human,fraction_passing_quality_filter,i5_index_id,i7_index_id,index,index2,instrument_model,...,run_center,run_date,run_prefix,runid,sample_plate,sample_project,sample_well,sequencing_meth,total_biological_reads_r1r2,well_description
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
14834.23409,UCSD,SickKids_SEPSiS,NovaSeq WGS,,0.031077,iTru5_112_H,iTru7_109_10,TGGCACTA,CCGATGTA,Illumina NovaSeq 6000,...,IGM,2023-02-08,23409_S1055_L003,230208_A00953_0688_AHVGW2DSX5,SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P21,SickKids_SEPSiS,P23,sequencing by synthesis,9174092.0,SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P21...
14834.23425,UCSD,SickKids_SEPSiS,NovaSeq WGS,,0.127588,iTru5_108_A,iTru7_102_06,TGTGCGTT,GACGATCT,Illumina NovaSeq 6000,...,IGM,2023-02-08,23425_S967_L003,230208_A00953_0688_AHVGW2DSX5,SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P21,SickKids_SEPSiS,P1,sequencing by synthesis,6152340.0,SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P21...
14834.23946,UCSD,SickKids_SEPSiS,NovaSeq WGS,,0.025014,iTru5_108_G,iTru7_108_06,ACCGCATA,GATAGGCT,Illumina NovaSeq 6000,...,IGM,2023-02-08,23946_S1039_L003,230208_A00953_0688_AHVGW2DSX5,SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P21,SickKids_SEPSiS,P19,sequencing by synthesis,3725584.0,SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P21...
14834.24316,UCSD,SickKids_SEPSiS,NovaSeq WGS,,0.000585,iTru5_12_E,iTru7_208_10,AACTGGTG,AGAACGAG,Illumina NovaSeq 6000,...,IGM,2023-02-08,24316_S828_L003,230208_A00953_0688_AHVGW2DSX5,SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P15...,SickKids_SEPSiS,G15,sequencing by synthesis,4693448.0,SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P15...
14834.24317,UCSD,SickKids_SEPSiS,NovaSeq WGS,,0.71368,iTru5_09_C,iTru7_206_06,AACGACGT,ACAGCAAC,Illumina NovaSeq 6000,...,IGM,2023-02-08,24317_S417_L003,230208_A00953_0688_AHVGW2DSX5,SEPSiS_SickKids_infant_skin_16-19,SickKids_SEPSiS,A9,sequencing by synthesis,3129624.0,SEPSiS_SickKids_infant_skin_16-19.24317.A9


In [39]:
smp_prep_other.sample_plate.unique()

array(['SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P21',
       'SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P15_diluted',
       'SEPSiS_SickKids_infant_skin_16-19',
       'SickKids_SEPSiS_Infant_Oral_14834_P14',
       'SickKids_SEPSiS_Infant_Oral_14834_P11',
       'SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P15',
       'SickKids_SEPSiS_Infant_Oral_14834_P13',
       'SickKids_SEPSiS_Infant_Oral_14834_P12',
       'SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P20',
       'SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P21_diluted'],
      dtype=object)

In [40]:
plates = {'SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P21':'P21',
       'SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P15_diluted':'P15',
       'SEPSiS_SickKids_infant_skin_16-19':'P1619',
       'SickKids_SEPSiS_Infant_Oral_14834_P14':'P14',
       'SickKids_SEPSiS_Infant_Oral_14834_P11':'P11',
       'SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P15':'P15',
       'SickKids_SEPSiS_Infant_Oral_14834_P13':'P13',
       'SickKids_SEPSiS_Infant_Oral_14834_P12':'P12',
       'SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P20':'P20',
       'SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P21_diluted':'P21'}
smp_prep_other['plate_short'] = smp_prep_other['sample_plate'].map(plates)
smp_prep_other['plate_well'] = smp_prep_other['plate_short'].astype(str)+smp_prep_other['sample_well'].astype(str)
smp_prep_other.head()

Unnamed: 0_level_0,center_name,center_project_name,experiment_design_description,fraction_non_human,fraction_passing_quality_filter,i5_index_id,i7_index_id,index,index2,instrument_model,...,run_prefix,runid,sample_plate,sample_project,sample_well,sequencing_meth,total_biological_reads_r1r2,well_description,plate_short,plate_well
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
14834.23409,UCSD,SickKids_SEPSiS,NovaSeq WGS,,0.031077,iTru5_112_H,iTru7_109_10,TGGCACTA,CCGATGTA,Illumina NovaSeq 6000,...,23409_S1055_L003,230208_A00953_0688_AHVGW2DSX5,SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P21,SickKids_SEPSiS,P23,sequencing by synthesis,9174092.0,SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P21...,P21,P21P23
14834.23425,UCSD,SickKids_SEPSiS,NovaSeq WGS,,0.127588,iTru5_108_A,iTru7_102_06,TGTGCGTT,GACGATCT,Illumina NovaSeq 6000,...,23425_S967_L003,230208_A00953_0688_AHVGW2DSX5,SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P21,SickKids_SEPSiS,P1,sequencing by synthesis,6152340.0,SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P21...,P21,P21P1
14834.23946,UCSD,SickKids_SEPSiS,NovaSeq WGS,,0.025014,iTru5_108_G,iTru7_108_06,ACCGCATA,GATAGGCT,Illumina NovaSeq 6000,...,23946_S1039_L003,230208_A00953_0688_AHVGW2DSX5,SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P21,SickKids_SEPSiS,P19,sequencing by synthesis,3725584.0,SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P21...,P21,P21P19
14834.24316,UCSD,SickKids_SEPSiS,NovaSeq WGS,,0.000585,iTru5_12_E,iTru7_208_10,AACTGGTG,AGAACGAG,Illumina NovaSeq 6000,...,24316_S828_L003,230208_A00953_0688_AHVGW2DSX5,SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P15...,SickKids_SEPSiS,G15,sequencing by synthesis,4693448.0,SEPSIS_Infant_Skin_Oral_Vaginal_Human_Milk_P15...,P15,P15G15
14834.24317,UCSD,SickKids_SEPSiS,NovaSeq WGS,,0.71368,iTru5_09_C,iTru7_206_06,AACGACGT,ACAGCAAC,Illumina NovaSeq 6000,...,24317_S417_L003,230208_A00953_0688_AHVGW2DSX5,SEPSiS_SickKids_infant_skin_16-19,SickKids_SEPSiS,A9,sequencing by synthesis,3129624.0,SEPSiS_SickKids_infant_skin_16-19.24317.A9,P1619,P1619A9


In [44]:
scrub_meta = pd.DataFrame()
scrub_meta['sampleid'] = smp_prep_other.index
mdw = md2[md2.index.isin(smp_prep_other.index.values)]
scrub_meta['is_control'] = scrub_meta['sampleid'].map(mdw['empo_1']).replace({'Host-associated': 0.0, 'Control': 1.0})
scrub_meta['sample_type'] = scrub_meta['sampleid'].map(mdw['sample_type'])
scrub_meta['well_id'] = scrub_meta['sampleid'].map(smp_prep_other['plate_well'])
scrub_meta['plate'] = scrub_meta['sampleid'].map(smp_prep_other['plate_short'])

scrub_meta.to_csv('../../data/scrub_other_metadata.tsv', sep = '\t', index = False)
scrub_meta.tail()

Unnamed: 0,sampleid,is_control,sample_type,well_id,plate
1050,14834.KATHARO.SEP.21.11D,1.0,control positive,P21H21,P21
1051,14834.KATHARO.SEP.21.11E,1.0,control positive,P21J21,P21
1052,14834.KATHARO.SEP.21.11F,1.0,control positive,P21L21,P21
1053,14834.KATHARO.SEP.21.11G,1.0,control positive,P21N21,P21
1054,14834.KATHARO.SEP.21.11H,1.0,control positive,P21P21,P21


In [46]:
scrub_meta = pd.DataFrame()
scrub_meta['sampleid'] = smp_prep_other.index
mdw = md2[md2.index.isin(smp_prep_other.index.values)]
scrub_meta['is_control'] = scrub_meta['sampleid'].map(mdw['empo_1']).replace({'Host-associated': 0.0, 'Control': 1.0})
scrub_meta['sample_type'] = scrub_meta['sampleid'].map(mdw['sample_type'])
scrub_meta['well_id'] = scrub_meta['sampleid'].map(smp_prep_other['sample_well'])
scrub_meta['plate'] = scrub_meta['sampleid'].map(smp_prep_other['plate_short'])

for x in scrub_meta.plate.unique():
    df = scrub_meta[scrub_meta['plate']==x]
    df.to_csv('../../data/scrub_other_%s_metadata.tsv' % x, sep = '\t', index = False)

In [48]:
smp_prep_fecal = pd.read_csv('../../data/14834_prep_17078_20240716-161431.txt', sep = '\t', index_col=0)
smp_prep_fecal.head()

Unnamed: 0_level_0,center_name,center_project_name,experiment_design_description,fraction_passing_quality_filter,i5_index_id,i7_index_id,index,index2,instrument_model,lane,...,run_center,run_date,run_prefix,runid,sample_plate,sample_project,sample_well,sequencing_meth,total_biological_reads_r1r2,well_description
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
14834.100328,UCSD,SEPSiS,whole genome sequencing,0.771062,iTru5_19_D,iTru7_304_09,TCGTGGAT,TAGTGACC,Illumina NovaSeq 6000,2,...,IGM,2023-01-09,100328_S617_L002,230109_A00953_0675_BHNL7YDSX5,SEPSiS Infant Fecal Plate 1-4,SEPSiS,E12,sequencing by synthesis,3003094.0,SEPSiS Infant Fecal Plate 1-4.100328.E12
14834.100334,UCSD,SEPSiS,whole genome sequencing,0.76204,iTru5_14_D,iTru7_304_04,CCTCAGTT,CGAACTGT,Illumina NovaSeq 6000,2,...,IGM,2023-01-09,100334_S612_L002,230109_A00953_0675_BHNL7YDSX5,SEPSiS Infant Fecal Plate 1-4,SEPSiS,K10,sequencing by synthesis,4142150.0,SEPSiS Infant Fecal Plate 1-4.100334.K10
14834.100707,UCSD,SEPSiS,whole genome sequencing,0.658879,iTru5_04_F,iTru7_208_10,AACTGGTG,ACCTGGAA,Illumina NovaSeq 6000,2,...,IGM,2023-01-09,100707_S159_L002,230109_A00953_0675_BHNL7YDSX5,SickKids_SEPSiS_maternal_fecal_P7_diluted,SEPSiS,O15,sequencing by synthesis,3046226.0,SickKids_SEPSiS_maternal_fecal_P7_diluted.1007...
14834.100787,UCSD,SEPSiS,whole genome sequencing,0.748281,iTru5_02_G,iTru7_209_08,GTGTGACA,ACCATCCA,Illumina NovaSeq 6000,2,...,IGM,2023-01-09,100787_S168_L002,230109_A00953_0675_BHNL7YDSX5,SickKids_SEPSiS_maternal_fecal_P7,SEPSiS,C19,sequencing by synthesis,3526030.0,SickKids_SEPSiS_maternal_fecal_P7.100787.C19
14834.100797,UCSD,SEPSiS,whole genome sequencing,0.759062,iTru5_06_C,iTru7_205_12,ATCTGTCC,TGGTAGCT,Illumina NovaSeq 6000,2,...,IGM,2023-01-09,100797_S126_L002,230109_A00953_0675_BHNL7YDSX5,SickKids_SEPSiS_maternal_fecal_P7_diluted,SEPSiS,K7,sequencing by synthesis,3076454.0,SickKids_SEPSiS_maternal_fecal_P7_diluted.1007...


In [49]:
plates = {'SEPSiS Infant Fecal Plate 1-4':'P14' ,
       'SickKids_SEPSiS_maternal_fecal_P7_diluted': 'P7',
       'SickKids_SEPSiS_maternal_fecal_P7':"P7",
       'SickKids_SEPSiS_maternal_fecal_P9':"P9",
       'SickKids_SEPSiS_maternal_fecal_P9_diluted':"P9",
       'SickKids_SEPSiS_infant_maternal_fecal_P5[SEPSiS]':"P5",
       'SickKids_SEPSiS_maternal_fecal_P8':"P8",
       'SickKids_SEPSiS_maternal_fecal_P10':"P10",
       'SickKids_SEPSiS_maternal_fecal_P10_diluted':"P10",
       'SickKids_SEPSiS_maternal_fecal_P8_diluted':"P8",
       'SickKids_SEPSiS_infant_maternal_fecal_P5[SEPSiS]_diluted':"P5"}
smp_prep_fecal['plate_short'] = smp_prep_fecal['sample_plate'].map(plates)
smp_prep_fecal['plate_well'] = smp_prep_fecal['plate_short'].astype(str)+smp_prep_fecal['sample_well'].astype(str)
smp_prep_fecal.head()

Unnamed: 0_level_0,center_name,center_project_name,experiment_design_description,fraction_passing_quality_filter,i5_index_id,i7_index_id,index,index2,instrument_model,lane,...,run_prefix,runid,sample_plate,sample_project,sample_well,sequencing_meth,total_biological_reads_r1r2,well_description,plate_short,plate_well
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
14834.100328,UCSD,SEPSiS,whole genome sequencing,0.771062,iTru5_19_D,iTru7_304_09,TCGTGGAT,TAGTGACC,Illumina NovaSeq 6000,2,...,100328_S617_L002,230109_A00953_0675_BHNL7YDSX5,SEPSiS Infant Fecal Plate 1-4,SEPSiS,E12,sequencing by synthesis,3003094.0,SEPSiS Infant Fecal Plate 1-4.100328.E12,P14,P14E12
14834.100334,UCSD,SEPSiS,whole genome sequencing,0.76204,iTru5_14_D,iTru7_304_04,CCTCAGTT,CGAACTGT,Illumina NovaSeq 6000,2,...,100334_S612_L002,230109_A00953_0675_BHNL7YDSX5,SEPSiS Infant Fecal Plate 1-4,SEPSiS,K10,sequencing by synthesis,4142150.0,SEPSiS Infant Fecal Plate 1-4.100334.K10,P14,P14K10
14834.100707,UCSD,SEPSiS,whole genome sequencing,0.658879,iTru5_04_F,iTru7_208_10,AACTGGTG,ACCTGGAA,Illumina NovaSeq 6000,2,...,100707_S159_L002,230109_A00953_0675_BHNL7YDSX5,SickKids_SEPSiS_maternal_fecal_P7_diluted,SEPSiS,O15,sequencing by synthesis,3046226.0,SickKids_SEPSiS_maternal_fecal_P7_diluted.1007...,P7,P7O15
14834.100787,UCSD,SEPSiS,whole genome sequencing,0.748281,iTru5_02_G,iTru7_209_08,GTGTGACA,ACCATCCA,Illumina NovaSeq 6000,2,...,100787_S168_L002,230109_A00953_0675_BHNL7YDSX5,SickKids_SEPSiS_maternal_fecal_P7,SEPSiS,C19,sequencing by synthesis,3526030.0,SickKids_SEPSiS_maternal_fecal_P7.100787.C19,P7,P7C19
14834.100797,UCSD,SEPSiS,whole genome sequencing,0.759062,iTru5_06_C,iTru7_205_12,ATCTGTCC,TGGTAGCT,Illumina NovaSeq 6000,2,...,100797_S126_L002,230109_A00953_0675_BHNL7YDSX5,SickKids_SEPSiS_maternal_fecal_P7_diluted,SEPSiS,K7,sequencing by synthesis,3076454.0,SickKids_SEPSiS_maternal_fecal_P7_diluted.1007...,P7,P7K7


In [51]:
scrub_meta_fecal = pd.DataFrame()
scrub_meta_fecal['sampleid'] = smp_prep_fecal.index
mdx = md2[md2.index.isin(smp_prep_fecal.index.values)]
scrub_meta_fecal['is_control'] = scrub_meta_fecal['sampleid'].map(mdx['empo_1']).replace({'Host-associated': 0.0, 'Control': 1.0})
scrub_meta_fecal['sample_type'] = scrub_meta_fecal['sampleid'].map(mdx['sample_type'])
scrub_meta_fecal['well_id'] = scrub_meta_fecal['sampleid'].map(smp_prep_fecal['plate_well'])

scrub_meta_fecal.to_csv('../../data/scrub_fecal_metadata.tsv', sep = '\t', index = False)
scrub_meta_fecal.tail()

Unnamed: 0,sampleid,is_control,sample_type,well_id
857,14834.BLANK.SEP.9.4.F,1.0,control blank,P9L7
858,14834.BLANK.SEP.9.4.G,1.0,control blank,P9N7
859,14834.BLANK.SEP.9.4.H,1.0,control blank,P9P7
860,14834.infantstool.21days.41875,0.0,feces,P14I17
861,14834.maternalstool.21days.41875,0.0,feces,P10L20


In [52]:
combined_plates = pd.concat([smp_prep_other['plate_short'], smp_prep_fecal['plate_short']])
md2['plate'] = md2.index.map(combined_plates)

In [53]:
md2.plate.unique()

array(['P14', 'P7', 'P9', 'P5', 'P8', nan, 'P21', 'P10', 'P15', 'P1619',
       'P11', 'P13', 'P12', 'P20'], dtype=object)

# Qiime2

In [455]:
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where '[plate]="P21"' \
    --o-filtered-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P21.qza

!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where '[plate]="P15"' \
    --o-filtered-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P15.qza

!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where '[plate]="P1619"' \
    --o-filtered-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P1619.qza

!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where '[plate]="P11"' \
    --o-filtered-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P11.qza

!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where '[plate]="P12"' \
    --o-filtered-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P12.qza

!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where '[plate]="P13"' \
    --o-filtered-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P13.qza

!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where '[plate]="P14"' \
    --o-filtered-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P14.qza

!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where '[plate]="P20"' \
    --o-filtered-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P20.qza

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P21.qza[0m
[0m[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P15.qza[0m
[0m[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P1619.qza[0m
[0m[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P11.qza[0m
[0m[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P12.qza[0m
[0m[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P13.qza[0m
[0m[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P14.qza[0m
[0m[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P20.qza[0m
[0m

In [None]:
! qiime SCRuB SCRuB \
    --i-table ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P20.qza \
    --m-metadata-file ../data/scrub_other_P20_metadata.tsv \
    --p-control-idx-column is_control \
    --p-sample-type-column sample_type \
    --p-well-location-column well_id \
    --o-scrubbed ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P20_scrubbed.qza
#ran in scrub environment for all plates

In [None]:
! qiime SCRuB SCRuB \
    --i-table ../data/Microbiome/218212_gg2filt-2024.09_feces_ftable.qza \
    --m-metadata-file ../data/scrub_fecal_metadata.tsv \
    --p-control-idx-column is_control \
    --p-sample-type-column sample_type \
    --p-well-location-column well_id \
    --o-scrubbed ../data/Microbiome/218212_gg2filt-2024.09_feces_ftable_scrubbed.qza
#ran in scrub environment

In [456]:
!qiime feature-table merge \
    --i-tables ../data/Microbiome/218212_gg2filt-2024.09_feces_ftable_scrubbed.qza \
    ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P21_scrubbed.qza \
    ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P20_scrubbed.qza \
    ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P11_scrubbed.qza \
    ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P12_scrubbed.qza \
    ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P13_scrubbed.qza \
    ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P14_scrubbed.qza \
    ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P15_scrubbed.qza \
    ../data/Microbiome/218211_gg2filt-2024.09_other_ftable_P1619_scrubbed.qza \
    --p-overlap-method 'error_on_overlapping_sample' \
    --o-merged-table ../data/Microbiome/combined_feature_table.qza
#already filtered against gg2 2024.09 on Qiita and SCRuBbed for contamination (low biomass samples)

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_feature_table.qza[0m
[0m

In [457]:
!qiime feature-table filter-features \
    --i-table ../data/Microbiome/combined_feature_table.qza \
    --m-metadata-file ../data/Microbiome/sepsis-obs.fecal.wolr2.coverage.tsv \
    --p-where '[percent_covered]>0.25' \
    --o-filtered-table ../data/Microbiome/combined_ftable_cov25.qza
#file from using micov, ran by Lucas Patel

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_cov25.qza[0m
[0m

In [458]:
!qiime feature-table filter-features-conditionally \
    --i-table ../data/Microbiome/combined_ftable_cov25.qza \
    --p-abundance 0.000001 \
    --p-prevalence 0.01 \
    --o-filtered-table ../data/Microbiome/combined_ftable_cov25_prev1.qza
#required to be present in at least 1 % of samples and at least 10/10,000,000 reads (0.000001)

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_cov25_prev1.qza[0m
[0m

In [459]:
!qiime feature-table summarize \
    --i-table ../data/Microbiome/combined_ftable_cov25_prev1.qza \
    --m-sample-metadata-file ../data/metadata_combined.txt \
    --o-visualization ../data/Microbiome/combined_ftable_cov25_prev1.qzv

[32mSaved Visualization to: ../data/Microbiome/combined_ftable_cov25_prev1.qzv[0m
[0m

In [461]:
!qiime tools export \
    --input-path ../data/Microbiome/combined_ftable_cov25_prev1.qza \
    --output-path ../data/Microbiome/combined_ftable_cov25_prev1

[32mExported ../data/Microbiome/combined_ftable_cov25_prev1.qza as BIOMV210DirFmt to directory ../data/Microbiome/combined_ftable_cov25_prev1[0m
[0m

In [6]:
!qiime greengenes2 taxonomy-from-table \
    --i-reference-taxonomy ../../greengenes2/2024.09.taxonomy.id.nwk.qza \
    --i-table ../data/Microbiome/combined_ftable_cov25_prev1.qza \
    --o-classification ../data/Microbiome/sepsis_taxonomy.qza

[32mSaved FeatureData[Taxonomy] to: ../data/Microbiome/sepsis_taxonomy.qza[0m
[0m

## Alpha Diversity

In [9]:
!qiime feature-table rarefy \
    --i-table ../data/Microbiome/combined_ftable_both_feces.qza \
    --p-sampling-depth 500000 \
    --o-rarefied-table ../data/Microbiome/combined_ftable_both_feces_rar500K.qza

!qiime feature-table rarefy \
    --i-table ../data/Microbiome/combined_ftable_infant_skin.qza \
    --p-sampling-depth 300000 \
    --o-rarefied-table ../data/Microbiome/combined_ftable_infant_skin_rar300K.qza

!qiime feature-table rarefy \
    --i-table ../data/Microbiome/combined_ftable_infant_tongue.qza \
    --p-sampling-depth 20000 \
    --o-rarefied-table ../data/Microbiome/combined_ftable_infant_tongue_rar20K.qza

!qiime feature-table rarefy \
    --i-table ../data/Microbiome/combined_ftable_adult_milk.qza \
    --p-sampling-depth 750 \
    --o-rarefied-table ../data/Microbiome/combined_ftable_adult_milk_rar750.qza

!qiime feature-table rarefy \
    --i-table ../data/Microbiome/combined_ftable_adult_vagina.qza \
    --p-sampling-depth 600 \
    --o-rarefied-table ../data/Microbiome/combined_ftable_adult_vagina_rar600.qza

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_both_feces_rar500K.qza[0m
[0m[?25h[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_infant_skin_rar300K.qza[0m
[0m[?25h[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_infant_tongue_rar20K.qza[0m
[0m[?25h[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_adult_milk_rar750.qza[0m
[0m[?25h[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_adult_vagina_rar600.qza[0m
[0m[?25h

In [10]:
!qiime diversity alpha-phylogenetic \
    --i-table ../data/Microbiome/combined_ftable_both_feces_rar500K.qza \
    --i-phylogeny ../../greengenes2/2024.09.phylogeny.id.nwk.qza \
    --p-metric faith_pd \
    --o-alpha-diversity ../data/Microbiome/each_body_site_faith-pd/faith_feces_rar500K.qza

!qiime diversity alpha-phylogenetic \
    --i-table ../data/Microbiome/combined_ftable_infant_skin_rar300K.qza \
    --i-phylogeny ../../greengenes2/2024.09.phylogeny.id.nwk.qza \
    --p-metric faith_pd \
    --o-alpha-diversity ../data/Microbiome/each_body_site_faith-pd/faith_infant_skin_rar300K.qza

!qiime diversity alpha-phylogenetic \
    --i-table ../data/Microbiome/combined_ftable_infant_tongue_rar20K.qza \
    --i-phylogeny ../../greengenes2/2024.09.phylogeny.id.nwk.qza \
    --p-metric faith_pd \
    --o-alpha-diversity ../data/Microbiome/each_body_site_faith-pd/faith_infant_tongue_rar20K.qza

!qiime diversity alpha-phylogenetic \
    --i-table ../data/Microbiome/combined_ftable_adult_milk_rar750.qza \
    --i-phylogeny ../../greengenes2/2024.09.phylogeny.id.nwk.qza \
    --p-metric faith_pd \
    --o-alpha-diversity ../data/Microbiome/each_body_site_faith-pd/faith_adult_milk_rar750.qza

!qiime diversity alpha-phylogenetic \
    --i-table ../data/Microbiome/combined_ftable_adult_vagina_rar600.qza \
    --i-phylogeny ../../greengenes2/2024.09.phylogeny.id.nwk.qza \
    --p-metric faith_pd \
    --o-alpha-diversity ../data/Microbiome/each_body_site_faith-pd/faith_adult_vagina_rar600.qza

[32mSaved SampleData[AlphaDiversity] to: ../data/Microbiome/each_body_site_faith-pd/faith_feces_rar500K.qza[0m
[0m[?25h[32mSaved SampleData[AlphaDiversity] to: ../data/Microbiome/each_body_site_faith-pd/faith_infant_skin_rar300K.qza[0m
[0m[?25h[32mSaved SampleData[AlphaDiversity] to: ../data/Microbiome/each_body_site_faith-pd/faith_infant_tongue_rar20K.qza[0m
[0m[?25h[32mSaved SampleData[AlphaDiversity] to: ../data/Microbiome/each_body_site_faith-pd/faith_adult_milk_rar750.qza[0m
[0m[?25h[32mSaved SampleData[AlphaDiversity] to: ../data/Microbiome/each_body_site_faith-pd/faith_adult_vagina_rar600.qza[0m
[0m[?25h

In [11]:
!qiime tools export \
    --input-path ../data/Microbiome/each_body_site_faith-pd/faith_feces_rar500K.qza \
    --output-path ../data/Microbiome/each_body_site_faith-pd/faith_feces_rar500K

!qiime tools export \
    --input-path ../data/Microbiome/each_body_site_faith-pd/faith_infant_skin_rar300K.qza \
    --output-path ../data/Microbiome/each_body_site_faith-pd/faith_infant_skin_rar300K

!qiime tools export \
    --input-path ../data/Microbiome/each_body_site_faith-pd/faith_infant_tongue_rar20K.qza \
    --output-path ../data/Microbiome/each_body_site_faith-pd/faith_infant_tongue_rar20K

!qiime tools export \
    --input-path ../data/Microbiome/each_body_site_faith-pd/faith_adult_milk_rar750.qza \
    --output-path ../data/Microbiome/each_body_site_faith-pd/faith_adult_milk_rar750

!qiime tools export \
    --input-path ../data/Microbiome/each_body_site_faith-pd/faith_adult_vagina_rar600.qza \
    --output-path ../data/Microbiome/each_body_site_faith-pd/faith_adult_vagina_rar600

[32mExported ../data/Microbiome/each_body_site_faith-pd/faith_feces_rar500K.qza as AlphaDiversityDirectoryFormat to directory ../data/Microbiome/each_body_site_faith-pd/faith_feces_rar500K[0m
[0m[?25h[32mExported ../data/Microbiome/each_body_site_faith-pd/faith_infant_skin_rar300K.qza as AlphaDiversityDirectoryFormat to directory ../data/Microbiome/each_body_site_faith-pd/faith_infant_skin_rar300K[0m
[0m[?25h[32mExported ../data/Microbiome/each_body_site_faith-pd/faith_infant_tongue_rar20K.qza as AlphaDiversityDirectoryFormat to directory ../data/Microbiome/each_body_site_faith-pd/faith_infant_tongue_rar20K[0m
[0m[?25h[32mExported ../data/Microbiome/each_body_site_faith-pd/faith_adult_milk_rar750.qza as AlphaDiversityDirectoryFormat to directory ../data/Microbiome/each_body_site_faith-pd/faith_adult_milk_rar750[0m
[0m[?25h[32mExported ../data/Microbiome/each_body_site_faith-pd/faith_adult_vagina_rar600.qza as AlphaDiversityDirectoryFormat to directory ../data/Microbiom

In [1]:
!qiime diversity alpha \
    --i-table ../data/Microbiome/combined_ftable_both_feces_rar500K.qza \
    --p-metric shannon \
    --o-alpha-diversity ../data/Microbiome/each_body_site_faith-pd/shannon_feces_rar500K.qza

[32mSaved SampleData[AlphaDiversity] to: ../data/Microbiome/each_body_site_faith-pd/shannon_feces_rar500K.qza[0m
[0m[?25h

In [2]:
!qiime tools export \
    --input-path ../data/Microbiome/each_body_site_faith-pd/shannon_feces_rar500K.qza \
    --output-path ../data/Microbiome/each_body_site_faith-pd/shannon_feces_rar500K

[32mExported ../data/Microbiome/each_body_site_faith-pd/shannon_feces_rar500K.qza as AlphaDiversityDirectoryFormat to directory ../data/Microbiome/each_body_site_faith-pd/shannon_feces_rar500K[0m
[0m[?25h

# Subsets

In [5]:
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_noblanks.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where "[host_life_stage]='infant'" \
    --o-filtered-table ../data/Microbiome/combined_ftable_infant.qza
    
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_noblanks.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where "[host_life_stage]='adult'" \
    --o-filtered-table ../data/Microbiome/combined_ftable_adult.qza
#samples to keep, adult has single timepoints

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_infant.qza[0m
[0m[?25h[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_adult.qza[0m
[0m[?25h

In [6]:
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_noblanks.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where "[sample_type]='feces'" \
    --o-filtered-table ../data/Microbiome/combined_ftable_both_feces.qza

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_both_feces.qza[0m
[0m[?25h

In [7]:
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_infant.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where "[sample_type]='feces'" \
    --o-filtered-table ../data/Microbiome/combined_ftable_infant_feces.qza

!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_infant.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where "[sample_type]='skin of arm'" \
    --o-filtered-table ../data/Microbiome/combined_ftable_infant_skin.qza

!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_infant.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where "[sample_type]='tongue'" \
    --o-filtered-table ../data/Microbiome/combined_ftable_infant_tongue.qza

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_infant_feces.qza[0m
[0m[?25h[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_infant_skin.qza[0m
[0m[?25h[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_infant_tongue.qza[0m
[0m[?25h

In [8]:
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_adult.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where "[sample_type]='feces'" \
    --o-filtered-table ../data/Microbiome/combined_ftable_adult_feces.qza

!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_adult.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where "[sample_type]='breast milk'" \
    --o-filtered-table ../data/Microbiome/combined_ftable_adult_milk.qza

!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_adult.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where "[sample_type]='vaginal mucus'" \
    --o-filtered-table ../data/Microbiome/combined_ftable_adult_vagina.qza

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_adult_feces.qza[0m
[0m[?25h[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_adult_milk.qza[0m
[0m[?25h[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_adult_vagina.qza[0m
[0m[?25h

In [15]:
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_adult_feces.qza \
    --p-min-frequency 100000 \
    --o-filtered-table ../data/Microbiome/ftable_adult_feces_min10-5.qza

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/ftable_adult_feces_min10-5.qza[0m
[0m

In [16]:
!qiime taxa collapse \
    --i-table ../data/Microbiome/ftable_adult_feces_min10-5.qza \
    --i-taxonomy ../data/Microbiome/sepsis_taxonomy.qza \
    --p-level 6 \
    --o-collapsed-table ../data/Microbiome/ftable_adult_feces_min10-5_genus.qza

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/ftable_adult_feces_min10-5_genus.qza[0m
[0m

In [23]:
!qiime feature-table summarize \
    --i-table ../data/Microbiome/combined_ftable_adult_vagina.qza \
    --m-sample-metadata-file ../data/metadata_combined.txt \
    --o-visualization ../data/Microbiome/combined_ftable_adult_vagina.qzv

[32mSaved Visualization to: ../data/Microbiome/combined_ftable_adult_vagina.qzv[0m
[0m

### Differential Abundance: ANCOM-BC

#### Early

In [479]:
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_infant_feces.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where "[host_age_infant]<30" \
    --o-filtered-table ../data/Microbiome/combined_ftable_infant_feces_early.qza
#keeps samples less than 30 days old

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_infant_feces_early.qza[0m
[0m

In [480]:
!qiime taxa collapse \
    --i-table ../data/Microbiome/combined_ftable_infant_feces_early.qza \
    --i-taxonomy ../data/Microbiome/sepsis_taxonomy.qza \
    --p-level 6 \
    --o-collapsed-table ../data/Microbiome/ftable_infant_feces_early_genus.qza

!qiime taxa collapse \
    --i-table ../data/Microbiome/combined_ftable_infant_feces_early.qza \
    --i-taxonomy ../data/Microbiome/sepsis_taxonomy.qza \
    --p-level 7 \
    --o-collapsed-table ../data/Microbiome/ftable_infant_feces_early_species.qza

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/ftable_infant_feces_early_genus.qza[0m
[0m[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/ftable_infant_feces_early_species.qza[0m
[0m

In [483]:
!qiime composition ancombc \
    --i-table ../data/Microbiome/ftable_infant_feces_early_species.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-formula 'mode_delivery + drink_water_safe_simplified + fp_crosssec_cat + fp_long_cat' \
    --p-reference-levels mode_delivery::'Vaginal' drink_water_safe_simplified::'no water treatment' fp_crosssec_cat::'EBF' fp_long_cat::'EBF' \
    --o-differentials ../data/Microbiome/infant_fecal_early_ancombc_differentials4.qza

[32mSaved FeatureData[DifferentialAbundance] to: ../data/Microbiome/infant_fecal_early_ancombc_differentials4.qza[0m
[0m

In [482]:
!qiime composition da-barplot \
  --i-data ../data/Microbiome/infant_fecal_early_ancombc_differentials3.qza \
  --p-significance-threshold 0.05 \
  --p-effect-size-threshold 2 \
  --p-level-delimiter ';' \
  --o-visualization ../data/Microbiome/infant_fecal_early_ancombc_differentials3.qzv

[32mSaved Visualization to: ../data/Microbiome/infant_fecal_early_ancombc_differentials3.qzv[0m
[0m

#### Early vs Late, Species Level

In [545]:
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_infant_feces.qza \
    --p-min-frequency 100000 \
    --o-filtered-table ../data/Microbiome/ftable_infant_feces_min10-5.qza

!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_infant_skin.qza \
    --p-min-frequency 100000 \
    --o-filtered-table ../data/Microbiome/ftable_infant_skin_min10-5.qza

!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_infant_tongue.qza \
    --p-min-frequency 100000 \
    --o-filtered-table ../data/Microbiome/ftable_infant_tongue_min10-5.qza

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/ftable_infant_feces_min10-5.qza[0m
[0m[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/ftable_infant_skin_min10-5.qza[0m
[0m[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/ftable_infant_tongue_min10-5.qza[0m
[0m

In [550]:
!qiime taxa collapse \
    --i-table ../data/Microbiome/ftable_infant_feces_min10-5.qza \
    --i-taxonomy ../data/Microbiome/sepsis_taxonomy.qza \
    --p-level 6 \
    --o-collapsed-table ../data/Microbiome/ftable_infant_feces_min10-5_genus.qza

!qiime taxa collapse \
    --i-table ../data/Microbiome/ftable_infant_skin_min10-5.qza \
    --i-taxonomy ../data/Microbiome/sepsis_taxonomy.qza \
    --p-level 6 \
    --o-collapsed-table ../data/Microbiome/ftable_infant_skin_min10-5_genus.qza

!qiime taxa collapse \
    --i-table ../data/Microbiome/ftable_infant_tongue_min10-5.qza \
    --i-taxonomy ../data/Microbiome/sepsis_taxonomy.qza \
    --p-level 6 \
    --o-collapsed-table ../data/Microbiome/ftable_infant_oral_min10-5_genus.qza

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/ftable_infant_feces_min10-5_genus.qza[0m
[0m[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/ftable_infant_skin_min10-5_genus.qza[0m
[0m[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/ftable_infant_oral_min10-5_genus.qza[0m
[0m

In [None]:
!qiime composition ancombc2 \
    --i-table ../data/Microbiome/ftable_infant_feces_min10-5_genus.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-fixed-effects-formula 'drink_water_safe_simplified' \
    --p-random-effects-formula '1 | host_subject_id' \
    --p-reference-levels drink_water_safe_simplified::'Untreated' \
    --o-ancombc2-output ../data/Microbiome/ancombc2/infant_fecal_genus_ancombc2_differentials_water-binary.qza

!qiime composition ancombc2-visualizer \
  --i-data ../data/Microbiome/ancombc2/infant_fecal_genus_ancombc2_differentials_water-binary.qza \
  --o-visualization ../data/Microbiome/ancombc2/infant_fecal_genus_ancombc2_water-binary.qzv

!qiime tools export \
    --input-path ../data/Microbiome/ancombc2/infant_fecal_genus_ancombc2_differentials_water-binary.qza \
    --output-path ../data/Microbiome/ancombc2/infant_fecal_genus_ancombc2_differentials_water-binary

In [None]:
!qiime composition ancombc2 \
    --i-table ../data/Microbiome/ancombc2/ftable_infant_feces_min10-5_species.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-fixed-effects-formula '30d_cat' \
    --p-random-effects-formula '1 | host_subject_id' \
    --p-reference-levels 30d_cat::'30 days of age or less' \
    --o-ancombc2-output ../data/Microbiome/ancombc2/infant_fecal_ancombc2_differentials_early-late.qza
        
!qiime composition ancombc2 \
    --i-table ../data/Microbiome/ancombc2/ftable_infant_skin_min10-5_species.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-fixed-effects-formula '30d_cat' \
    #--p-random-effects-formula '1 | host_subject_id' \
    --p-reference-levels 30d_cat::'30 days of age or less' \
    --o-ancombc2-output ../data/Microbiome/ancombc2/infant_skin_ancombc2_differentials_early-late.qza

!qiime composition ancombc2 \
    --i-table ../data/Microbiome/ancombc2/ftable_infant_oral_min10-5_species.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-random-effects-formula '1 | host_subject_id' \
    --p-fixed-effects-formula '30d_cat' \
    --p-reference-levels 30d_cat::'30 days of age or less' \
    --o-ancombc2-output ../data/Microbiome/ancombc2/infant_oral_ancombc2_differentials_early-late.qza
#host_subject_id as random effect to control for repeated sampling
#ran ancombc2 in qiime2-2025.4

In [None]:
!qiime composition ancombc2-visualizer \
  --i-data ../data/Microbiome/ancombc2/infant_fecal_ancombc2_differentials_early-late.qza \
  --o-visualization ../data/Microbiome/ancombc2/infant_fecal_ancombc2_differentials_early-late.qzv

!qiime composition ancombc2-visualizer \
  --i-data ../data/Microbiome/ancombc2/infant_skin_ancombc2_differentials_early-late.qza \
  --o-visualization ../data/Microbiome/ancombc2/infant_skin_ancombc2_differentials_early-late.qzv

!qiime composition ancombc2-visualizer \
  --i-data ../data/Microbiome/ancombc2/infant_oral_ancombc_differentials_early-late.qza \
  --o-visualization ../data/Microbiome/ancombc2/infant_oral_ancombc2_differentials_early-late.qzv
#ran ancombc2 in qiime2-2025.4

In [None]:
!qiime composition ancombc2 \
    --i-table ../data/Microbiome/ancombc2/ftable_infant_feces_min10-5_genus.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-fixed-effects-formula '30d_cat' \
    --p-reference-levels 30d_cat::'30 days of age or less' \
    --o-ancombc2-output ../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late.qza
        
!qiime composition ancombc2 \
    --i-table ../data/Microbiome/ancombc2/ftable_infant_skin_min10-5_genus.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-fixed-effects-formula '30d_cat' \
    --p-reference-levels 30d_cat::'30 days of age or less' \
    --o-ancombc2-output ../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late.qza

!qiime composition ancombc2 \
    --i-table ../data/Microbiome/ancombc2/ftable_infant_oral_min10-5_genus.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-fixed-effects-formula '30d_cat' \
    --p-reference-levels 30d_cat::'30 days of age or less' \
    --o-ancombc2-output ../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late.qza
#ran ancombc2 in qiime2-2025.4

In [None]:
!qiime composition ancombc2-visualizer \
  --i-data ../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late.qza \
  --o-visualization ../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late.qzv

!qiime composition ancombc2-visualizer \
  --i-data ../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late.qza \
  --o-visualization ../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late.qzv

!qiime composition ancombc2-visualizer \
  --i-data ../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late.qza \
  --o-visualization ../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late.qzv
#ran ancombc2 in qiime2-2025.4

In [None]:
!qiime composition tabulate \
    --i-data ../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late.qza \
    --o-visualization ../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late_table.qzv

!qiime composition tabulate \
    --i-data ../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late.qza \
    --o-visualization ../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late_table.qzv

!qiime composition tabulate \
    --i-data ../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late.qza \
    --o-visualization ../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late_table.qzv

In [10]:
import glob
import os
import csv
import json

In [7]:
def convert_jsonl_to_csv(jsonl_filepath, csv_filepath):
    all_fieldnames = set()
    data_to_write = []

    # First pass: Collect all unique fieldnames from the JSONL file
    with open(jsonl_filepath, 'r') as infile:
        for line in infile:
            try:
                data = json.loads(line.strip())
                all_fieldnames.update(data.keys())
                data_to_write.append(data)
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON in {jsonl_filepath}: {e}")
                return

    # Second pass: Write the data to CSV using all collected fieldnames
    if all_fieldnames:
        with open(csv_filepath, 'w', newline='') as outfile:
            fieldnames_list = sorted(list(all_fieldnames))  # Ensure consistent column order
            writer = csv.DictWriter(outfile, fieldnames=fieldnames_list, restval='')
            writer.writeheader()
            writer.writerows(data_to_write)
        print(f"Converted '{jsonl_filepath}' to '{csv_filepath}'")
    else:
        print(f"No data found in '{jsonl_filepath}' to convert.")

In [581]:
for x in glob.glob('../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late/*.jsonl'):
    base_name_with_ext = os.path.basename(x)
    base_name = base_name_with_ext.split(".")[0]
    output_dir = os.path.dirname(x)
    csv_output_path = os.path.join(output_dir, f"{base_name}.csv")
    convert_jsonl_to_csv(x, csv_output_path)

Converted '../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late/p.jsonl' to '../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late/p.csv'
Converted '../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late/diff.jsonl' to '../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late/diff.csv'
Converted '../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late/passed_ss.jsonl' to '../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late/passed_ss.csv'
Converted '../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late/q.jsonl' to '../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late/q.csv'
Converted '../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late/se.jsonl' to '../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late/se.csv'
Converted '../data/Microbiome/ancombc2/infant_fecal_ancombc2_genus-diff_early-late/W.jsonl' to '../da

In [582]:
for x in glob.glob('../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late/*.jsonl'):
    base_name_with_ext = os.path.basename(x)
    base_name = base_name_with_ext.split(".")[0]
    output_dir = os.path.dirname(x)
    csv_output_path = os.path.join(output_dir, f"{base_name}.csv")
    convert_jsonl_to_csv(x, csv_output_path)

Converted '../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late/p.jsonl' to '../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late/p.csv'
Converted '../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late/diff.jsonl' to '../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late/diff.csv'
Converted '../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late/passed_ss.jsonl' to '../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late/passed_ss.csv'
Converted '../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late/q.jsonl' to '../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late/q.csv'
Converted '../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late/se.jsonl' to '../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late/se.csv'
Converted '../data/Microbiome/ancombc2/infant_skin_ancombc2_genus-diff_early-late/W.jsonl' to '../data/Microbio

In [583]:
for x in glob.glob('../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late/*.jsonl'):
    base_name_with_ext = os.path.basename(x)
    base_name = base_name_with_ext.split(".")[0]
    output_dir = os.path.dirname(x)
    csv_output_path = os.path.join(output_dir, f"{base_name}.csv")
    convert_jsonl_to_csv(x, csv_output_path)

Converted '../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late/p.jsonl' to '../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late/p.csv'
Converted '../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late/diff.jsonl' to '../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late/diff.csv'
Converted '../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late/passed_ss.jsonl' to '../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late/passed_ss.csv'
Converted '../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late/q.jsonl' to '../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late/q.csv'
Converted '../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late/se.jsonl' to '../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late/se.csv'
Converted '../data/Microbiome/ancombc2/infant_oral_ancombc2_genus-diff_early-late/W.jsonl' to '../data/Microbio

In [4]:
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_infant_feces.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where "[host_age_infant]<30 AND [host_age_infant]>10" \
    --o-filtered-table ../data/Microbiome/ftable_infant_feces_10-30days.qza
#keeps samples 10-30 days old

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/ftable_infant_feces_10-30days.qza[0m
[0m

In [5]:
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/ftable_infant_feces_10-30days.qza \
    --p-min-frequency 100000 \
    --o-filtered-table ../data/Microbiome/ftable_infant_feces_10-30days_min10-5.qza

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/ftable_infant_feces_10-30days_min10-5.qza[0m
[0m

In [6]:
!qiime taxa collapse \
    --i-table ../data/Microbiome/ftable_infant_feces_10-30days_min10-5.qza \
    --i-taxonomy ../data/Microbiome/sepsis_taxonomy.qza \
    --p-level 6 \
    --o-collapsed-table ../data/Microbiome/ftable_infant_feces_10-30days_min10-5_genus.qza

!qiime taxa collapse \
    --i-table ../data/Microbiome/ftable_infant_feces_10-30days_min10-5.qza \
    --i-taxonomy ../data/Microbiome/sepsis_taxonomy.qza \
    --p-level 7 \
    --o-collapsed-table ../data/Microbiome/ftable_infant_feces_10-30days_min10-5_species.qza

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/ftable_infant_feces_10-30days_min10-5_genus.qza[0m
[0m[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/ftable_infant_feces_10-30days_min10-5_species.qza[0m
[0m

In [None]:
qiime composition ancombc2 \
    --i-table ../data/Microbiome/ftable_infant_feces_10-30days_min10-5_species.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-fixed-effects-formula 'mode_delivery' \
    #--p-random-effects-formula '1 | host_subject_id' \
    --p-reference-levels mode_delivery::'Vaginal' \
    --o-ancombc2-output ../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_del-mode_species.qza
        

qiime composition ancombc2 \
    --i-table ../data/Microbiome/ftable_infant_feces_10-30days_min10-5_genus.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-fixed-effects-formula 'mode_delivery' \
    #--p-random-effects-formula '1 | host_subject_id' \
    --p-reference-levels mode_delivery::'Vaginal' \
    --o-ancombc2-output ../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_del-mode_genus.qza
#host_subject_id as random effect to control for repeated sampling

In [None]:
qiime composition ancombc2 \
    --i-table ../data/Microbiome/ftable_infant_feces_10-30days_min10-5_genus.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-fixed-effects-formula 'drink_water_safe_simplified' \
    #--p-random-effects-formula '1 | host_subject_id' \
    --p-reference-levels drink_water_safe_simplified::'no water treatment' \
    --o-ancombc2-output ../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_water_genus.qza

In [None]:
qiime composition ancombc2-visualizer \
  --i-data ../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_del-mode_species.qza \
  --o-visualization ../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_del-mode_species.qzv

qiime composition ancombc2-visualizer \
  --i-data ../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_del-mode_genus.qza \
  --o-visualization ../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_del-mode_genus.qzv

qiime composition ancombc2-visualizer \
  --i-data ../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_water_genus.qza \
  --o-visualization ../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_water_genus.qzv

In [None]:
qiime tools export \
    --input-path ../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_del-mode_species.qza \
    --output-path ../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_del-mode_species

qiime tools export \
    --input-path ../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_del-mode_genus.qza \
    --output-path ../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_del-mode_genus

qiime tools export \
    --input-path ../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_water_genus.qza \
    --output-path ../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_water_genus

In [13]:
for x in glob.glob('../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_water_genus/*.jsonl'):
    base_name_with_ext = os.path.basename(x)
    base_name = base_name_with_ext.split(".")[0]
    output_dir = os.path.dirname(x)
    csv_output_path = os.path.join(output_dir, f"{base_name}.csv")
    convert_jsonl_to_csv(x, csv_output_path)

Converted '../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_water_genus/p.jsonl' to '../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_water_genus/p.csv'
Converted '../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_water_genus/diff.jsonl' to '../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_water_genus/diff.csv'
Converted '../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_water_genus/passed_ss.jsonl' to '../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_water_genus/passed_ss.csv'
Converted '../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_water_genus/q.jsonl' to '../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_water_genus/q.csv'
Converted '../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_water_genus/se.jsonl' to '../data/Microbiome/ancombc2/ifecal_ancombc2_differentials_joint_water_genus/se.csv'
Converted '../data/Microbiome/ancombc2/ifecal_ancombc2_differ

# RPCA

### All Together

In [463]:
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_noblanks.qza \
    --p-min-frequency 644 \
    --o-filtered-table ../data/Microbiome/lr-metrics/ftable_min644.qza

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/lr-metrics/ftable_min644.qza[0m
[0m

In [490]:
!qiime feature-table summarize \
    --i-table ../data/Microbiome/lr-metrics/ftable_min644.qza \
    --m-sample-metadata-file ../data/metadata_combined.txt \
    --o-visualization ../data/Microbiome/lr-metrics/ftable_min644.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/ftable_min644.qzv[0m
[0m

In [464]:
!qiime gemelli rpca \
    --i-table ../data/Microbiome/lr-metrics/ftable_min644.qza \
    --o-biplot ../data/Microbiome/lr-metrics/all-rpca-ordination.qza \
    --o-distance-matrix ../data/Microbiome/lr-metrics/all-rpca-distance.qza
#samples less than 644 dropped for comparison to rarefied table

[32mSaved PCoAResults % Properties('biplot') to: ../data/Microbiome/lr-metrics/all-rpca-ordination.qza[0m
[32mSaved DistanceMatrix to: ../data/Microbiome/lr-metrics/all-rpca-distance.qza[0m
[0m

In [491]:
!qiime gemelli rpca \
    --i-table ../data/Microbiome/core_diversity_644/rarefied_table.qza \
    --o-biplot ../data/Microbiome/lr-metrics/rpca-rarefied-ordination.qza \
    --o-distance-matrix ../data/Microbiome/lr-metrics/rpca-rarefied-distance.qza
#rarified to 644 like standard alpha/beta - retains most vaginal samples

[32mSaved PCoAResults % Properties('biplot') to: ../data/Microbiome/lr-metrics/rpca-rarefied-ordination.qza[0m
[32mSaved DistanceMatrix to: ../data/Microbiome/lr-metrics/rpca-rarefied-distance.qza[0m
[0m

In [492]:
!qiime gemelli qc-rarefy \
    --i-table ../data/Microbiome/lr-metrics/ftable_min644.qza \
    --i-rarefied-distance ../data/Microbiome/lr-metrics/rpca-rarefied-distance.qza \
    --i-unrarefied-distance ../data/Microbiome/lr-metrics/all-rpca-distance.qza \
    --o-visualization ../data/Microbiome/lr-metrics/gemelli-rarefy-qc.qzv
#to rarefy or not to rarefy (preferred)? 
#Answer says they are not significantly different, so use unrarefied

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/gemelli-rarefy-qc.qzv[0m
[0m

In [493]:
!qiime emperor biplot \
    --i-biplot ../data/Microbiome/lr-metrics/all-rpca-ordination.qza \
    --m-sample-metadata-file ../data/metadata_combined.txt \
    --m-feature-metadata-file ../data/Microbiome/sepsis_taxonomy.qza \
    --p-number-of-features 8 \
    --o-visualization ../data/Microbiome/lr-metrics/rpca-biplot-all.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/rpca-biplot-all.qzv[0m
[0m

In [494]:
!qiime emperor plot \
    --i-pcoa ../data/Microbiome/lr-metrics/all-rpca-ordination.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-custom-axes 'host_age_infant' \
    --p-ignore-pcoa-features True \
    --o-visualization ../data/Microbiome/lr-metrics/rpca-empplot_age.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/rpca-empplot_age.qzv[0m
[0m

In [495]:
!qiime diversity beta-group-significance \
    --i-distance-matrix ../data/Microbiome/lr-metrics/all-rpca-distance.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --m-metadata-column host_body_habitat \
    --p-method permanova \
    --p-pairwise True \
    --o-visualization ../data/Microbiome/lr-metrics/rpca_body-site_bsig.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/rpca_body-site_bsig.qzv[0m
[0m

In [12]:
!qiime qurro loading-plot \
    --i-ranks ../data/Microbiome/lr-metrics/all-rpca-ordination.qza \
    --i-table ../data/Microbiome/combined_ftable_noblanks.qza \
    --m-sample-metadata-file ../data/metadata_combined.txt \
    --m-feature-metadata-file ../data/Microbiome/sepsis_taxonomy.qza \
    --o-visualization ../data/Microbiome/lr-metrics/rpca_qurro_plot.qzv
#2023.5-gemelli

[32mSaved Visualization to: ../data/Microbiome/reads/lr-metrics/rpca_qurro_plot.qzv[0m
[0m

# phyloRPCA

### All together

In [465]:
!qiime gemelli phylogenetic-rpca-with-taxonomy \
    --i-table ../data/Microbiome/lr-metrics/ftable_min644.qza \
    --i-phylogeny ../../greengenes2/2024.09.phylogeny.id.nwk.qza \
    --m-taxonomy-file ../data/Microbiome/sepsis_taxonomy.qza \
    --p-min-feature-count 100 \
    --o-biplot ../data/Microbiome/lr-metrics/phyloRPCA-ordination.qza \
    --o-distance-matrix ../data/Microbiome/lr-metrics/phyloRPCA-distance.qza \
    --o-counts-by-node-tree ../data/Microbiome/lr-metrics/phyloRPCA-tree.qza \
    --o-counts-by-node ../data/Microbiome/lr-metrics/phyloRPCA-table.qza \
    --o-t2t-taxonomy ../data/Microbiome/lr-metrics/phyloRPCA-taxonomy.qza

[32mSaved PCoAResults % Properties('biplot') to: ../data/Microbiome/lr-metrics/phyloRPCA-ordination.qza[0m
[32mSaved DistanceMatrix to: ../data/Microbiome/lr-metrics/phyloRPCA-distance.qza[0m
[32mSaved Phylogeny[Rooted] to: ../data/Microbiome/lr-metrics/phyloRPCA-tree.qza[0m
[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/lr-metrics/phyloRPCA-table.qza[0m
[32mSaved FeatureData[Taxonomy] to: ../data/Microbiome/lr-metrics/phyloRPCA-taxonomy.qza[0m
[0m

In [496]:
!qiime empress community-plot\
    --i-tree ../data/Microbiome/lr-metrics/phyloRPCA-tree.qza\
    --i-feature-table ../data/Microbiome/lr-metrics/phyloRPCA-table.qza\
    --i-pcoa ../data/Microbiome/lr-metrics/phyloRPCA-ordination.qza\
    --m-sample-metadata-file ../data/metadata_combined.txt\
    --m-feature-metadata-file ../data/Microbiome/lr-metrics/phyloRPCA-taxonomy.qza\
    --p-filter-missing-features\
    --p-number-of-features 10\
    --o-visualization ../data/Microbiome/lr-metrics/phyloRPCA-empress.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/phyloRPCA-empress.qzv[0m
[0m

In [497]:
!qiime emperor plot \
    --i-pcoa ../data/Microbiome/lr-metrics/phyloRPCA-ordination.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-ignore-pcoa-features True \
    --p-custom-axes  host_age_infant \
    --o-visualization ../data/Microbiome/lr-metrics/phyloRPCA-empplot.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/phyloRPCA-empplot.qzv[0m
[0m

In [698]:
!qiime diversity beta-group-significance \
    --i-distance-matrix ../data/Microbiome/lr-metrics/phyloRPCA-distance.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --m-metadata-column sample_type2 \
    --p-method permanova \
    --p-pairwise True \
    --o-visualization ../data/Microbiome/lr-metrics/phyloRPCA_sampletype2_bsig.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/phyloRPCA_sampletype2_bsig.qzv[0m
[0m

In [17]:
!qiime qurro loading-plot \
    --i-ranks ../data/Microbiome/lr-metrics/phyloRPCA-ordination.qza \
    --i-table ../data/Microbiome/lr-metrics/phyloRPCA-table.qza \
    --m-sample-metadata-file ../data/metadata_combined.txt \
    --m-feature-metadata-file ../data/Microbiome/lr-metrics/phyloRPCA-taxonomy.qza \
    --o-visualization ../data/Microbiome/lr-metrics/phyloRPCA-qurro_plot.qzv

[32mSaved Visualization to: ../data/Microbiome/reads/lr-metrics/phyloRPCA-qurro_plot.qzv[0m
[0m

In [7]:
!qiime diversity adonis \
    --i-distance-matrix ../data/Microbiome/lr-metrics/phyloRPCA-distance.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-formula "host_age_infant+mode_delivery+host_subject_id+family_id+host_sex+hmo_Secretor_str+drink_water_safe_simplified+sample_type2" \
    --o-visualization ../data/Microbiome/lr-metrics/phyloRPCA-adonis.qzv
#won't do columns with nans fp_long_bin, im_ever_abxs_adhoc; other issues and removed: host_weight, gestational_age_birth

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/phyloRPCA-adonis.qzv[0m
[0m[?25h

### Fecal

In [None]:
###both adult and infant fecal over time

In [547]:
!qiime gemelli phylogenetic-rpca-with-taxonomy \
    --i-table ../data/Microbiome/ftable_infant_feces_min10-5.qza \
    --i-phylogeny ../../greengenes2/2024.09.phylogeny.id.nwk.qza \
    --m-taxonomy-file ../data/Microbiome/sepsis_taxonomy.qza \
    --p-min-feature-count 1000 \
    --o-biplot ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-ordination.qza \
    --o-distance-matrix ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-distance.qza \
    --o-counts-by-node-tree ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-tree.qza \
    --o-counts-by-node ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-table.qza \
    --o-t2t-taxonomy ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-taxonomy.qza

[32mSaved PCoAResults % Properties('biplot') to: ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-ordination.qza[0m
[32mSaved DistanceMatrix to: ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-distance.qza[0m
[32mSaved Phylogeny[Rooted] to: ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-tree.qza[0m
[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-table.qza[0m
[32mSaved FeatureData[Taxonomy] to: ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-taxonomy.qza[0m
[0m

In [548]:
!qiime empress community-plot\
    --i-tree ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-tree.qza\
    --i-feature-table ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-table.qza\
    --i-pcoa ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-ordination.qza\
    --m-sample-metadata-file ../data/metadata_combined.txt\
    --m-feature-metadata-file ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-taxonomy.qza\
    --p-filter-missing-features\
    --p-number-of-features 10\
    --o-visualization ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-empress.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-empress.qzv[0m
[0m

In [549]:
!qiime emperor plot \
    --i-pcoa ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-ordination.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-ignore-pcoa-features True \
    --p-custom-axes  host_age_infant \
    --o-visualization ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-empplot.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-empplot.qzv[0m
[0m

In [684]:
!qiime diversity adonis \
    --i-distance-matrix ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-distance.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-formula 'host_age_infant+host_age_infant_cat' \
    --o-visualization ../data/Microbiome/lr-metrics/phyloRPCA_ifeces_bsig_age-cat.qzv

!qiime diversity beta-group-significance \
    --i-distance-matrix ../data/Microbiome/lr-metrics/infant_feces_phyloRPCA-distance.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --m-metadata-column 30d_cat \
    --p-method permanova \
    --p-pairwise True \
    --o-visualization ../data/Microbiome/lr-metrics/phyloRPCA_ifeces_bsig_30d-cat.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/phyloRPCA_ifeces_bsig_age-cat.qzv[0m
[0m[32mSaved Visualization to: ../data/Microbiome/lr-metrics/phyloRPCA_ifeces_bsig_30d-cat.qzv[0m
[0m

In [695]:
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_noblanks.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where "[sample_type]='feces'" \
    --o-filtered-table ../data/Microbiome/combined_ftable_both_feces.qza

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_both_feces.qza[0m
[0m

In [696]:
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_both_feces.qza \
    --p-min-frequency 100000 \
    --o-filtered-table ../data/Microbiome/combined_ftable_both_feces_min10-5.qza

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/combined_ftable_both_feces_min10-5.qza[0m
[0m

In [697]:
!qiime gemelli phylogenetic-rpca-with-taxonomy \
    --i-table ../data/Microbiome/combined_ftable_both_feces_min10-5.qza \
    --i-phylogeny ../../greengenes2/2024.09.phylogeny.id.nwk.qza \
    --m-taxonomy-file ../data/Microbiome/sepsis_taxonomy.qza \
    --p-min-feature-count 1000 \
    --o-biplot ../data/Microbiome/lr-metrics/both_feces_phyloRPCA-ordination.qza \
    --o-distance-matrix ../data/Microbiome/lr-metrics/both_feces_phyloRPCA-distance.qza \
    --o-counts-by-node-tree ../data/Microbiome/lr-metrics/both_feces_phyloRPCA-tree.qza \
    --o-counts-by-node ../data/Microbiome/lr-metrics/both_feces_phyloRPCA-table.qza \
    --o-t2t-taxonomy ../data/Microbiome/lr-metrics/both_feces_phyloRPCA-taxonomy.qza

[32mSaved PCoAResults % Properties('biplot') to: ../data/Microbiome/lr-metrics/both_feces_phyloRPCA-ordination.qza[0m
[32mSaved DistanceMatrix to: ../data/Microbiome/lr-metrics/both_feces_phyloRPCA-distance.qza[0m
[32mSaved Phylogeny[Rooted] to: ../data/Microbiome/lr-metrics/both_feces_phyloRPCA-tree.qza[0m
[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/lr-metrics/both_feces_phyloRPCA-table.qza[0m
[32mSaved FeatureData[Taxonomy] to: ../data/Microbiome/lr-metrics/both_feces_phyloRPCA-taxonomy.qza[0m
[0m

#### Infant Feces Only

In [10]:
!qiime gemelli phylogenetic-rpca-with-taxonomy \
    --i-table ../data/Microbiome/ftable_infant_feces_min10-5.qza \
    --i-phylogeny ../../greengenes2/2024.09.phylogeny.id.nwk.qza \
    --m-taxonomy-file ../data/Microbiome/sepsis_taxonomy.qza \
    --p-min-feature-count 100 \
    --o-biplot ../data/Microbiome/lr-metrics/ifeces_phyloRPCA-ordination.qza \
    --o-distance-matrix ../data/Microbiome/lr-metrics/ifeces_phyloRPCA-distance.qza \
    --o-counts-by-node-tree ../data/Microbiome/lr-metrics/ifeces_phyloRPCA-tree.qza \
    --o-counts-by-node ../data/Microbiome/lr-metrics/ifeces_phyloRPCA-table.qza \
    --o-t2t-taxonomy ../data/Microbiome/lr-metrics/ifeces_phyloRPCA-taxonomy.qza

[32mSaved PCoAResults % Properties('biplot') to: ../data/Microbiome/lr-metrics/ifeces_phyloRPCA-ordination.qza[0m
[32mSaved DistanceMatrix to: ../data/Microbiome/lr-metrics/ifeces_phyloRPCA-distance.qza[0m
[32mSaved Phylogeny[Rooted] to: ../data/Microbiome/lr-metrics/ifeces_phyloRPCA-tree.qza[0m
[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/lr-metrics/ifeces_phyloRPCA-table.qza[0m
[32mSaved FeatureData[Taxonomy] to: ../data/Microbiome/lr-metrics/ifeces_phyloRPCA-taxonomy.qza[0m
[0m[?25h

In [5]:
!qiime diversity adonis \
    --i-distance-matrix ../data/Microbiome/lr-metrics/ifeces_phyloRPCA-distance.qza \
    --m-metadata-file ../data/infant_feces_metadata.txt \
    --p-formula "host_age_infant+mode_delivery+host_subject_id+host_sex+hmo_Secretor_str+drink_water_safe_simplified+fp_long_bin+im_ever_abxs_adhoc+gestational_age_birth+AssetIndex2+birthweight+maternal_antibiotics" \
    --o-visualization ../data/Microbiome/lr-metrics/ifeces_phyloRPCA-adonis.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/ifeces_phyloRPCA-adonis.qzv[0m
[0m[?25h

### Skin

In [539]:
!qiime gemelli phylogenetic-rpca-with-taxonomy \
    --i-table ../data/Microbiome/lr-metrics/ftable_infant_skin_min100000.qza \
    --i-phylogeny ../../greengenes2/2024.09.phylogeny.id.nwk.qza \
    --m-taxonomy-file ../data/Microbiome/sepsis_taxonomy.qza \
    --p-min-feature-count 1000 \
    --o-biplot ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-ordination.qza \
    --o-distance-matrix ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-distance.qza \
    --o-counts-by-node-tree ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-tree.qza \
    --o-counts-by-node ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-table.qza \
    --o-t2t-taxonomy ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-taxonomy.qza

[32mSaved PCoAResults % Properties('biplot') to: ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-ordination.qza[0m
[32mSaved DistanceMatrix to: ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-distance.qza[0m
[32mSaved Phylogeny[Rooted] to: ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-tree.qza[0m
[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-table.qza[0m
[32mSaved FeatureData[Taxonomy] to: ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-taxonomy.qza[0m
[0m

In [540]:
!qiime empress community-plot\
    --i-tree ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-tree.qza\
    --i-feature-table ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-table.qza\
    --i-pcoa ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-ordination.qza\
    --m-sample-metadata-file ../data/metadata_combined.txt\
    --m-feature-metadata-file ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-taxonomy.qza\
    --p-filter-missing-features\
    --p-number-of-features 10\
    --o-visualization ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-empress.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-empress.qzv[0m
[0m

In [541]:
!qiime emperor plot \
    --i-pcoa ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-ordination.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-ignore-pcoa-features True \
    --p-custom-axes  host_age_infant \
    --o-visualization ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-empplot.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-empplot.qzv[0m
[0m

In [683]:
!qiime diversity adonis \
    --i-distance-matrix ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-distance.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-formula 'host_age_infant+host_age_infant_cat' \
    --o-visualization ../data/Microbiome/lr-metrics/phyloRPCA_iskin_bsig_age-cat.qzv

!qiime diversity beta-group-significance \
    --i-distance-matrix ../data/Microbiome/lr-metrics/infant_skin_phyloRPCA-distance.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --m-metadata-column 30d_cat \
    --p-method permanova \
    --p-pairwise True \
    --o-visualization ../data/Microbiome/lr-metrics/phyloRPCA_iskin_bsig_30d-cat.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/phyloRPCA_iskin_bsig_age-cat.qzv[0m
[0m[32mSaved Visualization to: ../data/Microbiome/lr-metrics/phyloRPCA_iskin_bsig_30d-cat.qzv[0m
[0m

### Oral

In [542]:
!qiime gemelli phylogenetic-rpca-with-taxonomy \
    --i-table ../data/Microbiome/lr-metrics/ftable_infant_tongue_min100000.qza \
    --i-phylogeny ../../greengenes2/2024.09.phylogeny.id.nwk.qza \
    --m-taxonomy-file ../data/Microbiome/sepsis_taxonomy.qza \
    --p-min-feature-count 1000 \
    --o-biplot ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-ordination.qza \
    --o-distance-matrix ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-distance.qza \
    --o-counts-by-node-tree ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-tree.qza \
    --o-counts-by-node ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-table.qza \
    --o-t2t-taxonomy ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-taxonomy.qza

[32mSaved PCoAResults % Properties('biplot') to: ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-ordination.qza[0m
[32mSaved DistanceMatrix to: ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-distance.qza[0m
[32mSaved Phylogeny[Rooted] to: ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-tree.qza[0m
[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-table.qza[0m
[32mSaved FeatureData[Taxonomy] to: ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-taxonomy.qza[0m
[0m

In [543]:
!qiime empress community-plot\
    --i-tree ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-tree.qza\
    --i-feature-table ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-table.qza\
    --i-pcoa ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-ordination.qza\
    --m-sample-metadata-file ../data/metadata_combined.txt\
    --m-feature-metadata-file ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-taxonomy.qza\
    --p-filter-missing-features\
    --p-number-of-features 10\
    --o-visualization ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-empress.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-empress.qzv[0m
[0m

In [544]:
!qiime emperor plot \
    --i-pcoa ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-ordination.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-ignore-pcoa-features True \
    --p-custom-axes  host_age_infant \
    --o-visualization ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-empplot.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-empplot.qzv[0m
[0m

In [682]:
!qiime diversity adonis \
    --i-distance-matrix ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-distance.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-formula 'host_age_infant+host_age_infant_cat' \
    --o-visualization ../data/Microbiome/lr-metrics/phyloRPCA_ioral_bsig_age-cat.qzv

!qiime diversity beta-group-significance \
    --i-distance-matrix ../data/Microbiome/lr-metrics/infant_oral_phyloRPCA-distance.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --m-metadata-column 30d_cat \
    --p-method permanova \
    --p-pairwise True \
    --o-visualization ../data/Microbiome/lr-metrics/phyloRPCA_ioral_bsig_30d-cat.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/phyloRPCA_ioral_bsig_age-cat.qzv[0m
[0m[32mSaved Visualization to: ../data/Microbiome/lr-metrics/phyloRPCA_ioral_bsig_30d-cat.qzv[0m
[0m

# TEMPTED

### Infants - feces

In [673]:
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/ftable_infant_feces_min10-5.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where "[family_id] IN ('10437', '10571', '12281', '11339')" \
    --p-exclude-ids True \
    --o-filtered-table ../data/Microbiome/lr-metrics/ftable_infant_feces_min100000_filt.qza
#100,000 reads minimum per sample
#remove infants with only one timepoint, won't work for tempted

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/lr-metrics/ftable_infant_feces_min100000_filt.qza[0m
[0m

In [674]:
!qiime gemelli clr-transformation \
    --i-table ../data/Microbiome/lr-metrics/ftable_infant_feces_min100000_filt.qza \
    --o-clr-table ../data/Microbiome/lr-metrics/tempted/clr_infant_feces_min100000_filt.qza

[32mSaved FeatureTable[Composition] to: ../data/Microbiome/lr-metrics/tempted/clr_infant_feces_min100000_filt.qza[0m
[0m

In [675]:
!qiime gemelli tempted-factorize \
    --i-table ../data/Microbiome/lr-metrics/tempted/clr_infant_feces_min100000_filt.qza \
    --m-sample-metadata-file ../data/metadata_combined.txt \
    --p-individual-id-column 'host_subject_id' \
    --p-state-column 'host_age_infant' \
    --o-individual-biplot ../data/Microbiome/lr-metrics/tempted/infant_feces_individual_biplot.qza \
    --o-state-loadings ../data/Microbiome/lr-metrics/tempted/infant_feces_state_loadings.qza \
    --o-distance-matrix ../data/Microbiome/lr-metrics/tempted/infant_feces_distance_matrix.qza \
    --o-svd-center ../data/Microbiome/lr-metrics/tempted/infant_feces_svd_center.qza

[32mSaved PCoAResults % Properties('biplot') to: ../data/Microbiome/lr-metrics/tempted/infant_feces_individual_biplot.qza[0m
[32mSaved SampleData[SampleTrajectory] to: ../data/Microbiome/lr-metrics/tempted/infant_feces_state_loadings.qza[0m
[32mSaved DistanceMatrix to: ../data/Microbiome/lr-metrics/tempted/infant_feces_distance_matrix.qza[0m
[32mSaved SampleData[SampleTrajectory] to: ../data/Microbiome/lr-metrics/tempted/infant_feces_svd_center.qza[0m
[0m

In [505]:
import pandas as pd

# first we import the metdata into pandas
mf = pd.read_csv('../data/metadata_combined.txt', sep='\t',index_col=0)
# next we aggregate by subjects (i.e. 'host_subject_id') 
# and keep the first instance of 'diagnosis_full' by subject.
mf = mf.groupby('host_subject_id').agg({'family_id':'first',
                                        'host_life_stage':'first',
                                        'mode_delivery':'first',
                                        'hmo_Secretor_str':'first',
                                        'drink_water_safe_simplified':'first',
                                       'bf_category':'last'})
# now we save the metadata
mf.index.name = '#SampleID'
mf.index = mf.index.astype(str)
mf.to_csv('../data/Microbiome/lr-metrics/tempted/subject-metadata.tsv', sep='\t')
mf.head(5)

Unnamed: 0_level_0,family_id,host_life_stage,mode_delivery,hmo_Secretor_str,drink_water_safe_simplified,bf_category
#SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
100580,10058,adult,Vaginal,secretor,not filtered,Predominant BF
100581,10058,infant,Vaginal,secretor,not filtered,Predominant BF
100770,10077,adult,C-section,secretor,filtered,EBF
100771,10077,infant,C-section,secretor,filtered,EBF
101060,10106,adult,Vaginal,not a secretor,filtered,EBF


In [506]:
mf.tail(5)

Unnamed: 0_level_0,family_id,host_life_stage,mode_delivery,hmo_Secretor_str,drink_water_safe_simplified,bf_category
#SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
blank.sep.9.4.f,blank.sep.9.4.f,not applicable,not applicable,unknown,not filtered,
blank.sep.9.4.g,blank.sep.9.4.g,not applicable,not applicable,unknown,not filtered,
blank.sep.9.4.h,blank.sep.9.4.h,not applicable,not applicable,unknown,not filtered,
donotuse.SEP.15.12E,not applicable,not applicable,not applicable,unknown,not filtered,
donotuse.SEP.7.8E,not applicable,not applicable,not applicable,unknown,not filtered,


In [676]:
!qiime emperor biplot\
    --i-biplot ../data/Microbiome/lr-metrics/tempted/infant_feces_individual_biplot.qza \
    --m-sample-metadata-file ../data/Microbiome/lr-metrics/tempted/subject-metadata.tsv \
    --m-feature-metadata-file ../data/Microbiome/sepsis_taxonomy.qza \
    --p-number-of-features 10 \
    --o-visualization ../data/Microbiome/lr-metrics/tempted/infant_feces_subject_biplot.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/tempted/infant_feces_subject_biplot.qzv[0m
[0m

In [None]:
!qiime qurro loading-plot \
    --i-table ../data/Microbiome/ftable_infant_feces_min10-5.qza \
    --i-ranks ../data/Microbiome/lr-metrics/tempted/infant_feces_individual_biplot.qza \
    --m-sample-metadata-file ../data/metadata_combined.txt \
    --m-feature-metadata-file ../data/Microbiome/sepsis_taxonomy.qza \
    --o-visualization ../data/Microbiome/lr-metrics/tempted/infant_feces_qurro.qzv

In [677]:
!qiime diversity beta-group-significance \
    --i-distance-matrix ../data/Microbiome/lr-metrics/tempted/infant_feces_distance_matrix.qza \
    --m-metadata-file ../data/Microbiome/lr-metrics/tempted/subject-metadata.tsv \
    --m-metadata-column mode_delivery \
    --p-method permanova \
    --p-pairwise True \
    --o-visualization ../data/Microbiome/lr-metrics/tempted/infant_feces_bsig_dmode.qzv
#not significant

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/tempted/infant_feces_bsig_dmode.qzv[0m
[0m

### Infants - skin

In [626]:
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/combined_ftable_infant_skin.qza \
    --p-min-frequency 100000 \
    --o-filtered-table ../data/Microbiome/lr-metrics/ftable_infant_skin_min100000.qza
#100,000

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/lr-metrics/ftable_infant_skin_min100000.qza[0m
[0m

In [627]:
!qiime gemelli clr-transformation \
    --i-table ../data/Microbiome/lr-metrics/ftable_infant_skin_min100000.qza \
    --o-clr-table ../data/Microbiome/lr-metrics/tempted/clr_infant_skin_min100000.qza

[32mSaved FeatureTable[Composition] to: ../data/Microbiome/lr-metrics/tempted/clr_infant_skin_min100000.qza[0m
[0m

In [628]:
!qiime gemelli tempted-factorize \
    --i-table ../data/Microbiome/lr-metrics/tempted/clr_infant_skin_min100000.qza \
    --m-sample-metadata-file ../data/metadata_combined.txt \
    --p-individual-id-column 'host_subject_id' \
    --p-state-column 'host_age_infant' \
    --o-individual-biplot ../data/Microbiome/lr-metrics/tempted/infant_skin_individual_biplot.qza \
    --o-state-loadings ../data/Microbiome/lr-metrics/tempted/infant_skin_state_loadings.qza \
    --o-distance-matrix ../data/Microbiome/lr-metrics/tempted/infant_skin_distance_matrix.qza \
    --o-svd-center ../data/Microbiome/lr-metrics/tempted/infant_skin_svd_center.qza

[32mSaved PCoAResults % Properties('biplot') to: ../data/Microbiome/lr-metrics/tempted/infant_skin_individual_biplot.qza[0m
[32mSaved SampleData[SampleTrajectory] to: ../data/Microbiome/lr-metrics/tempted/infant_skin_state_loadings.qza[0m
[32mSaved DistanceMatrix to: ../data/Microbiome/lr-metrics/tempted/infant_skin_distance_matrix.qza[0m
[32mSaved SampleData[SampleTrajectory] to: ../data/Microbiome/lr-metrics/tempted/infant_skin_svd_center.qza[0m
[0m

In [629]:
!qiime emperor biplot\
    --i-biplot ../data/Microbiome/lr-metrics/tempted/infant_skin_individual_biplot.qza \
    --m-sample-metadata-file ../data/Microbiome/lr-metrics/tempted/subject-metadata.tsv \
    --m-feature-metadata-file ../data/Microbiome/sepsis_taxonomy.qza \
    --p-number-of-features 10 \
    --o-visualization ../data/Microbiome/lr-metrics/tempted/infant_skin_subject_biplot.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/tempted/infant_skin_subject_biplot.qzv[0m
[0m

In [630]:
!qiime diversity beta-group-significance \
    --i-distance-matrix ../data/Microbiome/lr-metrics/tempted/infant_skin_distance_matrix.qza \
    --m-metadata-file ../data/Microbiome/lr-metrics/tempted/subject-metadata.tsv \
    --m-metadata-column drink_water_safe_simplified \
    --p-method permanova \
    --p-pairwise True \
    --o-visualization ../data/Microbiome/lr-metrics/tempted/infant_skin_bsig_water.qzv
#no treatment vs filtered significant

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/tempted/infant_skin_bsig_water.qzv[0m
[0m

In [631]:
!qiime diversity beta-group-significance \
    --i-distance-matrix ../data/Microbiome/lr-metrics/tempted/infant_skin_distance_matrix.qza \
    --m-metadata-file ../data/Microbiome/lr-metrics/tempted/subject-metadata.tsv \
    --m-metadata-column mode_delivery \
    --p-method permanova \
    --p-pairwise True \
    --o-visualization ../data/Microbiome/lr-metrics/tempted/infant_skin_bsig_dmode.qzv
#significant

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/tempted/infant_skin_bsig_dmode.qzv[0m
[0m

In [49]:
!qiime qurro loading-plot \
    --i-table ../data/Microbiome/lr-metrics/ftable_infant_skin_min100000.qza \
    --i-ranks ../data/Microbiome/lr-metrics/tempted/infant_skin_individual_biplot.qza \
    --m-sample-metadata-file ../data/metadata_combined.txt \
    --m-feature-metadata-file ../data/Microbiome/sepsis_taxonomy.qza \
    --o-visualization ../data/Microbiome/lr-metrics/tempted/infant_skin_qurro.qzv

[32mSaved Visualization to: ../data/Microbiome/reads/lr-metrics/tempted/qurro.qzv[0m
[0m

### Infants - tongue

In [666]:
!qiime feature-table filter-samples \
    --i-table ../data/Microbiome/ftable_infant_tongue_min10-5.qza \
    --m-metadata-file ../data/metadata_combined.txt \
    --p-where "[family_id] IN ('10314', '11434', '11199')" \
    --p-exclude-ids True \
    --o-filtered-table ../data/Microbiome/lr-metrics/ftable_infant_tongue_min100000_filt.qza
#100,000 reads minimum per sample
#remove infants with only one timepoint, won't work for tempted

[32mSaved FeatureTable[Frequency] to: ../data/Microbiome/lr-metrics/ftable_infant_tongue_min100000_filt.qza[0m
[0m

In [667]:
!qiime gemelli clr-transformation \
    --i-table ../data/Microbiome/lr-metrics/ftable_infant_tongue_min100000_filt.qza \
    --o-clr-table ../data/Microbiome/lr-metrics/tempted/clr_infant_tongue_min100000_filt.qza

[32mSaved FeatureTable[Composition] to: ../data/Microbiome/lr-metrics/tempted/clr_infant_tongue_min100000_filt.qza[0m
[0m

In [668]:
!qiime gemelli tempted-factorize \
    --i-table ../data/Microbiome/lr-metrics/tempted/clr_infant_tongue_min100000_filt.qza \
    --m-sample-metadata-file ../data/metadata_combined.txt \
    --p-individual-id-column 'host_subject_id' \
    --p-state-column 'host_age_infant' \
    --o-individual-biplot ../data/Microbiome/lr-metrics/tempted/infant_tongue_individual_biplot.qza \
    --o-state-loadings ../data/Microbiome/lr-metrics/tempted/infant_tongue_state_loadings.qza \
    --o-distance-matrix ../data/Microbiome/lr-metrics/tempted/infant_tongue_distance_matrix.qza \
    --o-svd-center ../data/Microbiome/lr-metrics/tempted/infant_tongue_svd_center.qza

[32mSaved PCoAResults % Properties('biplot') to: ../data/Microbiome/lr-metrics/tempted/infant_tongue_individual_biplot.qza[0m
[32mSaved SampleData[SampleTrajectory] to: ../data/Microbiome/lr-metrics/tempted/infant_tongue_state_loadings.qza[0m
[32mSaved DistanceMatrix to: ../data/Microbiome/lr-metrics/tempted/infant_tongue_distance_matrix.qza[0m
[32mSaved SampleData[SampleTrajectory] to: ../data/Microbiome/lr-metrics/tempted/infant_tongue_svd_center.qza[0m
[0m

In [669]:
!qiime emperor biplot\
    --i-biplot ../data/Microbiome/lr-metrics/tempted/infant_tongue_individual_biplot.qza \
    --m-sample-metadata-file ../data/Microbiome/lr-metrics/tempted/subject-metadata.tsv \
    --m-feature-metadata-file ../data/Microbiome/sepsis_taxonomy.qza \
    --p-number-of-features 10 \
    --o-visualization ../data/Microbiome/lr-metrics/tempted/infant_tongue_subject_biplot.qzv

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/tempted/infant_tongue_subject_biplot.qzv[0m
[0m

In [670]:
!qiime diversity beta-group-significance \
    --i-distance-matrix ../data/Microbiome/lr-metrics/tempted/infant_tongue_distance_matrix.qza \
    --m-metadata-file ../data/Microbiome/lr-metrics/tempted/subject-metadata.tsv \
    --m-metadata-column hmo_Secretor_str \
    --p-method permanova \
    --p-pairwise True \
    --o-visualization ../data/Microbiome/lr-metrics/tempted/infant_oral_bsig_m-hmo.qzv
#sig secretor vs not secretor

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/tempted/infant_oral_bsig_m-hmo.qzv[0m
[0m

In [671]:
!qiime diversity beta-group-significance \
    --i-distance-matrix ../data/Microbiome/lr-metrics/tempted/infant_tongue_distance_matrix.qza \
    --m-metadata-file ../data/Microbiome/lr-metrics/tempted/subject-metadata.tsv \
    --m-metadata-column mode_delivery \
    --p-method permanova \
    --p-pairwise True \
    --o-visualization ../data/Microbiome/lr-metrics/tempted/infant_oral_bsig_dmode.qzv
#p=0.08, not sig

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/tempted/infant_oral_bsig_dmode.qzv[0m
[0m

In [672]:
!qiime diversity beta-group-significance \
    --i-distance-matrix ../data/Microbiome/lr-metrics/tempted/infant_tongue_distance_matrix.qza \
    --m-metadata-file ../data/Microbiome/lr-metrics/tempted/subject-metadata.tsv \
    --m-metadata-column bf_category \
    --p-method permanova \
    --p-pairwise True \
    --o-visualization ../data/Microbiome/lr-metrics/tempted/infant_oral_bsig_bf-cat.qzv
#not significant

[32mSaved Visualization to: ../data/Microbiome/lr-metrics/tempted/infant_oral_bsig_bf-cat.qzv[0m
[0m

In [49]:
!qiime qurro loading-plot \
    --i-table ../data/Microbiome/ftable_infant_tongue_min10-5.qza \
    --i-ranks ../data/Microbiome/lr-metrics/tempted/infant_tongue_individual_biplot.qza \
    --m-sample-metadata-file ../data/metadata_combined.txt \
    --m-feature-metadata-file ../data/Microbiome/sepsis_taxonomy.qza \
    --o-visualization ../data/Microbiome/lr-metrics/tempted/infant_tongue_qurro.qzv

[32mSaved Visualization to: ../data/Microbiome/reads/lr-metrics/tempted/qurro.qzv[0m
[0m