# Streamlined Extraction of Nucleic Acids and Metabolites from Low- and High-Biomass Samples Using Isopropanol and Matrix Tubes

## LC-MS/MS Mantel tests

## Set up notebook environment
### Note: Run this notebook in an environment with QIIME 2 and the gemelli package installed. It also requires a folder named 'assets' containing the assets provided in the same repository as this code.


In [20]:
import os
import biom
import warnings
import pickle
import numpy as np
import pandas as pd
import qiime2 as q2
from biom import Table
from skbio import OrdinationResults
from skbio.stats import subsample_counts
from skbio.stats.distance import permanova, anosim, mantel
from skbio.stats.distance import DistanceMatrix
from qiime2.plugins.gemelli.actions import rpca
from qiime2.plugins.gemelli.actions import phylogenetic_rpca_with_taxonomy
from qiime2.plugins.feature_table.actions import rarefy
from qiime2.plugins.diversity.actions import beta_group_significance
from qiime2.plugins.emperor.actions import biplot, plot
from qiime2.plugins.diversity.actions import (beta,
                                              beta_phylogenetic,
                                              pcoa)
from qiime2.plugins import demux, deblur, quality_filter, \
                           metadata, feature_table, alignment, \
                           phylogeny, diversity, emperor, feature_classifier, \
                           taxa, composition

from assets.step_wise_anova import run_stepwise_anova
from qiime2.plugins.fragment_insertion.actions import filter_features
warnings.filterwarnings("ignore", category=DeprecationWarning)

# helper functions
from assets.util_updated_again import (mantel_matched, simulate_depth,
                        all_dists_metab, nested_permanova)

# plotting
import matplotlib
import seaborn as sns
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')

plt.style.use('ggplot')
%matplotlib inline


# Subset files and make paired files

In [5]:
# Load the per-sample metadata table (tab-delimited text) into a DataFrame
md = pd.read_csv(
    '/matrix/metadata_samples/metadata_samples_qiita_20250205.txt',
    sep='\t',
)


In [7]:
# Split the metadata by storage solution: one frame holding the rows whose
# 'storage_solution' is 'etoh' and one holding the 'isopropanol' rows
md_etoh = md.loc[md['storage_solution'].eq('etoh')]
md_isop = md.loc[md['storage_solution'].eq('isopropanol')]


In [8]:
# Stack the ethanol and isopropanol subsets row-wise into the single metadata
# table used for the storage-solution comparison
md_compare_storage = pd.concat([md_etoh, md_isop], axis=0)


In [10]:
# Write the combined metadata as a tab-separated file, leaving out the
# pandas row index
md_compare_storage.to_csv(
    '/matrix/data/16S/00_mantel/metadata_samples_mantel_storage.txt',
    sep='\t',
    index=False,
)


# Mantel tests (storage_solution): Ethanol vs. Isopropanol

In [None]:
# Reload the combined storage-solution metadata as QIIME 2 metadata
# (this rebinds `md_compare_storage` from a DataFrame to a q2.Metadata object)
md_compare_storage = q2.Metadata.load(
    '/matrix/data/16S/00_mantel/metadata_samples_mantel_storage.txt')

# Load the LC-MS feature-table artifacts (file names suggest singleton
# features were already removed -- confirm against the upstream processing)
table_metab_lbm = q2.Artifact.load(
    '/matrix/data/lcms/matrix_lcms_lbm_biom_qiita_ids_noSingletons.qza')
table_metab_hbm = q2.Artifact.load(
    '/matrix/data/lcms/matrix_lcms_hbm_biom_qiita_ids_noSingletons.qza')


In [None]:
# LBM
## Restrict the feature table and the metadata to the samples they share
table_metab_lbm_biom = table_metab_lbm.view(Table)
md_compare_storage_lbm = md_compare_storage.to_dataframe()
shared_ = list(set(table_metab_lbm_biom.ids()) & set(md_compare_storage_lbm.index))
md_compare_storage_lbm = md_compare_storage_lbm.reindex(shared_)
# NOTE: the filter is applied in place, so `table_metab_lbm_biom` and
# `table_metab_lbm_biom_storage` are the same (already filtered) object
table_metab_lbm_biom_storage = table_metab_lbm_biom.filter(
    shared_, axis='sample', inplace=True)

## Keep only the features whose total over the retained samples is > 0
keep_ = table_metab_lbm_biom_storage.ids(axis='observation')[
    table_metab_lbm_biom_storage.sum(axis='observation') > 0]
table_metab_lbm_biom_storage.filter(keep_, axis='observation', inplace=True)

## Wrap the filtered table and the re-indexed metadata as QIIME 2 objects
table_metab_lbm_storage = q2.Artifact.import_data(
    'FeatureTable[Frequency]', table_metab_lbm_biom_storage)
md_compare_storage_lbm_q2 = q2.Metadata(md_compare_storage_lbm)

## Build the distance matrices with the `all_dists_metab` helper from assets
dists_res_metab_lbm = all_dists_metab(table_metab_lbm_storage)

## Metadata columns handed to `mantel_matched`
## (the `ids` column must already be present in the metadata; presumably it
## links the same sample across storage solutions -- confirm in the helper)
md_lbm_storage_dist_q2 = md_compare_storage_lbm_q2.to_dataframe().copy()
grouping = 'storage_solution'
ids = 'sample_name_mantel_solution'

## Run the matched Mantel test once per distance metric
mantel_res_metab_lbm = {}
for metric_, dist_mantel in dists_res_metab_lbm.items():
    # view the stored result as a scikit-bio DistanceMatrix
    dist_mantel = dist_mantel.distance_matrix.view(DistanceMatrix)
    # line the metadata rows up with the samples present in this matrix
    md_lbm_storage_dist_q2_sub = md_lbm_storage_dist_q2.reindex(dist_mantel.ids)
    # stored per metric and labelled corr, p, n when compiled below
    mantel_res_metab_lbm[metric_] = mantel_matched(
        dist_mantel,
        md_lbm_storage_dist_q2_sub,
        grouping,
        ids,
    )

## Compile one column per metric (rows: corr, p, n), save, and display
mantel_res_metab_lbm = pd.DataFrame(mantel_res_metab_lbm,
                                    index=['corr', 'p', 'n'])
mantel_res_metab_lbm.to_csv(
    '/matrix/results/mantel/mantel_metab_lbm_storage.txt',
    sep='\t',
)
mantel_res_metab_lbm


In [None]:
# HBM
## Restrict the feature table and the metadata to the samples they share
table_metab_hbm_biom = table_metab_hbm.view(Table)
md_compare_storage_hbm = md_compare_storage.to_dataframe()
shared_ = list(set(table_metab_hbm_biom.ids()) & set(md_compare_storage_hbm.index))
md_compare_storage_hbm = md_compare_storage_hbm.reindex(shared_)
# NOTE: the filter is applied in place, so `table_metab_hbm_biom` and
# `table_metab_hbm_biom_storage` are the same (already filtered) object
table_metab_hbm_biom_storage = table_metab_hbm_biom.filter(
    shared_, axis='sample', inplace=True)

## Keep only the features whose total over the retained samples is > 0
keep_ = table_metab_hbm_biom_storage.ids(axis='observation')[
    table_metab_hbm_biom_storage.sum(axis='observation') > 0]
table_metab_hbm_biom_storage.filter(keep_, axis='observation', inplace=True)

## Wrap the filtered table and the re-indexed metadata as QIIME 2 objects
table_metab_hbm_storage = q2.Artifact.import_data(
    'FeatureTable[Frequency]', table_metab_hbm_biom_storage)
md_compare_storage_hbm_q2 = q2.Metadata(md_compare_storage_hbm)

## Build the distance matrices with the `all_dists_metab` helper from assets
dists_res_metab_hbm = all_dists_metab(table_metab_hbm_storage)

## Metadata columns handed to `mantel_matched`
## (the `ids` column must already be present in the metadata; presumably it
## links the same sample across storage solutions -- confirm in the helper)
md_hbm_storage_dist_q2 = md_compare_storage_hbm_q2.to_dataframe().copy()
grouping = 'storage_solution'
ids = 'sample_name_mantel_solution'

## Run the matched Mantel test once per distance metric
mantel_res_metab_hbm = {}
for metric_, dist_mantel in dists_res_metab_hbm.items():
    # view the stored result as a scikit-bio DistanceMatrix
    dist_mantel = dist_mantel.distance_matrix.view(DistanceMatrix)
    # line the metadata rows up with the samples present in this matrix
    md_hbm_storage_dist_q2_sub = md_hbm_storage_dist_q2.reindex(dist_mantel.ids)
    # stored per metric and labelled corr, p, n when compiled below
    mantel_res_metab_hbm[metric_] = mantel_matched(
        dist_mantel,
        md_hbm_storage_dist_q2_sub,
        grouping,
        ids,
    )

## Compile one column per metric (rows: corr, p, n), save, and display
mantel_res_metab_hbm = pd.DataFrame(mantel_res_metab_hbm,
                                    index=['corr', 'p', 'n'])
mantel_res_metab_hbm.to_csv(
    '/matrix/results/mantel/mantel_metab_hbm_storage.txt',
    sep='\t',
)
mantel_res_metab_hbm
