In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from qiime2 import Artifact, Visualization

from skbio.stats.distance import mantel, permanova, DistanceMatrix
from scipy.spatial.distance import pdist, cdist

# Setup

In [4]:
# Load the three beta-diversity distance matrices stored under
# core-diversity-results/ and view each one as a scikit-bio DistanceMatrix.
# Order of the paths matches the order of the names being assigned.
wuni, uuni, bray = (
    Artifact.load(qza_path).view(DistanceMatrix)
    for qza_path in (
        "core-diversity-results/weighted_unifrac_distance_matrix.qza",
        "core-diversity-results/unweighted_unifrac_distance_matrix.qza",
        "core-diversity-results/bray_curtis_distance_matrix.qza",
    )
)

In [7]:
# Sample metadata table: tab-separated file, rows indexed by the "Sampleid" column.
# read_table uses a tab delimiter by default, so no explicit sep is needed.
metadata = pd.read_table("data/metadata.tsv", index_col="Sampleid")

In [17]:
# Show the column labels of the metadata table (the index column, "Sampleid", is not listed).
metadata.columns

Index(['Subjectcode', 'Timepoint', 'Sex', 'Length', 'Birth Weight',
       'Gestational Age', 'Infections', 'Antibiotic use', 'Mothers Age',
       'Mothers Height', 'Mothers Weight', 'Education', 'Ethnicity Race',
       'Zip code', 'Annual Household Income', 'Smoke',
       'Smoke during pregnancy', 'Smoke after delivery', 'Alcohol',
       'Drinks before pregnancy per week', 'Current H. pylori infection',
       'Pregnancy complications', 'Probiotics during pregnancy',
       'Post-natal probiotics', 'Post-natal multivitamins',
       'Antibiotic use during pregnancy', 'Antibiotic use which trimester',
       'Antibiotic use Post-natal', 'Other post-natal medications',
       'High cholesterol', 'EBM-FF-PBM', 'Maternal relevant medical hx',
       'Current feeding method', 'Solid foods given', 'Baby Antibiotic use',
       'Mother antibiotic use'],
      dtype='object')

# Categorical variables

In [13]:
# Group-significance test on the weighted UniFrac distance matrix, with samples
# grouped by the "Subjectcode" metadata column; the result is written to
# core-diversity-results/PERMANOVA-subject.qzv.
# No --p-method is passed, so the plugin's default test is used
# (PERMANOVA per the QIIME 2 docs -- TODO confirm for the installed version).
! qiime diversity beta-group-significance \
--i-distance-matrix core-diversity-results/weighted_unifrac_distance_matrix.qza \
--m-metadata-file data/metadata.tsv \
--m-metadata-column Subjectcode \
--o-visualization core-diversity-results/PERMANOVA-subject.qzv

Saved Visualization to: core-diversity-results/PERMANOVA-subject.qzv


In [14]:
# Group-significance test on the unweighted UniFrac distance matrix, with samples
# grouped by the "Subjectcode" metadata column; the result is written to
# core-diversity-results/PERMANOVA-subject-uuni.qzv.
# No --p-method is passed, so the plugin's default test is used
# (PERMANOVA per the QIIME 2 docs -- TODO confirm for the installed version).
! qiime diversity beta-group-significance \
--i-distance-matrix core-diversity-results/unweighted_unifrac_distance_matrix.qza \
--m-metadata-file data/metadata.tsv \
--m-metadata-column Subjectcode \
--o-visualization core-diversity-results/PERMANOVA-subject-uuni.qzv

Saved Visualization to: core-diversity-results/PERMANOVA-subject-uuni.qzv


In [15]:
# Group-significance test on the Bray-Curtis distance matrix, with samples
# grouped by the "Subjectcode" metadata column; the result is written to
# core-diversity-results/PERMANOVA-subject-bray.qzv.
# No --p-method is passed, so the plugin's default test is used
# (PERMANOVA per the QIIME 2 docs -- TODO confirm for the installed version).
! qiime diversity beta-group-significance \
--i-distance-matrix core-diversity-results/bray_curtis_distance_matrix.qza \
--m-metadata-file data/metadata.tsv \
--m-metadata-column Subjectcode \
--o-visualization core-diversity-results/PERMANOVA-subject-bray.qzv

Saved Visualization to: core-diversity-results/PERMANOVA-subject-bray.qzv


### The variation between individuals is significantly larger than the variation within individuals

#### Weighted UniFrac
- pseudo-F = 2.834
- p-value = 0.001

#### Unweighted UniFrac
- pseudo-F = 1.59645
- p-value = 0.008

#### Bray-Curtis
- pseudo-F = 2.9183
- p-value = 0.001

In [None]:
# Split the metadata into one frame per value of the "Timepoint" column (0, 1, 2).
t0, t1, t2 = (
    metadata.loc[metadata["Timepoint"] == timepoint]
    for timepoint in (0, 1, 2)
)



In [None]:
# TODO: analyse the "Smoke during pregnancy" metadata column.
# This cell previously held the bare column name, which is not valid Python and
# raised a SyntaxError under Restart & Run All; it is kept as a placeholder note.