## Calculate MotifTree UniFrac distances (classical workflow)

Download the bucket table from GNPS. <br>
Replace the job ID below with your own GNPS job ID:

In [4]:
!curl -d "" 'https://gnps.ucsd.edu/ProteoSAFe/DownloadResult?task=b76dd5a123e54a7eb42765499f9163a5&view=download_cluster_buckettable' -o GNPS_Buckettable.zip

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  339k    0  339k    0     0   154k      0 --:--:--  0:00:02 --:--:--  154k


In [5]:
# -o overwrites previously extracted files without prompting, so the cell can be
# re-run (an interactive overwrite prompt would stall the notebook).
!unzip -o GNPS_Buckettable.zip -d GNPS_Buckettable/

Archive:  GNPS_Buckettable.zip
  inflating: GNPS_Buckettable/METABOLOMICS-SNETS-V2-b76dd5a1-download_cluster_buckettable-main.tsv  
  inflating: GNPS_Buckettable/params.xml  
  inflating: GNPS_Buckettable/clusterinfo/99f5516ab61046ec8c8a0c8f035a2880.clusterinfo  
  inflating: GNPS_Buckettable/clusterinfosummarygroup_attributes_withIDs_withcomponentID/5207ac30d6054805bf09e9a49538be08.clustersummary  
  inflating: GNPS_Buckettable/networkedges_selfloop/6f89d6e019364eaba19c2f237fc503db..selfloop  
  inflating: GNPS_Buckettable/result_specnets_DB/562ad714cb0c425c8cd7c00ab4472463.tsv  
  inflating: GNPS_Buckettable/groupmapping_converted/38eb2ddbac514d7384f1ca901558bf8a.group  


load libraries

In [1]:
import pandas as pd
import os
import MetaboDistTrees 

In [2]:
# Tab-separated Mass2Motif node table from the MetaboDistTrees data folder.
motifs_path = '../MetaboDistTrees/data/Mass2Motifs_Nodes_Classical.tsv'
motifs = pd.read_csv(motifs_path, sep='\t')

In [3]:
# Preview the first five rows of the motif table.
motifs.head(n=5)

Unnamed: 0,scans,precursormass,parentrt,document,motif,probability,overlap,motif_126,motif_101,motif_146,...,motif_134,motif_163,motif_13,motif_51,motif_168,motif_129,motif_2,motif_194,motif_149,motif_79
0,6,[150.11099],[603.357971],[6],['motif_18'],[0.9999626608739192],[0.32258414714362105],0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,24,[163.09801000000002],[749.914001],[24],['motif_151'],[0.9472968296275981],[0.6157072274669539],0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,25,"[165.11301, 165.11301, 165.11301]","[793.140991, 793.140991, 793.140991]","[25, 25, 25]","['motif_30', 'motif_124', 'motif_151']","[0.6139909448895849, 0.171050486200748, 0.2149...","[0.36333482888887303, 0.36431533940217503, 0.5...",0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,28,"[166.08599999999998, 166.08599999999998]","[61.588001, 61.588001]","[28, 28]","['motif_96', 'motif_15']","[0.0658885008503292, 0.934056497137804]","[0.719165167291396, 0.511591825256497]",0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,33,"[166.09801000000002, 166.09801000000002, 166.0...","[437.707001, 437.707001, 437.707001]","[33, 33, 33]","['motif_151', 'motif_178', 'motif_124']","[0.273784790619313, 0.6844753921349139, 0.0416...","[0.497580296883846, 0.320036785498011, 0.36296...",0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
# Locate the bucket table among the extracted files: it is the entry whose name
# contains 'METABOLOMICS'. Sorting makes the choice deterministic, and an explicit
# error replaces the bare IndexError raised when the download/unzip step was skipped.
bt_dir = 'GNPS_Buckettable/'
bt_candidates = sorted(x for x in os.listdir(bt_dir) if 'METABOLOMICS' in x)
if not bt_candidates:
    raise FileNotFoundError(
        "No file containing 'METABOLOMICS' found in " + bt_dir +
        "; download and unzip the GNPS bucket table first."
    )
bt_path = os.path.join(bt_dir, bt_candidates[0])
# The bucket table is tab-separated.
bt = pd.read_csv(bt_path, sep='\t')

In [5]:
# Preview the first five rows of the bucket table.
bt.head(n=5)

Unnamed: 0,#OTU ID,CF231285DRB80135300mzXML,CF010355DRA50135268mzXML,CF318299DRB40135292mzXML
0,4140,0.0,8168.0,0.0
1,1045,136552.0,77664.0,0.0
2,6776,29584.0,29328.0,0.0
3,9580,0.0,27872.0,11804.0
4,5071,13156.0,0.0,7940.0


In [7]:
# Options for the MotifTree construction; results go to the MotifTree/ folder,
# which the QIIME 2 cells further down read from (presumably written by this
# call -- confirm against the MetaboDistTrees documentation).
motiftree_options = dict(method="ward", metric="euclidean", outputdir="MotifTree/")
MetaboDistTrees.get_motiftrees(motifs, bt, **motiftree_options)

In [8]:
# Tab-separated sample metadata from the MetaboDistTrees data folder.
md_path = "../MetaboDistTrees/data/Metadata_DrugMetabolism_Example.txt"
md = pd.read_csv(md_path, sep="\t")

In [9]:
# Preview the first five rows of the metadata table.
md.head(n=5)

Unnamed: 0,#SampleID,Sample_Group
0,CF231285DRB80135300mzXML,1
1,CF010355DRA50135268mzXML,2
2,CF318299DRB40135292mzXML,3


Are there any samples (columns) in the bucket table that are not included in the metadata file?

In [10]:
# '#OTU ID' is the feature-identifier column of the bucket table, not a sample,
# so it is excluded; an empty set means every sample has a metadata entry.
set(bt.columns) - {'#OTU ID'} - set(md['#SampleID'])

set()

### Calculate UniFrac distances using Qiime2

Make sure to run this part within your QIIME 2 environment.

In [1]:
import qiime2 as q2
import os
import sys

# Directory to add to PATH so the `!` shell commands below can be found.
# Defaults to the bin/ folder of the environment this kernel runs in (the kernel
# must already be the qiime2 environment for `import qiime2` to succeed); set the
# QIIME2_BIN_DIR environment variable to point somewhere else.
path = os.environ.get('QIIME2_BIN_DIR', os.path.join(sys.prefix, 'bin'))
# Append only once, so re-running this cell does not keep growing PATH.
_path_entries = [p for p in os.environ.get('PATH', '').split(os.pathsep) if p]
if path not in _path_entries:
    os.environ['PATH'] = os.pathsep.join(_path_entries + [path])

In [2]:
# Convert the tab-separated motif bucket table to an HDF5 BIOM file.
! biom convert \
    --input-fp MotifTree/Buckettable_Motifs.tsv \
    --output-fp MotifTree/Buckettable_Motifs.biom \
    --to-hdf5 --table-type="OTU table"

In [3]:
# Import the BIOM table as a QIIME 2 FeatureTable[Frequency] artifact.
! qiime tools import \
    --input-path MotifTree/Buckettable_Motifs.biom \
    --output-path MotifTree/Buckettable_Motifs.qza \
    --type 'FeatureTable[Frequency]'

[32mImported MotifTree/Buckettable_Motifs.biom as BIOMV210DirFmt to MotifTree/Buckettable_Motifs.qza[0m


In [4]:
# Import the motif tree as a QIIME 2 Phylogeny[Rooted] artifact.
! qiime tools import \
    --input-path MotifTree/Tree_Motifs.txt \
    --output-path MotifTree/Tree_Motifs.qza \
    --type 'Phylogeny[Rooted]'

[32mImported MotifTree/Tree_Motifs.txt as NewickDirectoryFormat to MotifTree/Tree_Motifs.qza[0m


#### weighted UniFrac

In [5]:
# Weighted UniFrac distances between samples, using the motif tree as phylogeny.
! qiime diversity beta-phylogenetic \
    --p-metric weighted_unifrac \
    --i-phylogeny MotifTree/Tree_Motifs.qza \
    --i-table MotifTree/Buckettable_Motifs.qza \
    --o-distance-matrix MotifTree/weighted_unifrac_distance_matrix_Motifs.qza

[32mSaved DistanceMatrix % Properties(['phylogenetic']) to: MotifTree/weighted_unifrac_distance_matrix_Motifs.qza[0m


In [6]:
# Principal coordinates analysis of the weighted UniFrac distance matrix.
! qiime diversity pcoa \
    --o-pcoa MotifTree/weighted_unifrac_distance_matrix_Motifs_PCoA.qza \
    --i-distance-matrix MotifTree/weighted_unifrac_distance_matrix_Motifs.qza

[32mSaved PCoAResults to: MotifTree/weighted_unifrac_distance_matrix_Motifs_PCoA.qza[0m


In [7]:
# Emperor visualization of the weighted UniFrac PCoA, annotated with the sample metadata.
! qiime emperor plot \
    --m-metadata-file ../MetaboDistTrees/data/Metadata_DrugMetabolism_Example.txt \
    --i-pcoa MotifTree/weighted_unifrac_distance_matrix_Motifs_PCoA.qza \
    --o-visualization MotifTree/wMotifTreeUniFrac.qzv

[32mSaved Visualization to: MotifTree/wMotifTreeUniFrac.qzv[0m


In [8]:
# Load the weighted UniFrac visualization and display it as the cell output.
weighted_viz = q2.Visualization.load('MotifTree/wMotifTreeUniFrac.qzv')
weighted_viz

#### unweighted UniFrac

In [9]:
# Unweighted UniFrac distances between samples, using the motif tree as phylogeny.
! qiime diversity beta-phylogenetic \
    --p-metric unweighted_unifrac \
    --i-phylogeny MotifTree/Tree_Motifs.qza \
    --i-table MotifTree/Buckettable_Motifs.qza \
    --o-distance-matrix MotifTree/unweighted_unifrac_distance_matrix_Motifs.qza

[32mSaved DistanceMatrix % Properties(['phylogenetic']) to: MotifTree/unweighted_unifrac_distance_matrix_Motifs.qza[0m


In [10]:
# Principal coordinates analysis of the unweighted UniFrac distance matrix.
! qiime diversity pcoa \
    --o-pcoa MotifTree/unweighted_unifrac_distance_matrix_Motifs_PCoA.qza \
    --i-distance-matrix MotifTree/unweighted_unifrac_distance_matrix_Motifs.qza

[32mSaved PCoAResults to: MotifTree/unweighted_unifrac_distance_matrix_Motifs_PCoA.qza[0m


In [11]:
# Emperor visualization of the unweighted UniFrac PCoA, annotated with the sample metadata.
! qiime emperor plot \
    --m-metadata-file ../MetaboDistTrees/data/Metadata_DrugMetabolism_Example.txt \
    --i-pcoa MotifTree/unweighted_unifrac_distance_matrix_Motifs_PCoA.qza \
    --o-visualization MotifTree/uwMotifTreeUniFrac.qzv

[32mSaved Visualization to: MotifTree/uwMotifTreeUniFrac.qzv[0m


In [12]:
# Load the unweighted UniFrac visualization and display it as the cell output.
unweighted_viz = q2.Visualization.load('MotifTree/uwMotifTreeUniFrac.qzv')
unweighted_viz