## Calculate ClassyTree UniFrac distances (classical workflow)

Download the bucket table from GNPS. <br>
Replace the job ID in the cell below with your own GNPS job ID:

In [1]:
!curl -d "" 'https://gnps.ucsd.edu/ProteoSAFe/DownloadResult?task=b76dd5a123e54a7eb42765499f9163a5&view=download_cluster_buckettable' -o GNPS_Buckettable.zip

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  339k    0  339k    0     0  37656      0 --:--:--  0:00:02 --:--:-- 37642     0   114k      0 --:--:--  0:00:02 --:--:--  114k


In [2]:
!unzip -d GNPS_Buckettable/ GNPS_Buckettable.zip

Archive:  GNPS_Buckettable.zip
  inflating: GNPS_Buckettable/METABOLOMICS-SNETS-V2-b76dd5a1-download_cluster_buckettable-main.tsv  
  inflating: GNPS_Buckettable/params.xml  
  inflating: GNPS_Buckettable/clusterinfo/99f5516ab61046ec8c8a0c8f035a2880.clusterinfo  
  inflating: GNPS_Buckettable/clusterinfosummarygroup_attributes_withIDs_withcomponentID/5207ac30d6054805bf09e9a49538be08.clustersummary  
  inflating: GNPS_Buckettable/networkedges_selfloop/6f89d6e019364eaba19c2f237fc503db..selfloop  
  inflating: GNPS_Buckettable/result_specnets_DB/562ad714cb0c425c8cd7c00ab4472463.tsv  
  inflating: GNPS_Buckettable/groupmapping_converted/38eb2ddbac514d7384f1ca901558bf8a.group  


Load libraries

In [3]:
import pandas as pd
import os
import MetaboDistTrees 

In [4]:
# Read the tab-separated ClassyFire results table from the MetaboDistTrees data directory.
cf = pd.read_csv(
    "../MetaboDistTrees/data/ClassyFireResults_Network_Classical.txt",
    sep="\t",
)

In [5]:
# Distinct values found in the CF_kingdom column.
set(cf["CF_kingdom"])

{'Organic compounds', 'no matches'}

In [6]:
# Preview the first five rows of the ClassyFire table.
cf.head(n=5)

Unnamed: 0,cluster index,CF_componentindex,CF_NrNodes,CF_kingdom,CF_kingdom_score,CF_superclass,CF_superclass_score,CF_class,CF_class_score,CF_subclass,CF_subclass_score,CF_Dparent,CF_Dparent_score,CF_MFramework,CF_MFramework_score
0,1,S1,1,no matches,,no matches,,no matches,,no matches,,no matches,,no matches,
1,6,S2,1,no matches,,no matches,,no matches,,no matches,,no matches,,no matches,
2,19,S3,1,no matches,,no matches,,no matches,,no matches,,no matches,,no matches,
3,22,S4,1,no matches,,no matches,,no matches,,no matches,,no matches,,no matches,
4,24,S5,1,no matches,,no matches,,no matches,,no matches,,no matches,,no matches,


In [7]:
# Column names passed to get_classytrees as its `lev` argument.
# NOTE(review): the cf preview shows the header 'cluster index' (with a space),
# while this list uses 'cluster.index' - confirm get_classytrees reconciles the two.
lev = [
    "CF_class",
    "CF_subclass",
    "CF_Dparent",
    "cluster.index",
]

In [8]:
# Locate the bucket table inside the unzipped GNPS download: it is the entry
# whose file name contains 'METABOLOMICS'.
bt_dir = 'GNPS_Buckettable/'
# Sort so the choice is deterministic if several entries match (os.listdir order is arbitrary).
bt_candidates = sorted(x for x in os.listdir(bt_dir) if 'METABOLOMICS' in x)
if not bt_candidates:
    # Fail with an actionable message instead of a bare IndexError.
    raise FileNotFoundError(
        "No file containing 'METABOLOMICS' found in " + bt_dir
        + "; check that the GNPS download and unzip steps succeeded."
    )
bt_path = os.path.join(bt_dir, bt_candidates[0])

# Read the tab-separated bucket table.
bt = pd.read_csv(bt_path, sep='\t')

In [9]:
# Preview the first five rows of the bucket table.
bt.head(n=5)

Unnamed: 0,#OTU ID,CF231285DRB80135300mzXML,CF010355DRA50135268mzXML,CF318299DRB40135292mzXML
0,4140,0.0,8168.0,0.0
1,1045,136552.0,77664.0,0.0
2,6776,29584.0,29328.0,0.0
3,9580,0.0,27872.0,11804.0
4,5071,13156.0,0.0,7940.0


In [10]:
# Run MetaboDistTrees.get_classytrees on the ClassyFire table (cf), the bucket
# table (bt) and the hierarchy levels (lev), with 'ClassyTree/' as the last argument.
# NOTE(review): presumably this writes the ClassyTree/ files used by the later
# biom/qiime cells (bucket table TSV and Newick tree) - confirm against the library.
MetaboDistTrees.get_classytrees(cf,bt,lev,'ClassyTree/')

In [13]:
# Read the tab-separated example sample metadata from the MetaboDistTrees data directory.
md = pd.read_csv(
    "../MetaboDistTrees/data/Metadata_DrugMetabolism_Example.txt",
    sep="\t",
)

In [14]:
# Preview the first five rows of the metadata table.
md.head(n=5)

Unnamed: 0,#SampleID,Sample_Group
0,CF231285DRB80135300mzXML,1
1,CF010355DRA50135268mzXML,2
2,CF318299DRB40135292mzXML,3


In [15]:
# Bucket-table columns with no matching '#SampleID' entry in the metadata.
set(bt.columns).difference(md['#SampleID'])

{'#OTU ID'}

### Calculate UniFrac distances using Qiime2

Make sure to run this part within your QIIME 2 environment.

In [1]:
import qiime2 as q2
import os
import sys

# Directory that holds the QIIME 2 command-line tools used by the `!` cells below.
# `import qiime2` above only succeeds when the kernel runs inside the QIIME 2 conda
# environment, so the directory of the running interpreter is that environment's
# bin/ folder. Assign an explicit path here instead if your tools live elsewhere.
path = os.path.dirname(sys.executable)

# Append to PATH only once, so that re-running this cell does not keep growing it.
if path not in os.environ['PATH'].split(os.pathsep):
    os.environ['PATH'] += os.pathsep + path

In [2]:
# Convert the tab-separated bucket table in ClassyTree/ into an HDF5 BIOM file,
# which is the file imported into QIIME 2 in the following cell.
! biom convert \
    -i ClassyTree/Buckettable_ChemicalClasses.tsv \
    -o ClassyTree/Buckettable_ChemicalClasses.biom \
    --table-type="OTU table" --to-hdf5

In [3]:
# Import the BIOM table as a QIIME 2 FeatureTable[Frequency] artifact (.qza).
! qiime tools import --type 'FeatureTable[Frequency]' \
    --input-path ClassyTree/Buckettable_ChemicalClasses.biom \
    --output-path ClassyTree/Buckettable_ChemicalClasses.qza

[32mImported ClassyTree/Buckettable_ChemicalClasses.biom as BIOMV210DirFmt to ClassyTree/Buckettable_ChemicalClasses.qza[0m


In [4]:
# Import the Newick tree as a QIIME 2 Phylogeny[Rooted] artifact (.qza).
# Note: the input file is NewickTree_cluster.index.txt, while the resulting
# artifact is named NewickTree_ChemicalClasses.qza.
! qiime tools import --type 'Phylogeny[Rooted]' \
    --input-path ClassyTree/NewickTree_cluster.index.txt \
    --output-path ClassyTree/NewickTree_ChemicalClasses.qza

[32mImported ClassyTree/NewickTree_cluster.index.txt as NewickDirectoryFormat to ClassyTree/NewickTree_ChemicalClasses.qza[0m


#### weighted UniFrac

In [5]:
# Compute the weighted UniFrac distance matrix from the imported feature table
# and the imported tree.
! qiime diversity beta-phylogenetic \
    --i-table ClassyTree/Buckettable_ChemicalClasses.qza \
    --i-phylogeny ClassyTree/NewickTree_ChemicalClasses.qza \
    --p-metric weighted_unifrac \
    --o-distance-matrix ClassyTree/weighted_unifrac_distance_matrix_ChemicalClasses.qza

[32mSaved DistanceMatrix % Properties(['phylogenetic']) to: ClassyTree/weighted_unifrac_distance_matrix_ChemicalClasses.qza[0m


In [6]:
# Run principal coordinates analysis (PCoA) on the weighted UniFrac distance matrix.
! qiime diversity pcoa \
    --i-distance-matrix ClassyTree/weighted_unifrac_distance_matrix_ChemicalClasses.qza \
    --o-pcoa ClassyTree/weighted_unifrac_distance_matrix_ChemicalClasses_PCoA.qza

[32mSaved PCoAResults to: ClassyTree/weighted_unifrac_distance_matrix_ChemicalClasses_PCoA.qza[0m


In [8]:
# Build an Emperor plot (.qzv) from the weighted UniFrac PCoA results, using the
# example metadata file for sample annotations.
! qiime emperor plot \
    --i-pcoa ClassyTree/weighted_unifrac_distance_matrix_ChemicalClasses_PCoA.qza \
    --m-metadata-file ../MetaboDistTrees/data/Metadata_DrugMetabolism_Example.txt \
    --o-visualization ClassyTree/wClassyTreeUniFrac.qzv

[32mSaved Visualization to: ClassyTree/wClassyTreeUniFrac.qzv[0m


In [9]:
# Load the weighted UniFrac Emperor visualization; as the last expression of the
# cell, the returned object is what the notebook displays.
q2.Visualization.load('ClassyTree/wClassyTreeUniFrac.qzv')

#### unweighted UniFrac

In [10]:
# Compute the unweighted UniFrac distance matrix from the same feature table
# and tree artifacts used for the weighted metric.
! qiime diversity beta-phylogenetic \
    --i-table ClassyTree/Buckettable_ChemicalClasses.qza \
    --i-phylogeny ClassyTree/NewickTree_ChemicalClasses.qza \
    --p-metric unweighted_unifrac \
    --o-distance-matrix ClassyTree/unweighted_unifrac_distance_matrix_ChemicalClasses.qza

[32mSaved DistanceMatrix % Properties(['phylogenetic']) to: ClassyTree/unweighted_unifrac_distance_matrix_ChemicalClasses.qza[0m


In [11]:
# Run principal coordinates analysis (PCoA) on the unweighted UniFrac distance matrix.
! qiime diversity pcoa \
    --i-distance-matrix ClassyTree/unweighted_unifrac_distance_matrix_ChemicalClasses.qza \
    --o-pcoa ClassyTree/unweighted_unifrac_distance_matrix_ChemicalClasses_PCoA.qza

[32mSaved PCoAResults to: ClassyTree/unweighted_unifrac_distance_matrix_ChemicalClasses_PCoA.qza[0m


In [12]:
# Build an Emperor plot (.qzv) from the unweighted UniFrac PCoA results, using the
# example metadata file for sample annotations.
! qiime emperor plot \
    --i-pcoa ClassyTree/unweighted_unifrac_distance_matrix_ChemicalClasses_PCoA.qza \
    --m-metadata-file ../MetaboDistTrees/data/Metadata_DrugMetabolism_Example.txt \
    --o-visualization ClassyTree/uwClassyTreeUniFrac.qzv

[32mSaved Visualization to: ClassyTree/uwClassyTreeUniFrac.qzv[0m


In [13]:
# Load the unweighted UniFrac Emperor visualization; as the last expression of the
# cell, the returned object is what the notebook displays.
q2.Visualization.load('ClassyTree/uwClassyTreeUniFrac.qzv')