In [1]:
import calour as ca
import pandas as pd
# Level 20 is Python's logging.INFO; the INFO lines in the cell outputs come from this setting.
ca.set_log_level(20)
# Select matplotlib's interactive "notebook" backend for the figures plotted later on.
%matplotlib notebook

In [2]:
# Load the BIOM feature table together with its sample mapping file.
# The log output reports 370 samples loaded and 340 remaining after filtering.
# NOTE(review): presumably min_reads drops samples with fewer than 1000 reads
# and normalize rescales every sample to 10000 reads -- confirm in calour docs.
dat = ca.read_amplicon(
    './all.fixids.biom',
    './map.txt',
    normalize=10000,
    min_reads=1000,
)

2017-10-12 13:38:52 INFO loaded 370 samples, 2442 features
2017-10-12 13:38:52 INFO After filtering, 340 remaining


In [3]:
# Cluster the features of the full data set; the log reports 2404 of the
# 2442 loaded features remaining after the filtering step.
# NOTE(review): 10 is presumably the minimal abundance threshold -- confirm.
datc = dat.cluster_features(10)

2017-10-12 13:38:54 INFO After filtering, 2404 remaining


In [4]:
# Show the experiment summary (dimensions, sample IDs, feature IDs).
datc

AmpliconExperiment all.fixids.biom
----------------------------------
data dimension: 340 samples, 2404 features
sample IDs: Index(['MVB2015-6269', 'TR-157', 'MVB2015-6482', 'TR-24', 'MVB2015-6626',
       'MVB2015-6316', 'MVB2015-6518', 'MVB2015-6406', 'MVB2015-6515',
       'MVB2015-6234',
       ...
       'MVB2015-6714', 'MVB2015-6577', 'MVB2015-6559', 'MVB2015-6330',
       'MVB2015-6606', 'MVB2015-6724', 'MVB2015-6666', 'MVB2015-6510',
       'TR-119', 'TR-108'],
      dtype='object', name='#SampleID', length=340)
feature IDs: Index(['TACAGAGGGTGCAAGCGTTAATCAGAATGACTGGGCGTAAAGGGCGTGTAGGTGGTTGACTAGGTTTGATGTGAAATCCCCGGGCTTAACCTGGGAATTGCGTCGAAAACGGGTCGACTGGAGTGAGATAGAGGGTTGTGGAATTTCCGG',
       'TACAGAGGGTGCAAGCGTTAATCAGAATGACTGGGCGTAAAGGGCGTGTAGGTGGTTGACTAGGTTTGATGTGAAATCCCCGGGCTTAACCTGGGAATTGCGTCGAAAACGGGTCGACTCGAGTGAGATAGAGGGTTGTGGAATTTCCGG',
       'TACGAAGGGGGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGATCGATCAGTCAGGGGTGAAATCCCGCAGCTCAACTGCGGAACTGCCTTTGATACTGTCGGTCTAGAGTATGGAAG

In [5]:
# List the sample metadata columns; several of them are used below as
# sort/filter/plot fields (e.g. 'Study.ID', 'Swab_Type', 'Experimental.Cat').
datc.sample_metadata.columns

Index(['extraction', 'BarcodeSequence', 'LinkerPrimerSequence', 'Country',
       'Date', 'Species', 'Location', 'Study.ID', 'Survey.Habitat',
       'Start.Loc', 'End.Loc', 'Experimental.Cat', 'startloc', 'endloc',
       'Experiment.ID', 'Life.Stage', 'Pond.Ind', 'Individual_ID', 'Swab_Type',
       'Extraction_ID', 'SequenceRun_ID', 'Description',
       '_calour_original_abundance'],
      dtype='object')

In [6]:
# Reorder the samples by their 'Extraction_ID' metadata value.
# Note: this rebinds datc, so the summary displayed earlier shows the
# pre-sort sample order.
datc = datc.sort_samples('Extraction_ID')

In [7]:
# Samples with a missing 'endloc' value are labelled 'Prior'.
# Bracket-based column assignment is used on purpose: attribute-style
# assignment (df.endloc = ...) only updates a column that already exists and
# otherwise silently creates a plain Python attribute on the DataFrame.
# fillna replaces the per-element Python loop with the vectorized equivalent.
datc.sample_metadata['endloc'] = datc.sample_metadata['endloc'].fillna('Prior')

# pond<->stream transfer experiment

In [8]:
# Keep only the samples whose 'Study.ID' is 'Transplant'.
trans = datc.filter_samples('Study.ID', 'Transplant')

In [9]:
# Re-cluster the features on the transplant subset only; the log reports
# 2054 features remaining after filtering.
trans = trans.cluster_features(10)

2017-10-12 13:39:18 INFO After filtering, 2054 remaining


In [10]:
# Order the transplant samples by experiment category.
trans = trans.sort_samples('Experimental.Cat')

## Skin

In [11]:
# Skin swabs of the transplant experiment: filter, re-cluster, renormalize.
# The log reports 1650 features remaining after the clustering filter.
# NOTE(review): normalize(100, axis='s') presumably rescales each sample to
# sum to 100 (i.e. percentages) -- confirm in calour docs.
tskin = (
    trans
    .filter_samples('Swab_Type', 'skin')
    .cluster_features(10)
    .normalize(100, axis='s')
)

2017-10-12 13:39:27 INFO After filtering, 1650 remaining


## All features (fig1D)

In [21]:
# Heatmap of all skin features, samples ordered by experiment category,
# with 'endloc' and 'startloc' passed as barx_fields.
# NOTE(review): clim presumably sets the color-scale limits -- confirm.
f = tskin.sort_samples('Experimental.Cat').plot(
    sample_field='Experimental.Cat',
    gui='jupyter',
    barx_fields=['endloc', 'startloc'],
    feature_field=None,
    clim=[0, 20],
)
f.ax_hm.set_xlabel('experiment category')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x11cfe62b0>

In [22]:
# Write the figure held in f (created by the preceding plot cell) to a PDF.
f.save_figure('figure-1D-skin-all.pdf')

## pond/stream cluster (fig S1A)

In [25]:
# Same skin heatmap as the all-features plot, restricted by rect.
# NOTE(review): rect looks like [x_min, x_max, y_min, y_max] in heatmap
# coordinates; the feature range 1505-1572 is hard-coded and presumably only
# valid for the feature order produced by cluster_features above -- confirm.
f = tskin.sort_samples('Experimental.Cat').plot(
    sample_field='Experimental.Cat',
    gui='jupyter',
    barx_fields=['endloc', 'startloc'],
    feature_field=None,
    clim=[0, 20],
    rect=[-0.5, 118.5, 1572, 1505],
)
f.ax_hm.set_xlabel('experiment category')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x114ab77f0>

In [26]:
# Write the figure held in f (created by the preceding plot cell) to a PDF.
f.save_figure('figure-S1A-skin-source.pdf')

## extraction plate cluster (fig 1E, zoom 2)

In [29]:
# Skin heatmap restricted by rect to a second feature range, using a wider
# clim ([0, 50]) than the other skin plots.
# NOTE(review): rect looks like [x_min, x_max, y_min, y_max]; the hard-coded
# feature range 1598-1648 presumably depends on the clustering order -- confirm.
f = tskin.sort_samples('Experimental.Cat').plot(
    sample_field='Experimental.Cat',
    gui='jupyter',
    barx_fields=['endloc', 'startloc'],
    feature_field=None,
    clim=[0, 50],
    rect=[-0.5, 118.5, 1648, 1598],
)
f.ax_hm.set_xlabel('experiment category')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x11dbaeb70>

In [30]:
# Write the figure held in f (created by the preceding plot cell) to a PDF.
f.save_figure('figure-1E-skin-extraction.pdf')

## extraction cluster ordered by extraction (fig1F)

In [31]:
# Skin samples reordered by the 'extraction' metadata field.
ttskin = tskin.sort_samples('extraction')

In [32]:
# Same rect as the extraction-plate cluster plot, but with samples ordered
# and labelled by 'extraction' and no barx_fields.
f = ttskin.plot(
    sample_field='extraction',
    gui='jupyter',
    feature_field=None,
    clim=[0, 20],
    rect=[-0.5, 118.5, 1648, 1598],
)
f.ax_hm.set_xlabel('extraction plate')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x11f2f9240>

In [33]:
# Write the figure held in f (created by the preceding plot cell) to a PDF.
f.save_figure('figure-1F-skin-extraction-ordered-v3.pdf')

## supervised difference between pond and stream (fig S1B)

In [34]:
# Differential abundance on skin samples between the 'Experimental.Cat'
# groups ['P', 'P>P'] and ['S', 'S>S'].
# The log reports 115 features in total (75 higher in the first group,
# 40 higher in the second).
dd = tskin.diff_abundance(
    'Experimental.Cat',
    ['P', 'P>P'],
    ['S', 'S>S'],
)

2017-10-12 13:52:04 INFO 69 samples with both values
2017-10-12 13:52:04 INFO After filtering, 1472 remaining
2017-10-12 13:52:04 INFO 39 samples with value 1 (['P', 'P>P'])
2017-10-12 13:52:05 INFO method meandiff. number of higher in ['P', 'P>P'] : 75. number of higher in ['S', 'S>S'] : 40. total 115


In [38]:
# Heatmap of the diff_abundance result held in dd, samples ordered by
# experiment category.
f = dd.sort_samples('Experimental.Cat').plot(
    sample_field='Experimental.Cat',
    gui='jupyter',
    barx_fields=['endloc', 'startloc'],
    feature_field=None,
    clim=[0, 20],
)
f.ax_hm.set_xlabel('experiment category')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x11362da90>

In [39]:
# Write the figure held in f (created by the preceding plot cell) to a PDF.
f.save_figure('figure-S1B-supervised-pond-stream.pdf')

## Gut

In [40]:
# Intestine samples of the transplant experiment: filter, re-cluster,
# renormalize (same pipeline as tskin). The output reports 154 samples and
# 975 features remaining.
tgut = (
    trans
    .filter_samples('Swab_Type', 'Intestines')
    .cluster_features(10)
    .normalize(100, axis='s')
)
tgut

2017-10-12 13:54:07 INFO After filtering, 975 remaining


AmpliconExperiment all.fixids.biom
----------------------------------
data dimension: 154 samples, 975 features
sample IDs: Index(['MVB2015-6370', 'MVB2015-6640', 'MVB2015-6642', 'MVB2015-6636',
       'MVB2015-6374', 'MVB2015-6372', 'MVB2015-6366', 'MVB2015-6644',
       'MVB2015-6368', 'MVB2015-6638',
       ...
       'MVB2015-6436', 'MVB2015-6664', 'MVB2015-6394', 'MVB2015-6460',
       'MVB2015-6712', 'MVB2015-6698', 'MVB2015-6672', 'MVB2015-6690',
       'MVB2015-6410', 'MVB2015-6666'],
      dtype='object', name='#SampleID', length=154)
feature IDs: Index(['TACAGAGGGTGCAAGCGTTAATCAGAATGACTGGGCGTAAAGGGCGTGTAGGTGGTTGACTAGGTTTGATGTGAAATCCCCGGGCTTAACCTGGGAATTGCGTCGAAAACGGGTCGACTGGAGTGAGATAGAGGGTTGTGGAATTTCCGG',
       'TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGCGCTTAACGTGGGAACTGCATTTGAAACTGGCAAGCTAGAGTCTTGTAGAGGGGGGTAGAATTCCATG',
       'TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGTGGTTTATTAAGTCAGCGGTGAAAGTTTGTGGCTCAACCATAAAATTGCCGTTGA

## All features (fig1A)

In [42]:
# Heatmap of all gut features, samples ordered by experiment category,
# with 'endloc' and 'startloc' passed as barx_fields.
f = tgut.sort_samples('Experimental.Cat').plot(
    sample_field='Experimental.Cat',
    gui='jupyter',
    barx_fields=['endloc', 'startloc'],
    feature_field=None,
    clim=[0, 20],
)
f.ax_hm.set_xlabel('experiment category')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x12017aa20>

In [43]:
# Write the figure held in f (created by the preceding plot cell) to a PDF.
f.save_figure('figure-1A-gut-all.pdf')

## pond/stream cluster (fig1B)

In [45]:
# Gut heatmap restricted by rect. The sample range -0.5..153.5 matches the
# 154 gut samples reported when tgut was created.
# NOTE(review): rect looks like [x_min, x_max, y_min, y_max]; the hard-coded
# feature range 300-415 presumably depends on the clustering order of the
# tgut built with cluster_features(10) -- confirm before re-running.
f = tgut.sort_samples('Experimental.Cat').plot(
    sample_field='Experimental.Cat',
    gui='jupyter',
    barx_fields=['endloc', 'startloc'],
    feature_field=None,
    clim=[0, 20],
    rect=[-0.5, 153.5, 415, 300],
)
f.ax_hm.set_xlabel('experiment category')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x11f05cf98>

In [46]:
# Write the figure held in f (created by the preceding plot cell) to a PDF.
f.save_figure('figure-1B-gut-zoom.pdf')

## supervised pond/stream bacteria (fig1C)

In [47]:
# NOTE(review): this rebinds tgut. Unlike the tgut built earlier in the Gut
# section, this one skips cluster_features(10) and uses
# normalize_compositional(total=100) instead of normalize(100, axis='s').
# Re-running the earlier gut plot cells after this cell would therefore use
# a differently processed object; consider a distinct variable name.
tgut = (
    trans
    .filter_samples('Swab_Type', 'Intestines')
    .normalize_compositional(total=100)
)

2017-10-13 23:07:53 INFO After filtering, 2 remaining
2017-10-13 23:07:53 INFO ignoring 2 features


In [48]:
# Differential abundance on gut samples between the 'Experimental.Cat'
# groups ['P', 'P>P'] and ['S', 'S>S'].
# The log reports 267 features in total (177 higher in the first group,
# 90 higher in the second). This rebinds dd, replacing the skin result.
dd = tgut.diff_abundance(
    'Experimental.Cat',
    ['P', 'P>P'],
    ['S', 'S>S'],
)

2017-10-13 23:07:55 INFO 91 samples with both values
2017-10-13 23:07:55 INFO After filtering, 1070 remaining
2017-10-13 23:07:55 INFO 47 samples with value 1 (['P', 'P>P'])
2017-10-13 23:07:55 INFO method meandiff. number of higher in ['P', 'P>P'] : 177. number of higher in ['S', 'S>S'] : 90. total 267


In [50]:
# Heatmap of the gut diff_abundance result held in dd, samples ordered by
# experiment category.
f = dd.sort_samples('Experimental.Cat').plot(
    sample_field='Experimental.Cat',
    gui='jupyter',
    barx_fields=['endloc', 'startloc'],
    feature_field=None,
    clim=[0, 20],
)
f.ax_hm.set_xlabel('experiment category')

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x11f05a0b8>

In [51]:
# Write the figure held in f (created by the preceding plot cell) to a PDF.
f.save_figure('figure-1C-gut-supervised-pond-stream.pdf')