In [1]:
import calour as ca
%matplotlib notebook

  from ._conv import register_converters as _register_converters


In [2]:
ca.set_log_level(11)

# Load the ants dataset
Data from:

"Dramatic Differences in Gut Bacterial Densities Correlate with Diet and Habitat in Rainforest Ants"

Jon G. Sanders,1,*,† Piotr Łukasik,‡ Megan E. Frederickson,§ Jacob A. Russell,‡ Ryuichi Koga,¶
Rob Knight† and Naomi E. Pierce*

In [3]:
dat=ca.read_amplicon('./all.withtax.biom','./map.txt',normalize=10000,min_reads=1000)

2018-07-12 16:17:10 INFO loaded 389 samples, 41409 features
2018-07-12 16:17:11 INFO After filtering, 315 remaining


## Filter and sort samples and features
Keeping only the five main ant genus samples (ones that have at least 14 adult samples per genus), and only interesting features (> 100 reads over all samples)

In [5]:
datc = dat.filter_samples('genus',['Camponotus','Dolichoderus','Cephalotes','Neoponera','Azteca'])
datc = datc.filter_abundance(100)
datc=datc.sort_samples('life_stage').sort_samples('primer_plate').sort_samples('colony').sort_samples('genus')

2018-07-12 16:18:16 INFO After filtering, 455 remaining


## cluster the features
simililarly behaving bacteria will be close to each other

In [6]:
datc = datc.cluster_features()

2018-07-12 16:18:53 INFO After filtering, 455 remaining


## Plot the whole experiment heatmap (fig 4A)

In [7]:
f=datc.normalize(100).plot(sample_field='genus',gui='jupyter',
                           barx_fields=['colony'],barx_label=False,
                           feature_field=None,clim=[0,100])
f.ax_hm.axes.set_xlabel('ant genus')

<IPython.core.display.Javascript object>

Text(0.5,-36.6356,'ant genus')

In [8]:
f.save_figure('figure-4A-ants-all.pdf')

# Plate dependent contaminant

For the plate contaminant, look at all samples (since we sort by plate)

In [9]:
alldatc = dat.filter_abundance(10)
alldatc=alldatc.sort_samples('life_stage').sort_samples('primer_plate').sort_samples('colony').sort_samples('genus')
alldatc=alldatc.cluster_features()

2018-07-12 16:19:22 INFO After filtering, 8292 remaining
2018-07-12 16:19:22 INFO After filtering, 8292 remaining


Interactively explore to find the candidate contamination bacteria
and print their axes ranges for the zoom plot

In [10]:
f=alldatc.normalize(100).sort_samples('primer_plate').plot(sample_field='primer_plate',gui='jupyter',
                                                           barx_fields=['genus'],barx_label=False,
                                                           feature_field=None,clim=[0,100])

<IPython.core.display.Javascript object>

## Save the plate dependent contaminant (Fig 4B)

After zooming in and exploring in the the previous plot, we plot only the specified the region showing the contaminant:

In [11]:
f=alldatc.normalize(100).sort_samples('primer_plate').plot(sample_field='primer_plate',gui='jupyter',
                                                           barx_fields=['genus'],barx_label=False,
                                                           feature_field=None,clim=[0,100],
                                                           rect=[-0.5, 314.5, 19.5, 11.5])

<IPython.core.display.Javascript object>

In [12]:
f.save_figure('figure-4B-ants-plate-contaminant.pdf')

In [13]:
# Add common dbbact term (out of contaminant / other / none)

In [14]:
datc=datc.add_terms_to_features('dbbact',['contaminant'])

2018-07-12 16:20:09 INFO Getting dbBact annotations for 455 sequences, please wait...
2018-07-12 16:20:20 INFO Got 10416 annotations
2018-07-12 16:20:20 INFO Added annotation data to experiment. Total 2220 annotations, 455 terms


## look for general reagent contaminants

keep only the 5 main genuses for the plot to make it nicer

In [16]:
general_contam = alldatc.filter_samples('genus',['Camponotus','Dolichoderus','Cephalotes','Neoponera','Azteca'])
general_contam.add_terms_to_features('dbbact',['contaminant'])

2018-07-12 16:20:47 INFO Getting dbBact annotations for 8292 sequences, please wait...
2018-07-12 16:21:02 INFO Got 36013 annotations
2018-07-12 16:21:02 INFO Added annotation data to experiment. Total 2260 annotations, 8292 terms


AmpliconExperiment ("all.withtax.biom") with 161 samples, 8292 features

Interactively explore to find the contaminant block and save the cooredinates

In [17]:
f=general_contam.normalize(100).plot(sample_field='genus',gui='jupyter',
                                     barx_fields=['colony'], barx_label=False,
                                     bary_fields=['common_term'], bary_label=False,
                                     feature_field=None,clim=[0,100])

<IPython.core.display.Javascript object>

## Save general contaminant figure (Fig 4C)
feature colorbar : orange - contaminant, blue - other, green - unknown

In [18]:
f=general_contam.normalize(100).plot(sample_field='genus',gui='jupyter',
                                     barx_fields=['colony'], barx_label=False,
                                     bary_fields=['common_term'], bary_label=False,
                                     feature_field=None,clim=[0,100],
                                     rect=[-0.5, 160.5, 145.2578125, 129.0625])

<IPython.core.display.Javascript object>

In [19]:
f.save_figure('figure-4C-ants-general-contaminant.eps', format='eps')

## saved the 3 plate dependent contaminants to "contamination.fa" using the gui

## saved the 8 general contaminants to contamination2.fa

# Filter for contaminants and keep only main genuses/life stage

## Only remove the per plate contaminants
save to ants-filtered-1.biom

In [58]:
datn=ca.read_amplicon('./all.withtax.biom', './map.txt', normalize=None, min_reads=None)

2018-07-12 16:49:11 INFO loaded 389 samples, 41409 features


In [59]:
datn=dat.filter_fasta('contamination.fa',negate=True)

In [60]:
datn=datn.filter_by_data('abundance', cutoff=1000, mean_or_sum='sum', axis='s')

2018-07-12 16:49:12 INFO After filtering, 273 remaining


In [61]:
datn.save_biom('ants-filtered-1-withtax.biom')

## Remove both sets of contaminants
save to ants-filtered-2.biom

In [62]:
datn2=ca.read_amplicon('./all.withtax.biom','./map.txt',normalize=None,min_reads=None)

2018-07-12 16:51:13 INFO loaded 389 samples, 41409 features


In [63]:
datn2=datn2.filter_fasta('contamination.fa',negate=True)

In [64]:
datn2=datn2.filter_fasta('contamination2.fa',negate=True)

In [65]:
datn2=datn2.filter_by_data('abundance',cutoff=1000, mean_or_sum='sum', axis='s')

2018-07-12 16:51:18 INFO After filtering, 277 remaining


In [66]:
datn2.save_biom('ants-filtered-2-withtax.biom')

# clean the dataset
## keeping only samples from common genus and adult life stage
For figure 4D

In [87]:
datnf = ca.read_amplicon('all.withtax.biom','map.txt',normalize=None, min_reads=None)
datf1 = ca.read_amplicon('ants-filtered-1-withtax.biom','map.txt',normalize=None, min_reads=None)
datf2 = ca.read_amplicon('ants-filtered-2-withtax.biom','map.txt',normalize=None, min_reads=None)

2018-07-12 17:10:05 INFO loaded 389 samples, 41409 features
2018-07-12 17:10:07 INFO loaded 273 samples, 41406 features
2018-07-12 17:10:08 INFO loaded 277 samples, 41398 features


In [88]:
datf2.sample_metadata['life_stage'].value_counts()

adult                 217
late instar larva      25
mid instar larva       24
early instar larva     10
pupa instar larva       1
Name: life_stage, dtype: int64

In [89]:
datnf=datnf.filter_samples('life_stage','adult')
datf1=datf1.filter_samples('life_stage','adult')
datf2=datf2.filter_samples('life_stage','adult')

In [90]:
datf2.sample_metadata['genus'].value_counts()

Camponotus        38
Dolichoderus      27
Cephalotes        25
Azteca            14
Neoponera         14
Pseudomyrmex      11
Odontomachus      10
Solenopsis        10
Eciton             9
Crematogaster      8
Pheidole           7
Myrmelachista      6
Megalomyrmex       6
Paraponera         3
Procryptocerus     3
Basiceros          3
Pachycondyla       2
Atta               2
Ectatomma          2
Daceton            2
Gnamptogenys       2
Acanthostichus     2
Gigantiops         2
Allomerus          2
Pseudoponera       2
Neivamyrmex        1
Trachymyrmex       1
Labidus            1
Name: genus, dtype: int64

In [91]:
datnf=datnf.filter_samples('genus',['Camponotus','Dolichoderus','Cephalotes','Neoponera','Azteca'])
datf1=datf1.filter_samples('genus',['Camponotus','Dolichoderus','Cephalotes','Neoponera','Azteca'])
datf2=datf2.filter_samples('genus',['Camponotus','Dolichoderus','Cephalotes','Neoponera','Azteca'])

In [92]:
datf2

AmpliconExperiment ("ants-filtered-2-withtax.biom") with 118 samples, 41398 features

In [93]:
datnf.save_biom('ants-notfiltered-subset.biom')
datf1.save_biom('ants-filtered-1-subset.biom')
datf2.save_biom('ants-filtered-2-subset.biom')

In [94]:
datx = ca.read_amplicon('ants-filtered-2-subset.biom','map.txt',normalize=10000,min_reads=1000)
datnx = ca.read_amplicon('ants-notfiltered-subset.biom','map.txt',normalize=10000,min_reads=1000)

2018-07-12 17:11:26 INFO loaded 118 samples, 41398 features
2018-07-12 17:11:27 INFO After filtering, 118 remaining
2018-07-12 17:11:28 INFO loaded 143 samples, 41409 features


2018-07-12 17:11:28 INFO After filtering, 118 remaining


In [95]:
datx=datx.cluster_features(10)
datnx=datnx.cluster_features(10)

2018-07-12 17:12:04 INFO After filtering, 3256 remaining
2018-07-12 17:12:04 INFO After filtering, 3171 remaining


In [96]:
datx=datx.sort_samples('colony')
datnx=datnx.sort_samples('colony')

## Camponotus (fig 4D)

In [None]:
tt

In [133]:
tt=datx.filter_samples('genus','Camponotus')

tt=tt.filter_prevalence(0.1, cutoff=0.1)

tt=tt.cluster_features(10)

f=tt.normalize(100).sort_samples('colony').plot(sample_field='colony',gui='jupyter',
                                                clim=[0,100], feature_field=None)

2018-07-12 17:20:50 INFO After filtering, 46 remaining
2018-07-12 17:20:50 INFO After filtering, 32 remaining


<IPython.core.display.Javascript object>

In [134]:
f.save_figure('figure-4D-ants-colony-camponotus.pdf')

## Dolichonderus (fig 4E)

In [135]:
tt=datx.filter_samples('genus','Dolichoderus')

tt=tt.filter_prevalence(0.1, cutoff=0.1)

tt=tt.cluster_features(10)

f=tt.normalize(100).sort_samples('colony').plot(sample_field='colony',gui='jupyter',
                                                clim=[0,100], feature_field=None)

2018-07-12 17:21:52 INFO After filtering, 129 remaining
2018-07-12 17:21:52 INFO After filtering, 121 remaining


<IPython.core.display.Javascript object>

In [136]:
f.save_figure('figure-4E-ants-colony-dolychonderus.pdf')

## Cephalotes

In [137]:
tt=datx.filter_samples('genus','Cephalotes')

tt=tt.filter_prevalence(0.1)

tt=tt.cluster_features(10)

f=tt.normalize(100).sort_samples('colony').plot(sample_field='colony',gui='jupyter',
                                                barx_fields=['colony'],barx_label=False,clim=[0,100])

2018-07-12 17:22:06 INFO After filtering, 94 remaining
2018-07-12 17:22:06 INFO After filtering, 91 remaining


<IPython.core.display.Javascript object>

## Neoponera

In [138]:
tt=datx.filter_samples('genus','Neoponera')

tt=tt.filter_prevalence(0.1)

tt=tt.cluster_features(10)

f=tt.normalize(100).sort_samples('colony').plot(sample_field='colony',gui='jupyter',
                                                barx_fields=['colony'],barx_label=False,clim=[0,100])

2018-07-12 17:22:07 INFO After filtering, 337 remaining
2018-07-12 17:22:07 INFO After filtering, 322 remaining


<IPython.core.display.Javascript object>