**DATE**: 22 October 2017 <br>
**ENVIRONMENT**: calour <br>
**AUTHOR**: Max Abramson, Tomasz Kosciolek

## The goal of this notebook is to generate heatmaps that show the taxonomy and relative abundance of the species found in the samples of a dataset. It also tests for differential abundance between cases and controls and displays the result as a heatmap.

In [14]:
import calour as ca
import pandas as pd
import biom
%matplotlib notebook

In [39]:
# Directory holding the BIOM table and its sample metadata (relative to the notebook).
ROOT = 'data/mental_illness/'

biom_path = ROOT + "mental_illness_250.biom"
metadata_path = ROOT + "AGP_metadata_mental_illness_250.txt"

# Load the feature table together with the sample metadata; normalize=10000 is
# forwarded to calour's reader unchanged.
hm = ca.read(biom_path, metadata_path, normalize=10000)

In [68]:
# Cluster the features, using 500 as the minimum feature abundance
# (the same cutoff quoted in the heatmap title further down).
hm_processed = hm.cluster_features(500)

# negate=True inverts the match: samples whose case_control value is
# "not applicable" are the ones removed. Done in place on the freshly
# created hm_processed, so `hm` itself is not modified by this step.
hm_processed.filter_samples("case_control", "not applicable",
                            negate=True, inplace=True)

# Order samples by case_control so the groups are contiguous in the heatmap.
# Left as the last expression so the resulting experiment is echoed as cell output.
hm_processed.sort_samples('case_control', inplace=True)

Experiment filtered_bc_table_250.biom
-------------------------------------
data dimension: 250 samples, 467 features
sample IDs: Index(['10317.000060344', '10317.000038129', '10317.000065662',
       '10317.000029343', '10317.000052041', '10317.000038007',
       '10317.000043200', '10317.000058946', '10317.000046295',
       '10317.000059971',
       ...
       '10317.000037970', '10317.000050284', '10317.000033495',
       '10317.000039906', '10317.000038303', '10317.000037946',
       '10317.000034137', '10317.000039619', '10317.000046217',
       '10317.000029622'],
      dtype='object', name='sample_name', length=250)
feature IDs: Index(['TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGATGTTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGATATCTTGAGTG',
       'TACAGAGGTCTCAAGCGTTGTTCGGAATCACTGGGCGTAAAGCGTGCGTAGGCTGTTTCGTAAGTCGTGTGTGAAAGGCGCGGGCTCAACCCGCGGACGGCACATGATACTGCGAGACTAGAGTA',
       'TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGGTTGTTAAGTCAGTTGTGAA

## Plot the entire dataset

In [76]:
# Interactive heatmap of every retained sample, with a bar along the x axis
# coloured by the case_control metadata field.
hm_processed.plot(
    'All samples; min. feature abundance: 500',
    barx_fields='case_control',
    gui='jupyter',
)

<IPython.core.display.Javascript object>

<calour.heatmap.plotgui_jupyter.PlotGUI_Jupyter at 0x11ce3de48>

## Differential abundance

### UK only

In [72]:
# Keep only samples whose country_residence is "UK". inplace=False returns a
# new experiment, so hm_processed is left untouched and this cell can be re-run.
hm_uk = ca.filtering.filter_samples(hm_processed, "country_residence", 
                                    "UK", negate=False, inplace=False)

# Compare "case" against "control" samples with the mean-difference statistic
# over 10000 permutations; alpha=0.1 is the threshold handed to calour
# (NOTE(review): presumably the FDR level under calour's default correction — confirm).
# The permutations are random, so random_seed is fixed to make the reported
# features and p-values reproducible across kernel restarts.
diff_uk = ca.analysis.diff_abundance(hm_uk, "case_control", "case", "control",
                                     method='meandiff', alpha=0.1, numperm=10000,
                                     random_seed=2017)

In [73]:
# Per-feature metadata of the features returned by the differential abundance test.
# NOTE(review): judging by the output below, each row holds the taxonomy list
# followed by two numeric columns (looks like an effect statistic and a p-value) — confirm.
uk_feature_table = diff_uk.feature_metadata
uk_feature_table.values

array([[ list(['k__Bacteria', 'p__Firmicutes', 'c__Clostridia', 'o__Clostridiales', 'f__Ruminococcaceae', 'g__', 's__']),
        -18.35714285714286, 0.0004999500049994721],
       [ list(['k__Bacteria', 'p__Tenericutes', 'c__Mollicutes', 'o__RF39', 'f__', 'g__', 's__']),
        -10.0, 0.0005999400059993887]], dtype=object)

In [75]:
# Interactive heatmap restricted to the features kept in diff_uk, with a bar
# along the x axis coloured by the case_control metadata field.
diff_uk.plot(
    'UK differentially abundant taxa',
    barx_fields='case_control',
    gui='jupyter',
)

<IPython.core.display.Javascript object>

<calour.heatmap.plotgui_jupyter.PlotGUI_Jupyter at 0x11b73c080>