# Bar plot with confidence interval and samples
This notebook draws a bar plot from per-subject values extracted with REX (or another software), and overlays the 90% confidence interval as well as every per-subject value as scatter points.

If you also supply the ROI NIfTI images (one per ROI), you additionally get a glass brain image showing all ROIs, and the bar plot is annotated with extra information (ROI center coordinates, names of the atlas regions covered by each ROI, and caption text in the same color as the ROI on the glass brain).

Version 1.4.2

In [None]:
%load_ext autoreload
%autoreload 2
# BEWARE: autoreload works on functions and on general code, but NOT on new class methods:
# if you add or rename a method, you have to restart the kernel!
# It will also fail if you use super() calls in the classes you change.
# ALSO, AUTORELOAD SHOULD BE THE VERY FIRST LINE EXECUTED IN YOUR IPYTHON NOTEBOOK!

# Profilers:
# http://pynash.org/2013/03/06/timing-and-profiling/
# http://mortada.net/easily-profile-python-code-in-jupyter.html
# use %lprun -m module func(*args, **kwargs)
# The profiler extensions are optional: an ImportError while loading them is ignored on purpose.
try:
    %load_ext line_profiler
    %load_ext memory_profiler
except ImportError as exc:
    pass

In [None]:
# Generate figure inside IPython Notebook (must be called before any import of matplotlib, direct or indirect!)
%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.colors as pltcol
import numpy as np
import nibabel as nib
import pandas as pd
import textwrap
from nilearn import image
from nilearn import plotting
try:
    from adjustText import adjust_text  # you will need this lib if you want to plot subjects ids without overlap: pip install adjustText
except ImportError as exc:  # else we will just plot the subjects labels without any adjustment (so the position can overlap with the points and with each others)
    adjust_text = None
    pass

In [None]:
# PARAMETERS - EDIT ME
groups = [9, 9]  # number of items/values in each group, in the order they appear in the REX data files
groups_labels = ['S2', 'W1']  # display name of each group (same order as `groups`)
# In which order we plot each bar. Leave as is to plot bars in the original order, or provide a list of
# group numbers to shuffle as you want (this is only a display parameter, it does not change the results).
# Built as a real list (not a lazy range) so that it can be edited/reordered on Python 2 and 3 alike.
groups_order = list(range(len(groups)))
#subjects_labels = list(range(1, sum(groups)+1))  # set ID for each subject (can be any string, all that matters is that it is the same order as the input values: first value here is the first value in imported rex data file, etc)
#subjects_labels = list(range(1, groups[0]+1))*len(groups)  # use this for within-subject analyses, where you want the labels to be the same range in both bars
show_subjects_labels = True  # show label for each subject's point?
show_subjects_paired = True  # paired/longitudinal analysis, both groups contain in fact the same subjects but in different conditions, enable this option to restart counting subjects ids to 1 for each condition
# One REX text file per ROI. `range` is used instead of the Python-2-only `xrange` so the cell runs on both major versions.
rex_data_filepath = ['testconjunc.cluster00%i.rex.data.txt' % i for i in range(1, 6)]
# Specify here the ROI image filenames, in the same order as the input text files, to plot the ROIs on a brain using nilearn
rex_maps_filepath = ['testconjunc.cluster00%i.rex.roi.img' % i for i in range(1, 6)]

In [None]:
def load_maps(list_imgs, voxel_threshold=None):
    """
    Load a list of ROI maps, bring them onto the grid of the first one and threshold them.

    list_imgs: images to load, each passed as-is to nilearn's image.load_img
    voxel_threshold: minimum value to consider as a voxel and not just background noise
        (because background voxels can be 0.000001 for example), can be float or str
        ('1%' to give a percentage). Defaults to 0.0001 when None.
        TODO: autodetect minimum value (can be -4, 0.02, etc) as the background and use it as the threshold value.

    Returns the list of thresholded images, in the same order as list_imgs.
    """
    threshold = 0.0001 if voxel_threshold is None else voxel_threshold
    loaded = []
    for source in list_imgs:
        current = image.load_img(source)
        # The first loaded map is the reference: later maps are resampled onto it only when their shape differs
        if loaded and current.shape != loaded[0].shape:
            current = image.resample_to_img(current, loaded[0])
        loaded.append(image.threshold_img(current, threshold))
    return loaded

In [None]:
# SANITY CHECKS
#if show_subjects_labels:
    #if len(subjects_labels) != sum(groups):
        #raise(ValueError('subjects_labels does not contain the same number of subjects as groups!'))

In [None]:
# Loading data from Rex csv
# dfraw holds one entry per ROI file, each containing the values of all subjects of all groups, concatenated
dfraw = []
nb_rois = len(rex_data_filepath)
for fpath in rex_data_filepath:
    # read_csv's `squeeze=True` argument was deprecated in pandas 1.4 and removed in 2.0:
    # squeezing the columns after reading is the documented replacement (a single column becomes a Series)
    roi_values = pd.read_csv(fpath, index_col=False, header=None).squeeze(axis='columns')
    # Validate before storing, so that dfraw never contains a file with the wrong number of subjects
    if len(roi_values) != sum(groups):
        raise ValueError('the number of values in the provided txt file is not the same as the supplied groups count (ie, you did not specify the correct number of subjects!), please check your parameters!')
    dfraw.append(roi_values)
    print(roi_values)

In [None]:
# Extract the values for each group in a separate Series
# df_g is a flat list laid out ROI by ROI: the values of group `gi` for ROI `roi_id` are at df_g[len(groups)*roi_id + gi]
df_g = []
for roi_values in dfraw:
    start = 0
    for g in groups:
        group_values = roi_values.iloc[start:start+g]
        # Advance cumulatively: assigning `start = g` is only right for the second group (and wrong as soon as
        # there are 3+ groups), because each group starts where the previous one ended
        start += g
        if show_subjects_paired:
            # Restart the subjects counting at 0 because it's the same subjects (longitudinal analysis).
            # reset_index returns a new Series, so the raw data in dfraw keeps its original index.
            group_values = group_values.reset_index(drop=True)
        # else: keep the original index, so the numbering continues across groups (different groups of subjects)
        df_g.append(group_values)

df_g

In [None]:
# Helper functions
import numpy as np
import scipy.stats

def comp_ci(a, confidence=0.90):
    '''Calculates the confidence interval of the mean of a vector, using Student's t distribution.

    a: sequence of values (list, numpy array or pandas Series).
    confidence: confidence level of the interval, between 0 and 1 (default: 0.90, ie the 90% confidence interval).

    Returns a (lower, upper) tuple, centered on the mean of a. With fewer than 2 values the standard
    error is undefined and the bounds are nan.

    From the excellent SO answer by Ulrich Stern: https://stackoverflow.com/a/34474255/1121352'''
    # len(a)-1 degrees of freedom, centered on the sample mean and scaled by the standard error of the mean
    return scipy.stats.t.interval(confidence, len(a)-1, loc=np.mean(a), scale=scipy.stats.sem(a))

In [None]:
def find_cluster_center(im, mricron=False):
    """
    Find the center of a cluster, as the mean of the indices of all its nonzero voxels.

    im: image of the cluster (must provide get_data() and affine)
    mricron: if True, the image is first reordered with nilearn's reorder_img and the center is
        returned as voxel indices of the reordered image, without projection through the affine.
        DO NOT USE: this will convert to MRIcron coordinates space (ie, [0, 100]) but it will mess things up for nilearn!

    Returns the center coordinates: projected to brain space through im.affine by default,
    or left as voxel indices when mricron is True.
    """
    from nilearn.image.resampling import reorder_img, coord_transform
    # Pick the image whose voxel grid is used to locate the cluster
    if mricron:
        source = reorder_img(im, resample='continuous')
    else:
        source = im
    # Indices of the nonzero values (= voxel coordinates of the cluster), one array per axis
    nonzero_idx = source.get_data().nonzero()
    # Euclidian middle of the cluster: mean index along each axis
    center = np.mean(nonzero_idx, axis=1)
    if mricron:
        return center
    # Project center coordinates to brain space (ie, instead of [0, 100] range, it will be [-50, 50] -- these numbers are only illustrative)
    return coord_transform(center[0], center[1], center[2], im.affine)

In [None]:
# Plot ROIs on glass brain images!
centers = []  # center of each ROI, in the same order as rex_maps_filepath (stays empty when no maps are supplied)
if rex_maps_filepath:
    imgs = load_maps(rex_maps_filepath)

    fig = plotting.plot_glass_brain(None, title='ROIs', cmap=plt.cm.prism, alpha=0.5)  # initialize the glass brain images
    for c, im in enumerate(imgs):
        # For each ROI
        # Get the center (to plot the marker)
        center = find_cluster_center(im)
        centers.append(center)
        print('Center found at: ' + str(center))
        # Assign a unique value to this cluster's voxels (to get a different color).
        # Work on a copy: get_data() can hand back the image's own array, and the images in `imgs`
        # are reused afterwards, so they must keep their original values.
        imdata = im.get_data().copy()
        imdata[imdata != 0] = c+1  # Assign unique value
        im2 = nib.Nifti1Image(imdata, affine=im.affine)  # convert back to a nifti file in-memory to supply to nilearn
        # Can only plot the colorbar at the last iteration, else nilearn will spit an error (cannot use multiple colorbars)
        cbar = (c == len(imgs)-1)
        # Plot the clusters (vmin/vmax match the normalization used to color the ROI captions of the bar plot)
        fig.add_overlay(im2, vmin=1, vmax=nb_rois, cmap=plt.cm.prism, colorbar=cbar)
        # Plot the markers (clusters' centers)
        fig.add_markers([center], marker_color=['k'], marker_size=20)

    # Save figure (only when there is a glass brain figure: `fig` does not exist if no maps were supplied)
    fig.savefig('rois_glass_brain.png', bbox_inches='tight')
    print('Image saved in rois_glass_brain.png')

In [None]:
from nilearn import plotting, datasets

def get_atlas_label(atlas, region_idx):
    """
    Return the label of the atlas region whose index is region_idx.

    atlas['indices'] is searched for str(region_idx), and the entry of atlas['labels'] at the same
    position is returned. Raises ValueError when the index is not listed in atlas['indices'].
    """
    wanted = str(region_idx)
    position = atlas['indices'].index(wanted)
    return atlas['labels'][position]

def get_atlas_labels(imgs, atlas_choice='aal2', verbose=False, voxel_threshold=None):
    """
    Get the list of atlas regions covered by clusters, from a list of nifti maps loaded in-memory via nibabel

    imgs: list of in-memory ROI images (one per cluster).
    atlas_choice: optional, can be 'aal2' or 'SPM12': both use the AAL atlas (SPM12 version) fetched by nilearn.
        'anatomytoolbox' is reserved for a future atlas and raises NotImplementedError.
    verbose: print details about the atlas and the regions found.
    voxel_threshold: minimum threshold to consider as a voxel and not just background noise (because
        background voxels can be 0.000001 for example), can be float or str ('1%' to give a percentage).
        Defaults to 0.0001 when None.
        TODO: autodetect minimum value (can be -4, 0.02, etc) as the background and use it as the threshold value.

    Returns a tuple of 3 lists, each with one item per input image:
    - maps_regions: list of the labels of the atlas regions covered by the cluster (in atlas order)
    - maps_regions_idxs: set of the atlas indices (as int) of these regions
    - maps_regions_count: dict mapping each region label to the number of cluster voxels inside it
    """
    if voxel_threshold is None:
        voxel_threshold = 0.0001

    # Atlas
    if atlas_choice == 'anatomytoolbox':
        # TODO: build atlas variable with all infos and data (labels, indices, nib niftiimage with affine etc)
        # from masks/AnatomyToolbox_Atlas_Map.nii. Until then, fail explicitly rather than on an undefined variable.
        raise NotImplementedError('the anatomytoolbox atlas is not supported yet, please use atlas_choice="aal2"')
    atlas = datasets.fetch_atlas_aal(version='SPM12', data_dir='atlas')

    # Show some infos about atlas
    atlas_im = image.load_img(atlas.maps)
    atlas_data = atlas_im.get_data()
    if verbose:
        atlas_values = np.unique(atlas_data)
        print('Atlas shape: %s' % str(atlas_im.shape))
        print('%i regions in this atlas: %s' % (len(atlas_values) - 1, str(atlas_values)))  # minus 1 because 0 is background
        print('%i labels' % len(atlas['labels']))
        print('%i indices: %s' % (len(atlas['indices']), atlas['indices']))
        print(atlas.keys())

    # Resample masks to atlas size
    resampled = []
    for img in imgs:
        if img.shape != atlas_im.shape:
            img = image.resample_to_img(img, atlas_im)
        resampled.append(image.threshold_img(img, voxel_threshold))

    # Extract activated atlas brain regions for each mask
    maps_regions = []
    maps_regions_idxs = []
    maps_regions_count = []
    for img in resampled:
        # Atlas value under each non zero voxel of the mask, minus the background (0, not part of the atlas labels)
        covered = atlas_data[np.nonzero(img.get_data())]
        covered = covered[covered != 0]
        # Count the voxels per region in one pass, instead of looking up the label once per voxel
        found, counts = np.unique(covered, return_counts=True)
        region_indices = set(int(region_idx) for region_idx in found)
        region_count = {}
        for region_idx, nb_voxels in zip(found, counts):
            # int() so that the textual lookup also works when the atlas data is stored as floats
            region_label = get_atlas_label(atlas, int(region_idx))
            region_count[region_label] = region_count.get(region_label, 0) + int(nb_voxels)
        if verbose:
            print('Atlas indices of brain regions activated in current mask: %s' % str(sorted(region_indices)))
        # Extract brain region names from atlas that are present in this mask.
        # Built as a real list (not a one-shot filter object) so that it can be iterated several times on Python 3.
        map_brain_regions = [label for idx, label in zip(atlas['indices'], atlas['labels']) if int(idx) in region_indices]
        maps_regions.append(map_brain_regions)
        maps_regions_idxs.append(region_indices)
        maps_regions_count.append(region_count)

    return maps_regions, maps_regions_idxs, maps_regions_count

# List the atlas regions covered by each ROI (only possible when the ROI maps were supplied)
if rex_maps_filepath:
    # Explicit throwaway names: a bare `_` would shadow IPython's "last output" variable
    maps_regions, _regions_idxs, _regions_count = get_atlas_labels(imgs)
    print('Found the following regions covered by the ROIs clusters:')
    # Iterate over the maps themselves rather than over nb_rois (the number of data files),
    # so a mismatch between both lists cannot raise an IndexError here
    for i, regions in enumerate(maps_regions):
        print('ROI %i: %s' % (i, ', '.join(regions)))

In [None]:
# Plot!
# One bar per group and per ROI (mean of the group), with the confidence interval as error bar and
# every subject's value as a scatter point in the middle of the bar.

# Plotting parameters
ylim = None  # limit y axis to these values. Set to None to use default limits automatically detected by matplotlib.
figsize = [2*nb_rois, 5]  # figure size, in inches, set to None to use default
colors = ['b', 'g', 'r', 'y', 'c', 'b']  # one color per bar position inside a ROI (cycled if there are more groups than colors)
ylabel = 'Effect sizes'
width = 1  # width of the bars - do not change, it is an internal parameter and does not impact the visualization
ticks = np.arange(1, 1+(width*len(groups)*nb_rois), width)  # left edge of each bar - do not modify this

# Plotting each bar
fig, ax = plt.subplots()
if figsize:
    fig.set_size_inches(figsize[0], figsize[1], forward=True)
for roi_id in range(nb_rois):
    texts = []
    for i, gi in enumerate(groups_order):
        # Get the data for the selected group
        dg = df_g[len(groups)*roi_id + gi]
        bar_x = ticks[len(groups)*roi_id+i]
        color = colors[i % len(colors)]
        dg_mean = dg.mean()
        # Half-width of the confidence interval: upper bound minus mean, so that it is positive
        # (matplotlib rejects negative error bar values)
        ci_half_width = comp_ci(dg)[1] - dg_mean
        # Draw bars with error bar. align='edge' is explicit because all the positions below (scatter points,
        # tick labels, x limits) assume that a bar spans [tick, tick+width], which is not the default alignment
        # in recent matplotlib versions.
        ax.bar(bar_x, dg_mean, width=width, align='edge', yerr=ci_half_width, alpha=0.5, color=color, error_kw={'ecolor': 'k', 'elinewidth': 1, 'capsize': 15, 'capthick': 1, 'barsabove': False})
        # Add scatter points for each subject, in the middle of the bar
        scatter_x = bar_x+(float(width)/2)
        ax.scatter([scatter_x] * len(dg), dg, color=color, marker='x', s=30)
        # Add label for each subject scatter point (index + 1, so that the subjects numbering starts at 1)
        if show_subjects_labels:
            for subject_idx, y in zip(dg.index.values, dg):
                texts.append(ax.text(scatter_x, y, str(subject_idx + 1), alpha=0.5))
    # Adjust label for each subject text placement to avoid overlapping
    if show_subjects_labels and adjust_text is not None:
        adjust_text(texts,
                    text_from_points=True,
                    only_move={'text':'xy', 'objects':'x'}, force_text=0.01, force_objects=1.0) #, arrowprops=dict(arrowstyle="->", color='r', lw=0.5))

# Change the ticks to set the groups names (and place the labels nicely)
ax.set_xticks([t + float(width)/2 for t in ticks])  # place in the middle of each bar (position tick t + half of bar width)
# Labels follow groups_order, so that a bar keeps the right name when the display order is shuffled
ax.set_xticklabels([groups_labels[gi] for gi in groups_order] * nb_rois)
# Add ROIs centers, colors and names if available (ie, if maps are provided)
if rex_maps_filepath:
    norm = pltcol.Normalize(vmin=1,vmax=nb_rois)  # need to normalize the values we will input to the colormap to be onpoint with nilearn
    for ri in range(nb_rois):
        # Horizontally center the caption under the bars of this ROI, whatever the number of groups
        roi_x = ticks[len(groups)*ri] + (float(width)*len(groups))/2
        roi_caption = '%.0f,%.0f,%.0f\n%s' % (centers[ri][0], centers[ri][1], centers[ri][2], '\n'.join(textwrap.wrap(', '.join(maps_regions[ri]), width=30)))
        ax.text(roi_x, -0.45, roi_caption,
                rotation=0, verticalalignment='top', horizontalalignment='center',
                color=plt.cm.prism(norm(ri+1)), fontsize=10)
        # pltcol.rgb2hex(plt.cm.prism(norm(ri+1)))  # to get the hex value of the color
# Force draw the plot (with tight layout)
plt.tight_layout()
if ylim:
    ax.set_ylim(ylim)
ax.set_xlim([ticks[0], ticks[-1]+width])
ax.set_ylabel(ylabel)
plt.show()

# Save the figure
fig.savefig('rois_bars.png', bbox_inches='tight')
print('Image saved in rois_bars.png')