# Bar plot with confidence interval and samples
This notebook draws one bar per group (the bar height is the group mean) from per-subject values extracted from REX (or other software), and overlays the 90% confidence interval as an error bar and every per-subject value as a scatter point.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
# adjustText is optional: it is only needed to spread the subject labels so they do not overlap (pip install adjustText).
# Default to None so later cells can test `adjust_text is not None`; without the library, labels are drawn as-is
# (they may then overlap with the points and with each other).
adjust_text = None
try:
    from adjustText import adjust_text
except ImportError:
    pass

In [None]:
# PARAMETERS - EDIT ME
groups = [12, 12]  # set the number of items/values for each group
groups_labels = ['MCS', 'UWS']  # set names for each group
# Set an ID for each subject (can be any string, all that matters is that it is in the same order as the input values:
# first value here is the first value in the imported rex data file, etc).
# range() excludes its upper bound, hence the +1 so that there is exactly one label per subject (1..sum(groups)).
subjects_labels = range(1, sum(groups) + 1)
show_subjects_labels = False  # show label for each subject's point?
rex_data_filepath = 'results.ROIs.rex.data.txt'

In [None]:
# Loading data from Rex csv
# The file has no header row. The `squeeze=True` keyword of read_csv was deprecated in pandas 1.4 and removed in 2.0,
# so the single-column result is reduced to a Series with the equivalent `.squeeze("columns")` call instead.
dfraw = pd.read_csv(rex_data_filepath, index_col=False, header=None).squeeze("columns")
dfraw

In [None]:
# Extract the values for each group in a separate dataframe
# The rows of dfraw are consumed sequentially: the first groups[0] rows form the first group, the next groups[1] rows the second, etc.
df_g = []
start = 0
for g in groups:
    df_g.append(dfraw.iloc[start:start + g])
    start += g  # advance past the rows just consumed (accumulate, so that 3+ groups are sliced correctly)
df_g

In [None]:
# Helper functions
import numpy as np
import scipy.stats

def comp_ci(a):
    """Return the two-sided 90% confidence interval of the mean of a vector.

    The interval is taken from a Student's t distribution with ``len(a) - 1``
    degrees of freedom, centered on the sample mean and scaled by the standard
    error of the mean.

    Based on the excellent SO answer by Ulrich Stern: https://stackoverflow.com/a/34474255/1121352

    Parameters
    ----------
    a : sequence of numbers
        The sample values.

    Returns
    -------
    tuple
        ``(lower, upper)`` bounds of the interval.
    """
    degrees_of_freedom = len(a) - 1
    sample_mean = np.mean(a)
    standard_error = scipy.stats.sem(a)
    return scipy.stats.t.interval(0.90, degrees_of_freedom, loc=sample_mean, scale=standard_error)

In [None]:
# Plot!
# Draws one semi-transparent bar per group (height = group mean) with a 90% confidence interval error bar,
# then overlays every per-subject value as an 'x' marker at the horizontal middle of the bar.

# Plotting parameters
ylim = None  # limit y axis to these values. Set to None to use default limits automatically detected by matplotlib.
figsize = [3, 5]  # figure size, in inches, set to None to use default
colors = ['b', 'g', 'r', 'y', 'c', 'm']  # one color per group, reused cyclically if there are more groups than colors
ylabel = 'Effect sizes'
width = 1  # width of the bars - do not change, it is an internal parameter and does not impact the visualization
ticks = np.arange(1, 1+(width*len(groups)), width)  # left edge of each bar - do not modify this (must come after width, which it uses)

# Plotting each bar
fig, ax = plt.subplots()
if figsize:
    fig.set_size_inches(figsize[0], figsize[1], forward=True)
label_offset = 0  # number of subjects already plotted in the previous groups = index of this group's first label
texts = []
for i, dg in enumerate(df_g):
    group_mean = dg.mean()
    # Half-width of the confidence interval (upper bound - mean): yerr must be a non-negative distance from the bar top
    ci_half_width = comp_ci(dg)[1] - group_mean
    color = colors[i % len(colors)]
    # Draw bars with error bar
    # align='edge' anchors the bar's left edge on ticks[i]; the scatter points, x ticks and x limits below all rely on that
    # (the default alignment is 'center' since matplotlib 2.0)
    ax.bar(ticks[i], group_mean, width=width, align='edge', yerr=ci_half_width, alpha=0.5, color=color,
           error_kw={'ecolor': 'k', 'elinewidth': 1, 'capsize': 15, 'capthick': 1, 'barsabove': False})
    # Add scatter points for each subject
    scatter_x = ticks[i]+(float(width)/2)
    ax.scatter([scatter_x] * len(dg), dg, color=color, marker='x', s=30)
    # Add label for each subject scatter point
    if show_subjects_labels:
        for j, y in enumerate(dg):
            label_idx = label_offset + j
            # Fall back to the 1-based subject position if subjects_labels has fewer entries than there are subjects
            text = subjects_labels[label_idx] if label_idx < len(subjects_labels) else label_idx + 1
            t = ax.text(scatter_x, y, text, alpha=0.5)
            texts.append(t)
    # Advance by the size of this group, whether or not labels are shown, so the next group starts at its own first label
    label_offset += len(dg)
# Adjust label for each subject text placement to avoid overlapping
if show_subjects_labels and adjust_text is not None:
    adjust_text(texts,
                text_from_points=True,
                only_move={'text':'xy', 'objects':'x'}, force_text=0.01, force_objects=1.0)
# Change the ticks to set the group name (and place the labels nicely)
ax.set_xticks([t + float(width)/2 for t in ticks])  # place in the middle of each bar (position tick t + half of bar width)
ax.set_xticklabels(groups_labels)
if ylim:
    ax.set_ylim(ylim)
ax.set_xlim([ticks[0], ticks[-1]+width])
plt.ylabel(ylabel)
# Compute the layout last, once the axis label and limits are set, so that the y label is taken into account
plt.tight_layout()
# Force draw the plot
plt.show()