## Capillary Electrophoresis Analysis

This notebook reads in, formats and displays capillary electrophoresis trace files from an Agilent Fragment Analyzer.

### File Prep and Imports

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from mpl_toolkits.axes_grid1 import make_axes_locatable, AxesGrid
from IPython.display import display, clear_output
import ipywidgets as widgets
from ipywidgets import Layout, Output

In [None]:
# ---------------------------------------------------------------------------
# User configuration: edit these three placeholders for your own system.
#   base_path        - folder that contains the experiment data
#   base_output_loc  - folder where saved figures are written
#   final_data       - sub-folder of base_path holding the capillary
#                      electrophoresis CSV exports
# ---------------------------------------------------------------------------
base_path = r'SET INPUT FOLDER'
base_output_loc = 'SET OUTPUT LOCATION'
final_data = os.path.join(
    base_path,
    r'SET TO CAPILLARY ELECTROPHORESIS DATA FOLDER',
)

# Quick sanity check that the data folder can actually be found.
_data_folder_found = os.path.isdir(final_data)
print('Data available:', _data_folder_found)

In [None]:
# Data Labelling - all labels are specific to particular wells/samples
base_data = final_data  # folder the per-well CSV files are read from
# Prefix shared by every CSV file name of this run (used when the file names are built in the loading cell).
prepend = '2021 10 11 14H 11M'  # results from paper

# Maps a well identifier (e.g. 'A3') to the label drawn next to that lane in the plots.
# '\n' forces a line break inside a label and '\u03BC' is the Greek letter mu (micro).
# One CSV file is loaded for every key in this dict, so each listed well must exist on disk.
available_samples = {'A1': 'Ladder',
                     'A2': 'Single BRCA1\n Payload',
# Alternative A3 label with a bold "(DB)" rendered through mathtext (kept for reference, disabled):
#                      'A3': 'Dual BRCA1\n Payloads ' +   r"$\bf{(DB)}$",
                     'A3': 'Dual BRCA1\n Payloads (DB)',
                     'A4': 'DB +\n Target (5 \u03BCM)',
                     'A5': 'DB +\n Target (0.436 \u03BCM)',
                     'A6': 'DB +\n Target (50 nM)',
                     'A7': 'DB +\n Mistoe (50 nM)',
                     'A8': 'DB +\n Misbody (50 nM)',
                     'A9': 'DB +\n Mismany (50 nM)',
                     'A10':'DB +\n 2-MT (50 nM)',
                     'B1': 'Single miR-141 RNA\n Payload',
                     'B2': 'Dual miR-141 RNA\n Payloads (DR)',
                     'B3': 'DR +\n Target (5 \u03BCM)',
                     'B4': 'DR +\n Target (0.5 \u03BCM)',
                     'B5': 'DR +\n Target (50 nM)',
                     'C1': 'Single Aldosterone\n Payload',
                     'C2': 'Dual Aldosterone\n Payloads (DA)',
                     'C3': 'DA +\n Target (1.5mM)',
                     'C4': 'DA +\n Target (150\u03BCM)',
                     'C5': 'DA +\n Target (0.5\u03BCM)',
                     'C6': 'DA +\n DNA Target (5\u03BCM)',
                     'D1': 'Dual HIV Block\n (Pure)',
                     'D2': 'Dual Aldosterone (DA)\n + Dual BRCA1 (DB)\n Payloads',
                     'D3': 'DB + DA\n+ 5uM BRCA target',
                     'D4': 'DB + DA\n+ 1.5mM Aldosterone',
                     'D5': 'Quad BRCA1\n Payloads (QB)',
                     'D6': 'DB + QB',
                     'D7': 'DB + QB\n+ 5\u03BCM BRCA1 target',
                     'E1': 'Dual Thrombin\n Payloads',
                     'E2': 'Dual Thrombin\n Payloads\n+ Target (5\u03BCM)',
                     'F1': 'Impure\n U4U2Thrco',
                     'F2': 'Impure\n QB',
                     'F3': 'Impure\n HR',
                     'F4': 'Impure\n U3',
                     'H12': 'Capillary\n Ladder'}

# Named groups of wells that can be plotted together with a single click.
# Every well listed here must also be a key of available_samples.
# The ladder wells ('A1', 'H12') are not part of any preset.
presets = {
    'BRCA tests':['A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9', 'A10'],
    'RNA tests':['B1', 'B2', 'B3', 'B4', 'B5'],
    'Aldos tests': ['C1', 'C2', 'C3', 'C4', 'C5', 'C6'],
    'Thrombin tests': ['E1', 'E2'],
    'Multi-Block Tests': ['D2','D3','D4','D5','D6','D7'],
    'Others':['D1', 'F1', 'F2', 'F3', 'F4']
}

In [None]:
# Data loading and formatting
# Every well has its own CSV named "<prepend> <well>  Samp<well>.csv" (note the double space).
files = [
    os.path.join(base_data, '%s %s  Samp%s.csv' % (prepend, well, well))
    for well in available_samples
]

# Read each file and tag its columns with the well id so they stay unique after concatenation.
dfs = [
    pd.read_csv(csv_path, header=0).add_suffix('_%s' % well)
    for well, csv_path in zip(available_samples, files)
]

# Side-by-side table of all wells, with every value replaced by its absolute value.
concatenated_df = pd.concat(dfs, axis=1).abs()

# Keep only the intensity traces (one column per well).
_intensity_columns = [name for name in concatenated_df.columns if 'Intensity' in name]
intensity_df = concatenated_df[_intensity_columns]
s_array = intensity_df.to_numpy()  # this is the final array containing all the data

In [None]:
# main interface for selecting lanes and plotting results
%matplotlib inline
# Most recently drawn figure; None until the first plot. Set by the plotting functions, read by save_plot.
fig = None
plt.rcParams.update({'font.sans-serif':'Helvetica'})  # consistent figure formatting
# Module-level switches read by the plotting functions below:
mark_over_limits = True  # set to true to mark which bands have intensities above the selected limit
plot_elution_time = False  # set to true to print elution time on y-axis


def plot_preset(btn):  # plot one of the pre-selected groups of data
    """Button callback: plot the first preset highlighted in ``sel_preset``.

    ``btn`` is the clicked button; it is required by ``on_click`` but not used.
    If no preset is highlighted, or none of its wells has an intensity column,
    a short message is written to the output area instead of raising.
    """
    chosen = sel_preset.value
    if not chosen:  # nothing highlighted -> value is an empty tuple, so value[0] would raise
        with out:
            print('No preset selected - nothing to plot.')
        return

    wanted = set(presets[chosen[0]])
    columns = [c for c in intensity_df.columns if c.split('_')[-1] in wanted]
    if not columns:
        with out:
            print('No intensity data found for the selected preset.')
        return

    # Derive the lane keys from the chosen columns so that every label is
    # guaranteed to sit next to its own data, whatever order the preset lists them in.
    lane_keys = [c.split('_')[-1] for c in columns]
    plot_capillary(intensity_df[columns].to_numpy(), lane_keys)

def plot_samples(btn):  # plot specific selected samples
    """Button callback: plot the wells highlighted in ``sel_mul``.

    ``btn`` is the clicked button; it is required by ``on_click`` but not used.
    If nothing is highlighted, or no intensity column matches, a short message
    is written to the output area instead of asking matplotlib for zero subplots.
    """
    wanted = set(sel_mul.value)
    columns = [c for c in intensity_df.columns if c.split('_')[-1] in wanted]
    if not columns:
        with out:
            print('No samples selected - nothing to plot.')
        return

    # Derive the lane keys from the chosen columns so that every label is
    # guaranteed to sit next to its own data, whatever order the widget reports.
    lane_keys = [c.split('_')[-1] for c in columns]
    plot_capillary(intensity_df[columns].to_numpy(), lane_keys)

def plot_capillary(data, selection):  # vertical graph seems to be easier to read
    """Draw ``data`` with the orientation currently chosen in the ``plot_type`` dropdown.

    ``data`` holds one column per lane and ``selection`` the matching well keys;
    both are forwarded unchanged to the vertical or horizontal plotting routine.
    """
    use_vertical = plot_type.value == 'vertical'
    draw = plot_capillary_vertical if use_vertical else plot_capillary_horizontal
    draw(data, selection)

def plot_capillary_vertical(cap_data, data_keys, normalization=False, level_shift=1800, max_level=12000):
    """Render every lane as a vertical heatmap strip, side by side (gel-style view).

    Parameters
    ----------
    cap_data : array-like, shape (n_points, n_lanes)
        Intensity traces, one column per lane. It is copied internally, so the
        caller's array is never modified.
    data_keys : sequence of str
        Well identifiers (keys of ``available_samples``), one per column of
        ``cap_data`` and in the same order.
    normalization : bool, optional
        If True, min-max scale every lane to [0, 1] independently and draw it
        with a 0-1 colour scale.
    level_shift : int, optional
        Number of leading data points dropped from every lane (the first
        section of a run is always blank).
    max_level : float, optional
        Top of the colour scale for un-normalised data. When the module-level
        ``mark_over_limits`` flag is set, points above it are drawn in red.

    The figure is drawn inside the ``out`` widget and stored in the global
    ``fig`` so that it can be saved afterwards.
    """
    global fig

    import copy  # local import: only needed to duplicate the colormap

    # Private float copy: the NaN marking below must neither leak into the
    # caller's array nor fail on integer-typed data.
    cap_data = np.array(cap_data, dtype=float)

    # Work on a copy of the colormap so set_bad() does not alter the globally
    # registered 'Blues' map for every other plot in the session.
    cmap = copy.copy(plt.cm.Blues)
    cmap.set_bad((1, 0, 0, 1))  # colour pixels outside of the specified range red for easy viewing
    if mark_over_limits:
        cap_data[cap_data > max_level] = np.nan  # marking illegal pixels for easy viewing

    cap_data = cap_data[level_shift:, :]  # shift data as first section is always blank
    n_lanes = cap_data.shape[1]

    title_font = 23  # specific graph formatting
    label_font = 20
    tick_font = 16

    with out:  # using output context to allow for clearing and re-drawing figure if necessary
        if fig is not None:
            clear_output()

        # Individual normalization (per lane). NaN-aware so that pixels marked
        # above do not wipe out the whole lane; a constant lane keeps a span of
        # 1 to avoid dividing by zero.
        if normalization:
            lane_min = np.nanmin(cap_data, axis=0)
            lane_span = np.nanmax(cap_data, axis=0) - lane_min
            lane_span[lane_span == 0] = 1.0
            cap_data = (cap_data - lane_min) / lane_span
        # Normalised data lives in [0, 1]; using max_level there would render a blank plot.
        colour_max = 1.0 if normalization else max_level

        # main plot shares y axis throughout
        fig, ax = plt.subplots(sharey=True, ncols=n_lanes, figsize=(1.5 * len(data_keys), 8))
        if n_lanes == 1:
            ax = [ax]  # plt.subplots returns a bare Axes for a single lane

        dx = 80 / 300  # manual offset to center labels
        offset = mpl.transforms.ScaledTranslation(dx, 0, fig.dpi_scale_trans)  # graph label offset

        for ind in range(n_lanes):  # preparing heatmaps and visual elements
            lane_ax = ax[ind]
            # Reverse the trace so the end of the run is at the top, as an (n_points, 1) image.
            strip = cap_data[::-1, ind][:, np.newaxis]
            heatmap = lane_ax.imshow(strip, cmap=cmap, aspect="auto", vmin=0, vmax=colour_max)
            lane_ax.tick_params(axis='x', which='both', bottom=False, labelleft=False,
                                right=False, left=False, top=False)
            lane_ax.xaxis.set_label_position('top')

            label = lane_ax.set_xlabel(available_samples[data_keys[ind]], fontsize=label_font,
                                       rotation=60, labelpad=10, ha='center')

            lane_ax.get_yaxis().set_visible(False)
            label.set_transform(label.get_transform() + offset)
            lane_ax.set_xticks([])  # blanking out x ticks

        # A single lane passes its Axes directly; several lanes share one colorbar.
        c_axis = ax[0] if len(ax) == 1 else ax.ravel().tolist()
        cbar = fig.colorbar(heatmap, ax=c_axis, location='bottom', pad=0.05)

        cbar.ax.set_xlabel('Signal Intensity (a.u.)', fontsize=title_font)
        cbar.ax.tick_params(labelsize=tick_font)

        ax[0].get_yaxis().set_visible(True)

        if plot_elution_time:
            n_points = cap_data.shape[0]
            # Tick labels treat the point index divided by 60 as minutes. Because the
            # image is reversed, position 0 is the end of the run and the last
            # position is the first retained point (level_shift).
            end_label = '%.1f minutes' % ((n_points + level_shift) / 60)
            start_label = '%g minutes' % round(level_shift / 60, 1)
            ax[0].yaxis.set_ticks([0, n_points])
            ax[0].set_yticklabels([end_label, start_label], fontsize=tick_font, rotation=90, va='center')
            ax[0].tick_params(axis='y', direction='out', length=12, width=1, which='both')
            ax[0].set_ylabel('Elution Time', fontsize=title_font, labelpad=-15)
        else:
            ax[0].set_yticks([])
            ax[0].set_ylabel('Elution Time', fontsize=title_font, labelpad=15)

        plt.show()
    
def plot_capillary_horizontal(cap_data, data_keys, normalization=False, level_shift=1800, max_level=12000):
    """Render every lane as a horizontal heatmap strip, stacked top to bottom.

    Parameters
    ----------
    cap_data : array-like, shape (n_points, n_lanes)
        Intensity traces, one column per lane. It is copied internally, so the
        caller's array is never modified.
    data_keys : sequence of str
        Well identifiers (keys of ``available_samples``), one per column of
        ``cap_data`` and in the same order.
    normalization : bool, optional
        If True, min-max scale every lane to [0, 1] independently and draw it
        with a 0-1 colour scale.
    level_shift : int, optional
        Number of leading data points dropped from every lane (the first
        section of a run is always blank).
    max_level : float, optional
        Top of the colour scale for un-normalised data. When the module-level
        ``mark_over_limits`` flag is set, points above it are drawn in red.

    The figure is drawn inside the ``out`` widget and stored in the global
    ``fig`` so that it can be saved afterwards.
    """
    global fig

    import copy  # local import: only needed to duplicate the colormap

    # Private float copy: the NaN marking below must neither leak into the
    # caller's array nor fail on integer-typed data.
    cap_data = np.array(cap_data, dtype=float)

    # Work on a copy of the colormap so set_bad() does not alter the globally
    # registered 'Blues' map for every other plot in the session.
    cmap = copy.copy(plt.cm.Blues)
    cmap.set_bad((1, 0, 0, 1))  # colour pixels outside of the specified range red for easy viewing
    if mark_over_limits:  # honour the same switch as the vertical plot
        cap_data[cap_data > max_level] = np.nan  # marking illegal pixels for easy viewing

    cap_data = cap_data[level_shift:, :]  # shift data as first section is always blank
    n_points, n_lanes = cap_data.shape

    title_font = 23
    label_font = 20
    tick_font = 16

    with out:  # output context allows the figure to be cleared and redrawn
        if fig is not None:
            clear_output()

        # Individual normalization (per lane). NaN-aware so that pixels marked
        # above do not wipe out the whole lane; a constant lane keeps a span of
        # 1 to avoid dividing by zero.
        if normalization:
            lane_min = np.nanmin(cap_data, axis=0)
            lane_span = np.nanmax(cap_data, axis=0) - lane_min
            lane_span[lane_span == 0] = 1.0
            cap_data = (cap_data - lane_min) / lane_span
        # Normalised data lives in [0, 1]; using max_level there would render a blank plot.
        colour_max = 1.0 if normalization else max_level

        fig, ax = plt.subplots(sharex=True, nrows=n_lanes, figsize=(12, len(data_keys)))
        if n_lanes == 1:
            ax = [ax]  # plt.subplots returns a bare Axes for a single lane

        for ind in range(n_lanes):
            lane_ax = ax[ind]
            # Reverse the trace so the end of the run is on the left, as a (1, n_points) image.
            strip = cap_data[::-1, ind][np.newaxis, :]
            heatmap = lane_ax.imshow(strip, cmap=cmap, aspect="auto", vmin=0, vmax=colour_max)
            lane_ax.tick_params(left=False, labelleft=False, right=False)
            lane_ax.set_ylabel(available_samples[data_keys[ind]], fontsize=label_font, rotation=0,
                               va="center", ha='center', labelpad=90)
            lane_ax.get_xaxis().set_visible(False)

        # A single lane passes its Axes directly; several lanes share one colorbar.
        c_axis = ax[0] if len(ax) == 1 else ax.ravel().tolist()
        cbar = fig.colorbar(heatmap, ax=c_axis)

        cbar.ax.set_ylabel('Signal Intensity\n (Arbitrary Units)', fontsize=title_font, rotation=270)
        cbar.ax.tick_params(labelsize=tick_font)
        cbar.ax.get_yaxis().labelpad = 50

        # Tick labels treat the point index divided by 60 as minutes. Because the
        # image is reversed, position 0 is the end of the run and the last
        # position is the first retained point (level_shift).
        end_label = '%.1f minutes' % ((n_points + level_shift) / 60)
        start_label = '%g minutes' % round(level_shift / 60, 1)
        ax[-1].xaxis.set_ticks([0, n_points])
        ax[-1].set_xticklabels([end_label, start_label], fontsize=tick_font)
        ax[-1].get_xaxis().set_visible(True)
        ax[-1].set_xlabel('Elution Time', fontsize=title_font, labelpad=-15)
        ax[-1].tick_params(axis='x', direction='out', length=12, width=1, which='both')

        plt.show()
        

def save_plot(btn):
    """Button callback: write the most recently drawn figure to the file named in ``save_loc``.

    ``btn`` is the clicked button; it is required by ``on_click`` but not used.
    The figure is saved at 300 dpi with a tight bounding box. If nothing has
    been plotted yet (``fig`` is still None), a short message is written to the
    output area instead of raising AttributeError.
    """
    if fig is None:
        with out:
            print('Nothing to save yet - plot some samples first.')
        return
    fig.savefig(save_loc.value, bbox_inches='tight', dpi=300)
    
# GUI LAYOUT AND FORMATTING
def _centered_label(text):
    """Return a Label whose text is centred inside its flex container."""
    return widgets.Label(text, layout=widgets.Layout(display='flex', justify_content='center'))


def _action_button(style, text, handler):
    """Return an auto-width Button with ``handler`` registered as its click callback."""
    button = widgets.Button(button_style=style, description=text, layout=layout)
    button.on_click(handler)
    return button


layout = widgets.Layout(width='auto')  # shared by all three buttons
_wide_description = {'description_width': 'initial'}  # let descriptions take the width they need

# List of individual wells; several can be highlighted at once.
sel_mul = widgets.SelectMultiple(
    options=list(available_samples.keys()),
    value=['A1'],
    rows=10,
    description='Available Samples',
    layout=Layout(width="20%", justify_content="center"),
    disabled=False,
    style=dict(_wide_description),
)

# List of the named preset groups.
sel_preset = widgets.SelectMultiple(
    options=list(presets.keys()),
    value=['BRCA tests'],
    description='Preset Combinations',
    layout=Layout(width="30%"),
    disabled=False,
    style=dict(_wide_description),
)

# Destination of the "Save Figure" button (defaults to a PNG in the output folder).
save_loc = widgets.Text(
    value=os.path.join(base_output_loc, 'cap_output.png'),
    placeholder='Enter full filename here',
    description='',
    disabled=False,
    style=dict(_wide_description),
)

# Orientation of the lanes in the plot.
plot_type = widgets.Dropdown(
    options=['vertical', 'horizontal'],
    value='vertical',
    description='',
    disabled=False,
    style=dict(_wide_description),
)

# Right-hand column: file name box on top of the plot-type dropdown.
save_stack = widgets.VBox([
    _centered_label('Output Filename:'),
    save_loc,
    _centered_label('Plot Type:'),
    plot_type,
])

# Middle column: the three action buttons.
sample_btn = _action_button('success', 'View Selected Samples', plot_samples)
preset_btn = _action_button('success', 'View Selected Pre-set', plot_preset)
save_btn = _action_button('info', 'Save Figure', save_plot)
btns = widgets.VBox([sample_btn, preset_btn, save_btn])

display(widgets.HBox([sel_mul, sel_preset, btns, save_stack]))

# Separate output area so the figure can be cleared without clearing the widgets too.
out = Output()
display(out)