In [2]:
import os
from math import ceil
from itertools import repeat
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)  # show every column of wide frames; comment out to restore the pandas default truncation
# pd.set_option('display.max_rows', None)  # uncomment to also show every row
idx_slice = pd.IndexSlice  # shorthand used with .loc to slice the two-level column index

## Collect chromatograms

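Option 1: from a directory (every `.csv` file in the directory is loaded). Run only one of the two options in this section: both assign `analysis_files`, so whichever cell runs last wins.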

In [7]:
file_directory = '/home/kmeador/SEC'  # directory that is scanned for exported chromatogram .csv files

In [None]:
# os.listdir() returns bare file names, so each one is joined to file_directory; otherwise
# pd.read_csv could only open them when the working directory happens to be file_directory.
# Sorting gives a stable load order, since os.listdir() makes no ordering guarantee.
analysis_files = sorted(os.path.join(file_directory, file)
                        for file in os.listdir(file_directory) if file.endswith('.csv'))

Option 2: from specific file(s), listed by name without the extension

In [7]:
file_location = '/home/kmeador/SEC' # escher
# file_location = '/home/kylemeador' # powerpanda
# repeat() yields file_location indefinitely, so the same directory is paired with every entry of file_names
file_directories = repeat(file_location)
# export names written without their extension (create_file_path appends '.csv' by default)
file_names = ['123021_T33_G8_F3_F9_450ul_strait_elution',
#               '',
             ]

In [8]:
# extension = 'csv'
# analysis_file = os.path.join(file_location, '%s.%s' % (file_name, extension))

In [8]:
def create_file_path(file_name, base_directory=None, extension='.csv'):
    """Build the path of a chromatogram export from its bare name.

    Args:
        file_name: Name of the file without its extension.
        base_directory: Directory that contains the file. When omitted (or empty) the
            returned path is just the file name, i.e. relative to the working directory.
        extension: Suffix appended verbatim to file_name, including the leading dot.

    Returns:
        The joined path as a string.
    """
    return os.path.join(base_directory or '', '%s%s' % (file_name, extension))

In [None]:
# Pair each requested name with its directory and expand every pair into a full file path
analysis_files = [create_file_path(name, directory)
                  for name, directory in zip(file_names, file_directories)]

## Check DataFrame columns and values

Ensure that the csv file has properly formatted headers. If there are commas in your chromatogram name, the export and load process may be corrupted.

#### For datasets that are exported with volumes for every measurement

In [107]:
# Each signal carries its own "<signal>_volume" column, so the frames keep the default integer index
volume_is_index = False
# sec_data_df = pd.read_csv(analysis_file, header=[0,1])
# header=[0, 1] reads the first two rows as a two-level column index: run title, then measurement
sec_data_dfs = [pd.read_csv(analysis_file, header=[0, 1]) for analysis_file in analysis_files]

#### For datasets that are exported with normalized volumes (every measurement is indexed to the same volume)

In [107]:
# Every measurement shares one volume axis, which is stored in the first csv column
volume_is_index = True
# sec_data_df = pd.read_csv(analysis_file, header=[0,1], index_col=0)
# header=[0, 1] reads the first two rows as a two-level column index; index_col=0 makes the
# first column (the shared volume) the row index
sec_data_dfs = [pd.read_csv(analysis_file, header=[0, 1], index_col=0) for analysis_file in analysis_files]

Check if the data is formatted as requested

In [107]:
sec_data_dfs[0].head()  # preview the first rows of the first loaded file

Unnamed: 0_level_0,10E Run01,10E Run01,10E Run01,10E Run01,10E Run01,10E Run01,10E Run01,10E Run01,10E Run01,10E Run01,10E Run01,10E Run01
Unnamed: 0_level_1,UV3 (280 nm)_volume,UV3 (280 nm)_mAU,Conductivity_volume,Conductivity_mS/cm,%B_volume,%B_%,delta Col Pressure_volume,delta Col Pressure_MPa,Temperature_volume,Temperature_C,Baseline of UV3 (280 nm)_volume,Baseline of UV3 (280 nm)_mAU
0,-0.993075,0.0,-0.993075,6.052461,-0.993075,100.0,-0.987753,0.01379,-0.993075,11.217049,-0.993075,-326.48468
1,-0.990585,0.0,-0.990585,6.052461,-0.989755,100.0,-0.978574,0.0,-0.990585,11.217049,-0.990585,-326.43804
2,-0.988681,0.0,-0.988681,6.052692,-0.980966,100.0,-0.966269,0.068948,-0.988681,11.217049,-0.988681,-326.402336
3,-0.986777,,-0.986777,6.052461,-0.968759,100.0,-0.953769,0.27579,-0.986777,11.217049,-0.986777,
4,-0.984384,,-0.984384,6.052461,-0.956259,100.0,-0.941269,0.427475,-0.984384,11.217049,-0.984384,


In [None]:
# Gather the level-0 column labels (the run titles) of every loaded file into one flat list
run_titles = [title for data_df in sec_data_dfs for title in data_df.columns.levels[0].to_list()]
print('The following chromatograms are available for analysis: \n%s' % '\n'.join(run_titles))

## Plotting Options

To plot only specific runs, list keywords below; a run is plotted when its title contains at least one of them. Leave the list empty to plot every run.

In [5]:
chromatogram_keywords = []  # substrings of run titles to plot; an empty list plots every run

How should plots be generated?

In [51]:
only_plot_A280 = True # False #  | plot only the UV columns whose name contains '280 nm'
overlap_chromatograms = False # True #  | draw every run on one shared axis; takes priority over stack_figures
stack_figures = True # False #  | give every run its own figure; when False the runs are subplots of one figure
plot_fractions = True # False #  | mark fraction boundaries with numbered vertical lines
fraction_size = 0.5  # width of each fraction along the volume axis
fraction_start = 8.068  # volume at which the first fraction begins
fraction_end = 25.0  # volume at which fraction marking stops

In [106]:
def split_biorad_chromatogram_dataframe_to_unique_runs(df):
    """Split a multi-run chromatogram export into one DataFrame per run.

    Args:
        df: DataFrame whose columns are a two-level MultiIndex with the run title on
            level 0 and the measurement name on level 1.

    Returns:
        A list of DataFrames, one per run title, in the order the runs first appear in
        df. Each frame is a copy holding only that run's columns, and its column index
        is pruned so that ``columns.levels`` lists only the labels actually present.
    """
    run_labels = df.columns.get_level_values(0)
    runs = []
    # .unique() keeps the order of appearance and only yields labels that are in use,
    # whereas columns.levels[0] is sorted and may still list labels that were dropped
    for run in run_labels.unique():
        run_df = df.loc[:, run_labels == run].copy()
        # Slicing keeps every original label in columns.levels; prune them so that
        # lookups such as columns.levels[0][0] return this run's own title
        run_df.columns = run_df.columns.remove_unused_levels()
        runs.append(run_df)
    return runs

In [60]:
def _prettify_uv_column_name(column):
    """Reduce a UV signal column name such as 'UV3 (280 nm)_mAU' to '280 nm'."""
    return column.replace('_mAU', '').replace('UV', '').strip('1234567890 ').strip('()')


def _find_uv_columns(column_names, only_280=True):
    """Pick the UV volume column and the UV signal columns out of one run's column names.

    Returns:
        (volume, uv_columns): volume is the last column that starts with 'UV' and ends
        with '_volume' (None when there is none); uv_columns are the remaining 'UV'
        columns, restricted to those containing '280 nm' when only_280 is set.
    """
    volume = None
    uv_columns = []
    for column in column_names:
        if not column.startswith('UV'):
            continue
        if column.endswith('_volume'):
            volume = column
        elif not only_280 or '280 nm' in column:
            uv_columns.append(column)
    return volume, uv_columns


def plot_chromatograms(sec_runs, chromatogram_keywords=None, only_280=True, volume_is_column=False, 
                       stack_figures=False, overlap_chromatograms=False, annotate=True,
                       fractions=False, fraction_start=None, fraction_end=None, fraction_size=0.5):
    """Plot the UV traces of one or more chromatography runs against volume.

    Relies on ``plt`` (matplotlib.pyplot) and ``ceil`` from the notebook's import cell.

    Args:
        sec_runs: Iterable of single-run DataFrames with two-level columns (run title,
            measurement name). The frames are not modified.
        chromatogram_keywords: Optional list of strings. When given, only runs whose
            title contains at least one keyword are plotted; when empty, all runs are.
        only_280: Plot only the UV columns whose name contains '280 nm'.
        volume_is_column: True when the volume lives in a '<signal>_volume' column,
            False when it is already the row index.
        stack_figures: Give every run its own figure instead of one subplot per run.
        overlap_chromatograms: Draw every run on a single shared axis. Takes priority
            over stack_figures.
        annotate: When False, no fraction markers are drawn even if fractions is True.
        fractions: Draw numbered vertical lines at the start of every fraction.
        fraction_start: Volume of the first fraction; defaults to 0.
        fraction_end: Volume at which fraction marking stops; defaults to the largest
            volume of the run being plotted.
        fraction_size: Width of each fraction along the volume axis.

    Raises:
        ValueError: If fractions are requested with a non-positive fraction_size.
    """
    if fractions and (fraction_size is None or fraction_size <= 0):
        raise ValueError('fraction_size must be a positive number, got %r' % (fraction_size,))

    figure_aspect_ratio = (15, 5)
    sec_runs = list(sec_runs)

    if chromatogram_keywords:
        # Match against the titles actually present in each frame. columns.levels[0][0]
        # can still hold the first title of the file the run was split from.
        sec_runs = [run_df for run_df in sec_runs
                    if any(keyword in str(title)
                           for title in run_df.columns.get_level_values(0).unique()
                           for keyword in chromatogram_keywords)]

    if not sec_runs:
        print('No chromatograms to plot')
        return

    # Only create the figures that are used, so no blank figure is shown
    if overlap_chromatograms:
        shared_axis = plt.figure(figsize=figure_aspect_ratio).subplots(1, 1)
        axes = [shared_axis] * len(sec_runs)
    elif stack_figures:
        axes = [plt.figure(figsize=figure_aspect_ratio).subplots(1, 1) for _ in sec_runs]
    else:
        # squeeze=False always returns an array, including for a single run
        axes = plt.figure(figsize=figure_aspect_ratio).subplots(
            len(sec_runs), 1, sharex=True, squeeze=False).ravel()

    draw_fractions = fractions and annotate
    for axis, run_df in zip(axes, sec_runs):
        titles = run_df.columns.get_level_values(0).unique()
        if len(titles) != 1:
            # Skip only the offending frame so the remaining runs are still plotted
            print('Error, multiple headers detected in dataframe %s!\n%s' % (titles, run_df.head()))
            continue
        run_title = str(titles[0])

        volume, uv_columns = _find_uv_columns(run_df.columns.get_level_values(-1).unique(),
                                              only_280=only_280)
        if not uv_columns:
            print('No UV columns found for %s, skipping' % run_title)
            continue
        if volume_is_column and volume is None:
            print('No UV volume column found for %s, skipping' % run_title)
            continue

        # Work on a copy with flat column names so the caller's frame is left untouched
        run_data = run_df.droplevel(0, axis=1)
        signal_df = run_data.loc[:, uv_columns].copy()
        if volume_is_column:
            # Use the volume column as the x axis
            signal_df.index = run_data[volume].values
        signal_df.index.name = 'Volume'
        if signal_df.empty:
            print('No data found for %s, skipping' % run_title)
            continue
        signal_df.columns = [_prettify_uv_column_name(column) for column in signal_df.columns]

        single_trace = len(signal_df.columns) == 1
        for column, trace in signal_df.items():
            label = run_title if single_trace else '%s %s' % (run_title, column)
            axis.plot(signal_df.index.values, trace.values, label=label)
        axis.set_xlim(0, None)
        axis.set_xlabel('Volume')
        axis.set_ylabel('Absorbance (mAU)')

        if draw_fractions:
            # Resolve the defaults per run; the arguments themselves are never overwritten,
            # so one run's end volume is not reused for the next run
            run_fraction_start = fraction_start if fraction_start else 0.
            run_fraction_end = fraction_end if fraction_end else signal_df.index.max()
            number_of_fractions = ceil((run_fraction_end - run_fraction_start) / fraction_size)
            fraction_start_volumes = [run_fraction_start + (fraction_size * fraction)
                                      for fraction in range(number_of_fractions)]
            line_colors = ['#cccccc' if position % 2 == 1 else '#000eee'
                           for position in range(len(fraction_start_volumes))]
            # The x-axis transform puts x in data units and y in axes fraction, so every
            # line spans the full height of the plot
            axis.vlines(fraction_start_volumes, 0, 1, transform=axis.get_xaxis_transform(),
                        label='Fractions', colors=line_colors)
            for fraction_number, fraction_volume in enumerate(fraction_start_volumes, 1):
                axis.annotate(str(fraction_number), xy=(fraction_volume, 1),
                              xycoords=('data', 'axes fraction'), textcoords=('data', 'axes fraction'),
                              horizontalalignment='left', verticalalignment='top')
            if overlap_chromatograms:
                # All runs share one axis, so the markers are drawn only once
                draw_fractions = False
        axis.legend()
    plt.show()

In [97]:
# Flatten the per-file lists of runs into one list of single-run DataFrames
sec_runs = [run_df for data_df in sec_data_dfs
            for run_df in split_biorad_chromatogram_dataframe_to_unique_runs(data_df)]

In [None]:
# Plot with the options chosen in the "Plotting Options" section
plot_chromatograms(
    sec_runs,
    chromatogram_keywords=chromatogram_keywords,
    only_280=only_plot_A280,
    volume_is_column=not volume_is_index,  # the volume is a column unless it was loaded as the index
    stack_figures=stack_figures,
    overlap_chromatograms=overlap_chromatograms,
    fractions=plot_fractions,
    fraction_start=fraction_start,
    fraction_end=fraction_end,
    fraction_size=fraction_size,
)