In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import glob 

#defining colors
# Each hex code is converted to an RGBA tuple with matplotlib's to_rgba;
# the names on the left line up, in order, with the hex codes below.
purple, pink, blue, green, yellow, grey, orange, black = (
    mcolors.to_rgba(hex_code)
    for hex_code in (
        '#4A1B4F',  # purple
        '#CC2A57',  # pink
        '#417CBF',  # blue
        '#23967C',  # green
        '#D9A433',  # yellow
        '#595959',  # grey
        '#EF5F33',  # orange
        '#000000',  # black
    )
)

In [None]:
#function to make the calibration curve
def calcurve(filepath, coordinate, start_marker_cal):
    """Fit a polynomial of log10(M.W.) versus elution time from a calibration table.

    The table is read from the tab-delimited text file at ``filepath``. Its
    header row is taken to be two lines below the first line that contains
    ``start_marker_cal``; its last data row is taken to be two lines above the
    first line, at or after that header row, that contains ``'Comment'``.

    Parameters
    ----------
    filepath : str or path-like
        Text file that contains the calibration table.
    coordinate : int
        Degree of the polynomial handed to ``np.polyfit``.
    start_marker_cal : str
        Text identifying the line that opens the calibration-table section.

    Returns
    -------
    numpy.poly1d
        Callable polynomial mapping ``Time(min)`` to ``log10(M.W.)``.

    Raises
    ------
    ValueError
        If either marker cannot be found or no data rows lie between them.
    """
    with open(filepath) as file:
        lines = file.readlines()
    end_marker_cal = 'Comment'

    marker_index = next(
        (i for i, line in enumerate(lines) if start_marker_cal in line), None
    )
    if marker_index is None:
        raise ValueError(
            f"start marker {start_marker_cal!r} not found in {filepath!r}"
        )
    start_index_cal = marker_index + 2  # header row of the table

    # Only look for the end marker after the table header, so that an
    # unrelated earlier line containing 'Comment' cannot end the table
    # before it starts.
    comment_index = next(
        (i for i in range(start_index_cal, len(lines)) if end_marker_cal in lines[i]),
        None,
    )
    if comment_index is None:
        raise ValueError(
            f"end marker {end_marker_cal!r} not found after "
            f"{start_marker_cal!r} in {filepath!r}"
        )
    end_index_cal = comment_index - 2  # last data row of the table

    if end_index_cal <= start_index_cal:
        raise ValueError(
            f"no calibration rows found after {start_marker_cal!r} in {filepath!r}"
        )

    # Skip everything outside [start_index_cal, end_index_cal] so that only
    # the header row and the data rows reach the parser.
    lines_to_skip = list(range(0, start_index_cal)) + list(range(end_index_cal + 1, len(lines)))
    table = pd.read_csv(filepath, skiprows=lines_to_skip, header=0, delimiter='\t')
    table = table.drop(columns=['#', 'Weight', 'Error', 'Active', 'Virtual', 'Data Name'])
    table['log(MW)'] = np.log10(table['M.W.'])

    x = table['Time(min)'].values
    y = table['log(MW)'].values

    coeff = np.polyfit(x, y, coordinate)
    return np.poly1d(coeff)

In [None]:
#this function processes the data to the standards to put apparent MW on the x-axis and normalize the intensity for the y-axis
def _read_chromatogram(path, start_marker, end_marker):
    """Read the tab-delimited table lying between two marker lines of ``path``."""
    with open(path) as file:
        lines = file.readlines()

    marker_index = next(
        (i for i, line in enumerate(lines) if start_marker in line), None
    )
    if marker_index is None:
        raise ValueError(f"start marker {start_marker!r} not found in {path!r}")
    start_index = marker_index + 7  # header row of the data table

    # Search for the end marker only after the table header so an earlier
    # occurrence cannot produce an empty selection.
    end_marker_index = next(
        (i for i in range(start_index, len(lines)) if end_marker in lines[i]), None
    )
    if end_marker_index is None:
        raise ValueError(
            f"end marker {end_marker!r} not found after {start_marker!r} in {path!r}"
        )
    end_index = end_marker_index - 1  # last data row of the table

    #defines which lines to skip so those between are pulled from the text file
    lines_to_skip = list(range(0, start_index)) + list(range(end_index + 1, len(lines)))
    return pd.read_csv(path, skiprows=lines_to_skip, header=0, delimiter='\t')


def _nearest_row(times, target, label):
    """Return the index label of the row whose time is closest to ``target``."""
    if not (times.min() <= target <= times.max()):
        raise ValueError(
            f"{label}={target!r} lies outside the recorded retention times "
            f"({times.min()!r} to {times.max()!r})"
        )
    # idxmin returns the first minimum, so an exact match selects the same
    # row as an equality lookup would.
    return (times - target).abs().idxmin()


def process(filepath, csvout,  start_marker, end_marker, coordinate, start_marker_cal, start_time, end_time, blank_filepath):
    """Blank-subtract a chromatogram, convert time to apparent MW and normalize it.

    Steps:
      1. Build the calibration polynomial with ``calcurve`` from ``filepath``.
      2. Read the table between ``start_marker`` and ``end_marker`` from both
         ``filepath`` and ``blank_filepath`` (header row 7 lines below the start
         marker, last row 1 line above the end marker).
      3. Subtract the blank's ``Intensity`` from the sample's, row by row.
      4. Keep rows from the one nearest ``start_time`` up to, but not
         including, the one nearest ``end_time`` (``R.Time (min)`` column).
      5. Add ``MW_PEG`` = 10 ** polynomial(time) and ``normalized RI`` = min-max
         scaled intensity within the window.
      6. Write the windowed table to ``csvout``.

    Parameters
    ----------
    filepath : str or path-like
        Sample text file; also the source of the calibration table.
    csvout : str or path-like
        Destination of the CSV holding the windowed data.
    start_marker, end_marker : str
        Text identifying the lines that bracket the chromatogram table.
    coordinate : int
        Polynomial degree forwarded to ``calcurve``.
    start_marker_cal : str
        Calibration-section marker forwarded to ``calcurve``.
    start_time, end_time : float
        Bounds of the retention-time window, in the units of ``R.Time (min)``.
    blank_filepath : str or path-like
        Blank text file with the same layout and number of rows as the sample.

    Returns
    -------
    pandas.DataFrame
        Columns ``MW_PEG`` and ``normalized RI`` for the selected window.

    Raises
    ------
    ValueError
        If a marker is missing, the blank and sample tables differ in length,
        or a requested time lies outside the recorded range.
    """
    poly_function = calcurve(filepath, coordinate, start_marker_cal)

    df = _read_chromatogram(filepath, start_marker, end_marker)
    blank = _read_chromatogram(blank_filepath, start_marker, end_marker)

    # The subtraction is positional, so both traces need the same length.
    if len(blank) != len(df):
        raise ValueError(
            f"blank has {len(blank)} rows but sample has {len(df)} rows; "
            "cannot subtract row by row"
        )
    df['Intensity'] -= blank['Intensity'].values

    times = df['R.Time (min)']
    start = _nearest_row(times, start_time, 'start_time')
    end = _nearest_row(times, end_time, 'end_time')

    # range() excludes `end`, so the row at end_time itself is not kept.
    # copy() makes the window independent of df before columns are added.
    data = df.loc[range(start, end), ['R.Time (min)', 'Intensity']].copy()

    # The calibration polynomial returns log10(MW); undo the logarithm.
    data['MW_PEG'] = 10 ** poly_function(data['R.Time (min)'].to_numpy())

    max_intensity = data['Intensity'].max()
    min_intensity = data['Intensity'].min()
    data['normalized RI'] = (data['Intensity'] - min_intensity) / (max_intensity - min_intensity)
    data.to_csv(csvout, index=False)

    processed = data[['MW_PEG', 'normalized RI']]
    return processed

In [None]:
#this function makes the plot
def plotting(data_to_plot, colors, labels, title, xmin, xmax, plotout, lines, DMF_data):
    """Overlay the processed traces on one figure, show it and save it as PNG.

    Parameters
    ----------
    data_to_plot : iterable of pandas.DataFrame
        Each frame supplies the ``MW_PEG`` (x) and ``normalized RI`` (y) columns.
    colors, labels, lines : sequence
        Color, legend label and linestyle of each trace, indexed in the same
        order as ``data_to_plot``.
    title : str
        Title of the plot.
    xmin, xmax : float
        Limits of the x-axis.
    plotout : str or path-like
        Where the PNG is written.
    DMF_data : object
        Accepted but not used by this function.
    """
    fig, ax = plt.subplots()

    for position, trace in enumerate(data_to_plot):
        ax.plot(
            trace['MW_PEG'],
            trace['normalized RI'],
            label=labels[position],
            color=colors[position],
            linestyle=lines[position],
        )

    ax.set_xlabel('MW$_P$$_E$$_G$ (Da)')
    ax.set_ylabel('normalized RI (mV)')
    ax.set_title(title)
    ax.legend(labels=labels)
    ax.set_xlim(xmin, xmax)

    plt.show()
    fig.savefig(plotout, format='png')

In [None]:
#filepaths for data, you can add as many as you want, just add the necessary filepath, csvout, and blank and be sure to add the result to data_to_plot

#settings shared by every trace
CHROMATOGRAM_START = '[LC Chromatogram(Detector A-Ch1)]'
CHROMATOGRAM_END = '[LC Chromatogram(Detector B-Ch1)]'
CALIBRATION_MARKER = '[GPC Calibration Curve Table(Detector A)]'
FIT_DEGREE = 5          #degree of the calibration polynomial
WINDOW_START = 10       #start of the retention-time window (min)
WINDOW_END = 18         #end of the retention-time window (min)

DMF_filepath = 'file.txt'
DMF_csvout = 'csvout.csv'
DMF_blank = 'blank.txt'
DMF_data = process(
    DMF_filepath, DMF_csvout,
    start_marker=CHROMATOGRAM_START, end_marker=CHROMATOGRAM_END,
    coordinate=FIT_DEGREE, start_marker_cal=CALIBRATION_MARKER,
    start_time=WINDOW_START, end_time=WINDOW_END,
    blank_filepath=DMF_blank,
)

DMF_filepath2 = 'file2.txt'
DMF_csvout2 = 'csvout2.csv'
DMF_blank2 = 'blank2.txt'
DMF_data2 = process(
    DMF_filepath2, DMF_csvout2,
    start_marker=CHROMATOGRAM_START, end_marker=CHROMATOGRAM_END,
    coordinate=FIT_DEGREE, start_marker_cal=CALIBRATION_MARKER,
    start_time=WINDOW_START, end_time=WINDOW_END,
    blank_filepath=DMF_blank2,
)

DMF_filepath3 = 'file3.txt'
DMF_csvout3 = 'csvout3.csv'
DMF_blank3 = 'blank3.txt'
DMF_data3 = process(
    DMF_filepath3, DMF_csvout3,
    start_marker=CHROMATOGRAM_START, end_marker=CHROMATOGRAM_END,
    coordinate=FIT_DEGREE, start_marker_cal=CALIBRATION_MARKER,
    start_time=WINDOW_START, end_time=WINDOW_END,
    blank_filepath=DMF_blank3,
)

#traces to draw, in plotting order
data_to_plot = [DMF_data, DMF_data2, DMF_data3]

#uncomment to add another trace; copy and change # to add even more
# DMF_filepath4 = 'file4.txt'
# DMF_csvout4 = 'csvout4.csv'
# DMF_blank4 = 'blank4.txt'
# DMF_data4 = process(
#     DMF_filepath4, DMF_csvout4,
#     start_marker=CHROMATOGRAM_START, end_marker=CHROMATOGRAM_END,
#     coordinate=FIT_DEGREE, start_marker_cal=CALIBRATION_MARKER,
#     start_time=WINDOW_START, end_time=WINDOW_END,
#     blank_filepath=DMF_blank4,
# )
# data_to_plot.append(DMF_data4)

#one color per trace
colors = [purple, pink, green]

#one legend label per trace
labels = ['1', '2', '3']

#one linestyle per trace
lines = ['-', '--', ':']

#plot title
title = 'title'

#x-axis limits
xmin = 500
xmax = 30000

#where to save the output plot
plotout = 'GPCplots.png'

plotting(data_to_plot, colors, labels, title, xmin, xmax, plotout, lines, DMF_data)