In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import glob 
import os

# Custom colour palette: every name below is bound to the RGBA tuple that
# matplotlib derives from the hex code listed on the matching line.
purple, pink, blue, green, yellow, grey, orange, black = map(
    mcolors.to_rgba,
    (
        '#4A1B4F',  # purple
        '#CC2A57',  # pink
        '#417CBF',  # blue
        '#23967C',  # green
        '#D9A433',  # yellow
        '#595959',  # grey
        '#EF5F33',  # orange
        '#000000',  # black
    ),
)

In [None]:
#function to make the calibration curve
def calcurve(filepath, coordinate, start_marker_cal):
    """Fit a polynomial calibration curve of log10(M.W.) versus elution time.

    The calibration table is read out of the tab-delimited text file at
    ``filepath``. Its column-header row is taken to be two lines below the
    first line containing ``start_marker_cal``; its last data row is taken to
    be two lines above the first line *after that marker* that contains
    ``'Comment'``. Only the ``'Time(min)'`` and ``'M.W.'`` columns are used.

    Parameters
    ----------
    filepath : str or path-like
        Text export that contains the calibration table.
    coordinate : int
        Degree of the polynomial handed to ``np.polyfit``.
    start_marker_cal : str
        Text identifying the line that opens the calibration-table section.

    Returns
    -------
    numpy.poly1d
        Polynomial mapping elution time to log10(M.W.).

    Raises
    ------
    ValueError
        If either marker cannot be found, or no table rows lie between them.
    """
    end_marker_cal = 'Comment'

    with open(filepath) as file:
        lines = file.readlines()

    marker_line = next((i for i, line in enumerate(lines) if start_marker_cal in line), None)
    if marker_line is None:
        raise ValueError(f"start marker {start_marker_cal!r} not found in {filepath!r}")

    # Look for the end marker only below the start marker, so that an earlier
    # line mentioning 'Comment' (or an earlier table's footer) is not picked up.
    end_line = next(
        (i for i in range(marker_line + 1, len(lines)) if end_marker_cal in lines[i]),
        None,
    )
    if end_line is None:
        raise ValueError(
            f"end marker {end_marker_cal!r} not found after {start_marker_cal!r} in {filepath!r}"
        )

    header_row = marker_line + 2   # column names sit two lines below the marker
    last_row = end_line - 2        # table stops two lines above the end marker
    if last_row <= header_row:
        raise ValueError(
            f"no calibration rows between {start_marker_cal!r} and {end_marker_cal!r} in {filepath!r}"
        )

    # Keep only physical lines header_row..last_row; the first kept line is the header.
    table = pd.read_csv(
        filepath,
        skiprows=lambda i: i < header_row or i > last_row,
        header=0,
        delimiter='\t',
    )

    elution_time = table['Time(min)'].to_numpy()
    log_mw = np.log10(table['M.W.'].to_numpy())

    return np.poly1d(np.polyfit(elution_time, log_mw, coordinate))


In [None]:
def _read_chromatogram(filepath, start_marker, end_marker):
    """Load the tab-delimited table lying between two section markers of a text export."""
    with open(filepath) as file:
        lines = file.readlines()

    start_line = next((i for i, line in enumerate(lines) if start_marker in line), None)
    if start_line is None:
        raise ValueError(f"start marker {start_marker!r} not found in {filepath!r}")

    # Only accept an end marker located below the start marker.
    end_line = next(
        (i for i in range(start_line + 1, len(lines)) if end_marker in lines[i]),
        None,
    )
    if end_line is None:
        raise ValueError(f"end marker {end_marker!r} not found after {start_marker!r} in {filepath!r}")

    header_row = start_line + 7   # column names sit seven lines below the start marker
    last_row = end_line - 1       # data stops on the line just above the end marker
    if last_row <= header_row:
        raise ValueError(f"no data rows between {start_marker!r} and {end_marker!r} in {filepath!r}")

    # Keep only physical lines header_row..last_row; the first kept line is the header.
    return pd.read_csv(
        filepath,
        skiprows=lambda i: i < header_row or i > last_row,
        header=0,
        delimiter='\t',
    )


def _nearest_time_index(times, target, what):
    """Return the index label of the sample whose retention time is closest to ``target``."""
    if not (times.min() <= target <= times.max()):
        raise ValueError(
            f"{what}={target!r} lies outside the recorded retention times "
            f"({times.min()!r} to {times.max()!r})"
        )
    # An exact match has distance 0, so it is still the row that gets selected.
    return (times - target).abs().idxmin()


#this function processes the data to put apparent MW on the x-axis and normalize the intensity values on the y-axis
def process(filepath,  start_marker, end_marker, coordinate, start_marker_cal, start_time, end_time, blank_filepath):
    """Turn a raw chromatogram into apparent MW versus min-max normalized intensity.

    Steps: fit the calibration curve stored in ``filepath``, read the sample
    and blank chromatograms found between ``start_marker`` and ``end_marker``,
    subtract the blank intensity point by point, keep the retention-time
    window, convert retention time to apparent MW with the calibration
    polynomial, and rescale the intensity of the window to the range 0-1.

    Parameters
    ----------
    filepath : str or path-like
        Text export holding both the sample chromatogram and its calibration table.
    start_marker, end_marker : str
        Text identifying the lines that bracket the chromatogram section. The
        same markers are used for the blank file.
    coordinate : int
        Polynomial degree forwarded to ``calcurve``.
    start_marker_cal : str
        Marker of the calibration-table section, forwarded to ``calcurve``.
    start_time, end_time : float
        Retention-time window, in the units of the ``'R.Time (min)'`` column.
        The sample closest to each value is used; the row matching
        ``end_time`` itself is excluded from the window.
    blank_filepath : str or path-like
        Text export of the blank run; it must contain as many samples as the
        sample chromatogram.

    Returns
    -------
    pandas.DataFrame
        Columns ``'MW_PEG'`` and ``'normalized RI'``, indexed by the row labels
        of the selected window.

    Raises
    ------
    ValueError
        If a marker is missing, the blank and sample lengths differ, a
        requested time lies outside the recorded range, or the window is empty.
    """
    poly_function = calcurve(filepath, coordinate, start_marker_cal)

    df = _read_chromatogram(filepath, start_marker, end_marker)
    blank = _read_chromatogram(blank_filepath, start_marker, end_marker)

    # Guard explicitly: a length-1 blank would otherwise broadcast silently.
    if len(blank) != len(df):
        raise ValueError(
            f"blank has {len(blank)} samples but chromatogram has {len(df)}; "
            "cannot subtract point by point"
        )
    df['Intensity'] = df['Intensity'] - blank['Intensity'].to_numpy()

    times = df['R.Time (min)']
    start = _nearest_time_index(times, start_time, 'start_time')
    end = _nearest_time_index(times, end_time, 'end_time')
    if start >= end:
        raise ValueError(
            f"empty time window: start_time={start_time!r} must select an earlier row than end_time={end_time!r}"
        )

    # range() stops before `end`, so the end_time row is not part of the window.
    # .copy() makes the window independent of df before new columns are added.
    data = df.loc[range(start, end), ['R.Time (min)', 'Intensity']].copy()

    # The calibration polynomial yields log10(MW); undo the logarithm.
    data['MW_PEG'] = 10 ** poly_function(data['R.Time (min)'].to_numpy())

    # Min-max scaling over the window (a perfectly flat window produces NaN).
    min_intensity = data['Intensity'].min()
    max_intensity = data['Intensity'].max()
    data['normalized RI'] = (data['Intensity'] - min_intensity) / (max_intensity - min_intensity)

    return data[['MW_PEG', 'normalized RI']]


In [None]:
#this function calculates percent collapse
def collapse(DMF_data, aq_data):
    """Return the percent collapse between the DMF and aqueous peak MWs.

    For each frame the peak MW is the ``'MW_PEG'`` value on the row where
    ``'normalized RI'`` is largest. The result is
    ``(1 - peak_aq / peak_DMF) * 100``.
    """
    frames = (DMF_data, aq_data)

    # Row labels of the intensity maxima, DMF first and aqueous second.
    peak_labels = [frame['normalized RI'].idxmax() for frame in frames]

    # Apparent MW read off at those labels.
    peak_dmf, peak_aq = [
        frame.loc[label, 'MW_PEG'] for frame, label in zip(frames, peak_labels)
    ]

    remaining_fraction = peak_aq / peak_dmf
    return (1 - remaining_fraction) * 100


In [None]:
#this function makes the plot

# Set matplotlib's default font size to 14.
plt.rcParams['font.size'] = 14
    
    
def plotting(data_to_plot, colors, labels, title, xmin, xmax, plotout, lines, DMF_data, aq_data):
    """Overlay the processed traces on one 3x3 figure, show it, then save it as PNG.

    ``data_to_plot`` is iterated in order; trace number ``k`` is drawn from its
    ``'MW_PEG'`` and ``'normalized RI'`` columns using ``labels[k]``,
    ``colors[k]`` and ``lines[k]``. The x-axis is limited to
    ``xmin``..``xmax`` and the figure is written to ``plotout``.

    ``DMF_data`` and ``aq_data`` are passed to ``collapse``; the resulting
    percent-collapse text is prepared but is not drawn, because the annotation
    call is currently disabled.
    """
    collapse_percent = collapse(DMF_data, aq_data)
    annotation = f'%collapse = {collapse_percent:.1f}'

    fig, ax = plt.subplots(figsize=(3, 3))

    for position, trace in enumerate(data_to_plot):
        ax.plot(
            trace['MW_PEG'],
            trace['normalized RI'],
            label=labels[position],
            color=colors[position],
            linestyle=lines[position],
        )

    ax.set_xlabel('apparent MW', fontsize=16)
    ax.set_ylabel('normalized RI', fontsize=16)
    ax.set_title(title)
    ax.set_xlim(xmin, xmax)

    # Disabled: draw the percent-collapse text in a rounded white box on the axes.
    # ax.text(0.5, 0.1, annotation, ha='center', va='center', transform=ax.transAxes,
    #         bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.5'))

    plt.show()
    fig.savefig(plotout, format='png')


In [None]:
# Organic (DMF) run: Detector A chromatogram, degree-5 calibration fit,
# retention-time window 10-18 in the units of the 'R.Time (min)' column.
DMF_filepath = 'DMF.txt'
DMF_blank = 'DMF_blank.txt'
DMF_data = process(
    DMF_filepath,
    start_marker='[LC Chromatogram(Detector A-Ch1)]',
    end_marker='[LC Chromatogram(Detector B-Ch1)]',
    coordinate=5,
    start_marker_cal='[GPC Calibration Curve Table(Detector A)]',
    start_time=10,
    end_time=18,
    blank_filepath=DMF_blank,
)

# Aqueous run: Detector B chromatogram, degree-3 calibration fit,
# retention-time window 10-25.
aq_filepath = 'aq.txt'
aq_blank = 'aq_blank.txt'
aq_data = process(
    aq_filepath,
    start_marker='[LC Chromatogram(Detector B-Ch1)]',
    end_marker='[LC Status Trace(Pump A Pressure)]',
    coordinate=3,
    start_marker_cal='[GPC Calibration Curve Table(Detector B)]',
    start_time=10,
    end_time=25,
    blank_filepath=aq_blank,
)

data_to_plot = [DMF_data, aq_data]

#define colors
# Use the custom RGBA palette defined in the first cell; the strings
# 'purple' / 'blue' would instead select matplotlib's built-in named colours.
colors = [purple, blue]

#define labels
labels = ['organic', 'aqueous']

#define linestyles
lines = ['-', '--']

#define title
title = ''

#define x-axis limits
xmin = 1000
xmax = 35000

#define plot output filepath
plotout = 'percent_collapse.png'

plotting(data_to_plot, colors, labels, title, xmin, xmax, plotout, lines, DMF_data, aq_data)