In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import glob 
import os
import re

# Plot palette: each hex code is converted once to an RGBA tuple.
purple, pink, blue, green, yellow, grey, orange, black = map(
    mcolors.to_rgba,
    (
        '#4A1B4F',  # purple
        '#CC2A57',  # pink
        '#417CBF',  # blue
        '#23967C',  # green
        '#D9A433',  # yellow
        '#595959',  # grey
        '#EF5F33',  # orange
        '#000000',  # black
    ),
)

In [None]:
def calcurve(filepath, coordinate, start_marker_cal):
    """Fit the calibration curve stored in an exported chromatogram text file.

    The calibration table is located by the first line containing
    ``start_marker_cal``; its column header row is expected two lines below
    that marker. The table is taken to end two lines above the first line
    containing ``'Comment'`` that comes at or after the header row.

    Parameters
    ----------
    filepath : str or path-like
        Tab-delimited text file containing the calibration table.
    coordinate : int
        Degree of the polynomial handed to ``np.polyfit``.
    start_marker_cal : str
        Text identifying the line that opens the calibration table section.

    Returns
    -------
    numpy.poly1d
        Polynomial mapping the ``Time(min)`` column to ``log10`` of the
        ``M.W.`` column.

    Raises
    ------
    ValueError
        If the start marker or the end marker cannot be found.
    """
    end_marker_cal = 'Comment'
    with open(filepath) as file:
        lines = file.readlines()

    marker_line = next((i for i, line in enumerate(lines) if start_marker_cal in line), None)
    if marker_line is None:
        raise ValueError(f"Start marker {start_marker_cal!r} not found in {filepath!r}")
    start_index_cal = marker_line + 2  # header row of the table

    # Search for the end marker only from the table header onwards, so that an
    # earlier line containing the same word cannot place the end before the start.
    end_line = next(
        (i for i in range(start_index_cal, len(lines)) if end_marker_cal in lines[i]),
        None,
    )
    if end_line is None:
        raise ValueError(
            f"End marker {end_marker_cal!r} not found after {start_marker_cal!r} in {filepath!r}"
        )
    end_index_cal = end_line - 2  # last row of the table

    # Keep only the lines start_index_cal..end_index_cal (inclusive).
    lines_to_skip = list(range(0, start_index_cal)) + list(range(end_index_cal + 1, len(lines)))
    table = pd.read_csv(filepath, skiprows=lines_to_skip, header=0, delimiter='\t')

    x = table['Time(min)'].values
    y = np.log10(table['M.W.'].values)

    coeff = np.polyfit(x, y, coordinate)
    return np.poly1d(coeff)

In [None]:
#this function processes the data to put apparent MW on the x-axis and normalize the intensity values on the y-axis
def process(filepath,  start_marker, end_marker, coordinate, start_marker_cal, start_time, end_time, blank_filepath):
    """Blank-subtract a chromatogram and express it as normalized signal vs. MW.

    Steps: fit the calibration polynomial from ``filepath`` (via ``calcurve``),
    read the chromatogram table from both ``filepath`` and ``blank_filepath``,
    subtract the blank intensity row by row, keep the rows from ``start_time``
    up to (but not including) ``end_time``, convert retention time to
    ``10 ** poly(time)`` and min-max scale the intensity over that window.

    Parameters
    ----------
    filepath : str or path-like
        Tab-delimited text file holding the sample chromatogram and the
        calibration table.
    start_marker, end_marker : str
        Text identifying the lines that open and follow the chromatogram
        table. The same markers are used for the blank file.
    coordinate : int
        Polynomial degree forwarded to ``calcurve``.
    start_marker_cal : str
        Calibration-table marker forwarded to ``calcurve``.
    start_time, end_time : float
        Values of the ``R.Time (min)`` column bounding the window; the row at
        ``end_time`` itself is excluded.
    blank_filepath : str or path-like
        File holding the blank chromatogram; it must contain the same number
        of rows as the sample.

    Returns
    -------
    pandas.DataFrame
        Columns ``MW_PEG`` and ``normalized RI`` for the selected window.

    Raises
    ------
    ValueError
        If a marker or a window time cannot be found, or if the sample and
        blank tables differ in length.
    """
    poly_function = calcurve(filepath, coordinate, start_marker_cal)

    df = _read_chromatogram(filepath, start_marker, end_marker)
    blank = _read_chromatogram(blank_filepath, start_marker, end_marker)

    if len(blank) != len(df):
        raise ValueError(
            f"Blank {blank_filepath!r} has {len(blank)} rows but {filepath!r} has {len(df)}; "
            "they must match for row-by-row subtraction"
        )
    df['Intensity'] -= blank['Intensity'].values

    start = _row_at_time(df, start_time, filepath)
    end = _row_at_time(df, end_time, filepath)

    # .copy() so the new columns are written to an independent frame rather
    # than to a slice of df.
    data = df.loc[range(start, end), ['R.Time (min)', 'Intensity']].copy()

    # The polynomial returns log10(MW), so undo the logarithm.
    data['MW_PEG'] = 10 ** poly_function(data['R.Time (min)'].to_numpy())

    max_intensity = data['Intensity'].max()
    min_intensity = data['Intensity'].min()
    # Note: a perfectly flat window gives 0/0 here, i.e. NaN in every row.
    data['normalized RI'] = (data['Intensity'] - min_intensity) / (max_intensity - min_intensity)

    return data[['MW_PEG', 'normalized RI']]


def _find_marker_line(lines, marker, first_line, path):
    """Return the index of the first line at or after ``first_line`` containing ``marker``."""
    for i in range(first_line, len(lines)):
        if marker in lines[i]:
            return i
    raise ValueError(f"Marker {marker!r} not found in {path!r} (searching from line {first_line})")


def _read_chromatogram(path, start_marker, end_marker):
    """Read the tab-delimited table lying between two marker lines of ``path``."""
    with open(path) as file:
        lines = file.readlines()

    # The column header row sits seven lines below the start marker.
    start_index = _find_marker_line(lines, start_marker, 0, path) + 7
    # Search for the end marker only after the header row so an earlier
    # occurrence cannot put the end of the table before its start.
    end_index = _find_marker_line(lines, end_marker, start_index, path) - 1

    # Skip everything except lines start_index..end_index (inclusive).
    lines_to_skip = list(range(0, start_index)) + list(range(end_index + 1, len(lines)))
    return pd.read_csv(path, skiprows=lines_to_skip, header=0, delimiter='\t')


def _row_at_time(df, time, path, tol=1e-6):
    """Return the index label of the first row whose retention time equals ``time`` within ``tol``."""
    # Compare with a tolerance: a value parsed from text may differ from the
    # Python literal by floating-point rounding, which defeats exact ``==``.
    hits = df.index[np.isclose(df['R.Time (min)'], time, rtol=0.0, atol=tol)].tolist()
    if not hits:
        raise ValueError(f"No row with R.Time (min) == {time} in {path!r}")
    return hits[0]


In [None]:
def collapse(DMF_data, aq_data):
    """Return the percent drop in peak ``MW_PEG`` from the DMF to the aqueous data.

    For each frame the peak is the row with the largest ``normalized RI``;
    the result is ``(1 - Mp_aq / Mp_DMF) * 100``.
    """
    frames = (DMF_data, aq_data)

    # Index label of the apex row in each frame.
    apex_dmf, apex_aq = [frame['normalized RI'].idxmax() for frame in frames]

    # MW_PEG value at each apex.
    mp_dmf, mp_aq = [
        frame.loc[apex, 'MW_PEG'] for frame, apex in zip(frames, (apex_dmf, apex_aq))
    ]

    ratio = mp_aq / mp_dmf
    return (1 - ratio) * 100

In [None]:
def anumlist():
    """Return the labels 'a1' ... 'h12': letters a-h, each paired with 1-12 in turn."""
    row_letters = 'abcdefgh'
    column_numbers = range(1, 13)  # 1 through 12 inclusive
    return [f"{row}{column}" for row in row_letters for column in column_numbers]

def natural_sort_key(file_path):
    """Build a sort key from a path's file name so digit runs compare as integers.

    The file name is split on runs of digits; digit chunks become ``int`` and
    every other chunk is lower-cased.
    """
    _, filename = os.path.split(file_path)
    key = []
    for chunk in re.split(r'(\d+)', filename):
        if chunk.isdigit():
            key.append(int(chunk))
        else:
            key.append(chunk.lower())
    return key

In [None]:
def process_files_from_directories(dmf_directory, aq_directory, output_excel, params_dmf, params_aq, dmf_blank, aq_blank, plot_directory, xlim=(2500, 35000)):
    """Process every DMF/aqueous file pair, plot each overlay and tabulate collapse.

    ``.txt`` files are paired by file name (without extension); a DMF file
    with no aqueous counterpart is skipped. For each pair both traces are run
    through ``process``, the collapse percentage is computed with
    ``collapse``, an overlay plot is saved as ``<name>.png`` in
    ``plot_directory`` and one row is added to the Excel summary.

    Parameters
    ----------
    dmf_directory, aq_directory : str
        Directories holding the DMF and aqueous ``.txt`` files.
    output_excel : str
        Path of the Excel file receiving the ``Filename`` / ``Collapse (%)`` table.
    params_dmf, params_aq : sequence
        Positional arguments placed between the file path and the blank path
        in the call to ``process``.
    dmf_blank, aq_blank : str
        Paths of the blank files passed as the last argument of ``process``.
    plot_directory : str
        Directory for the PNG plots; created if it does not exist.
    xlim : tuple of (float, float), optional
        Limits of the x-axis of every plot.
    """
    def list_txt_files(directory):
        # Map "name without extension" -> full path for every .txt file.
        return {
            os.path.splitext(f)[0]: os.path.join(directory, f)
            for f in os.listdir(directory)
            if f.endswith('.txt')
        }

    def order_key(name):
        # Order by the number formed from all digits in the name, then by its
        # letters. Names without any digit sort first instead of crashing on int('').
        digits = ''.join(filter(str.isdigit, name))
        letters = ''.join(filter(str.isalpha, name))
        return (int(digits) if digits else -1, letters)

    files_dmf = list_txt_files(dmf_directory)
    # The aqueous files are only looked up by name, so they need no ordering.
    files_aq = list_txt_files(aq_directory)

    # An empty plot_directory means "current directory"; nothing to create then.
    if plot_directory:
        os.makedirs(plot_directory, exist_ok=True)

    results = []
    for filename in sorted(files_dmf, key=order_key):
        if filename not in files_aq:
            continue

        # Process each dataset
        processed_dmf = process(files_dmf[filename], *params_dmf, dmf_blank)
        processed_aq = process(files_aq[filename], *params_aq, aq_blank)

        # Calculate collapse percentage
        collapse_value = collapse(processed_dmf, processed_aq)
        results.append({'Filename': filename, 'Collapse (%)': collapse_value})

        # Plot and save
        traces = (
            (processed_dmf, 'DMF', pink, '-'),
            (processed_aq, 'Aqueous', blue, '--'),
        )
        fig, ax = plt.subplots()
        try:
            for frame, label, color, linestyle in traces:
                ax.plot(frame['MW_PEG'], frame['normalized RI'], label=label, color=color, linestyle=linestyle)

            ax.set_xlabel('MW$_P$$_E$$_G$ (Da)')
            ax.set_ylabel('normalized RI (mV)')
            ax.legend()
            ax.set_xlim(*xlim)

            # Print collapse on the plot, in axes-relative coordinates.
            ax.text(0.5, 0.1, f'%collapse = {collapse_value:.1f}', ha='center', va='center', transform=ax.transAxes,
                    bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.5'))

            plotout = os.path.join(plot_directory, f"{filename}.png")
            fig.savefig(plotout, format='png')
        finally:
            # Always release the figure, even if saving fails.
            plt.close(fig)

    # Save results to Excel
    df_results = pd.DataFrame(results)
    df_results.to_excel(output_excel, index=False)

In [None]:
# Per-solvent arguments forwarded to process(), in this order:
# (start_marker, end_marker, coordinate, start_marker_cal, start_time, end_time)
params_dmf = (
    '[LC Chromatogram(Detector A-Ch1)]',
    '[LC Chromatogram(Detector B-Ch1)]',
    5,
    '[GPC Calibration Curve Table(Detector A)]',
    10,
    18,
)
params_aq = (
    '[LC Chromatogram(Detector B-Ch1)]',
    '[LC Status Trace(Pump A Pressure)]',
    3,
    '[GPC Calibration Curve Table(Detector B)]',
    10,
    25,
)

# Input directories. Every .txt file in them is processed, and files are paired
# by name, so corresponding files must have the same filename in each directory.
dmf_directory = 'dmf/directory'
aq_directory = 'aq/directory'

# Blank file paths
blank_aq = 'aq_blank.txt'
blank_dmf = 'dmf_blank.txt'

# Output locations
output_excel = 'collapse_output.xlsx'
plot_directory = 'collapse/plots/directory'

process_files_from_directories(
    dmf_directory=dmf_directory,
    aq_directory=aq_directory,
    output_excel=output_excel,
    params_dmf=params_dmf,
    params_aq=params_aq,
    dmf_blank=blank_dmf,
    aq_blank=blank_aq,
    plot_directory=plot_directory,
)