In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import glob 
import os
import re

# Named palette colors: each hex string is converted with mcolors.to_rgba
# so the name can be passed wherever matplotlib expects a color.
purple = mcolors.to_rgba('#4A1B4F')
pink = mcolors.to_rgba('#CC2A57')
blue = mcolors.to_rgba('#417CBF')
green = mcolors.to_rgba('#23967C')
yellow = mcolors.to_rgba('#D9A433')
grey = mcolors.to_rgba('#595959')

In [None]:
#function to make the calibration curve
def calcurve(filepath, color):
    """Fit the GPC calibration curve stored in an instrument export file.

    The file is scanned for the '[GPC Calibration Curve Table(Detector A)]'
    section. The table header is taken to be two lines below that marker and
    the last data row two lines above the first following line that contains
    'Comment'. A 5th-order polynomial of log10(M.W.) versus 'Time(min)' is
    then fitted to the table.

    Parameters
    ----------
    filepath : str
        Path to the tab-delimited export file.
    color : object
        Unused. Kept so existing callers do not need to change.

    Returns
    -------
    numpy.poly1d
        Polynomial mapping retention time (min) to log10(molecular weight).

    Raises
    ------
    ValueError
        If the calibration section, its terminating 'Comment' line, or any
        data row between them cannot be found.
    """
    start_marker_cal = '[GPC Calibration Curve Table(Detector A)]'
    end_marker_cal = 'Comment'

    with open(filepath) as file:
        lines = file.readlines()

    marker_index = next(
        (i for i, line in enumerate(lines) if start_marker_cal in line), None
    )
    if marker_index is None:
        raise ValueError(f"{filepath}: section {start_marker_cal!r} not found")

    # Look for the end marker only *after* the section start: a 'Comment'
    # line earlier in the file would otherwise put the end before the start
    # and the whole table would be skipped.
    comment_index = next(
        (i for i in range(marker_index + 1, len(lines)) if end_marker_cal in lines[i]),
        None,
    )
    if comment_index is None:
        raise ValueError(
            f"{filepath}: no {end_marker_cal!r} line found after {start_marker_cal!r}"
        )

    start_index_cal = marker_index + 2   # header row of the table
    end_index_cal = comment_index - 2    # last data row of the table
    if end_index_cal <= start_index_cal:
        raise ValueError(f"{filepath}: calibration table contains no data rows")

    # Skip everything outside [start_index_cal, end_index_cal] so pandas
    # only sees the header row and the data rows of the table.
    lines_to_skip = list(range(0, start_index_cal)) + list(range(end_index_cal + 1, len(lines)))
    table = pd.read_csv(filepath, skiprows=lines_to_skip, header=0, delimiter='\t')

    # Only the retention time and molecular weight columns are needed.
    x = table['Time(min)'].values
    y = np.log10(table['M.W.']).values

    coeff = np.polyfit(x, y, 5)
    return np.poly1d(coeff)

In [None]:
def _read_chromatogram(path, start_marker, end_marker):
    """Read the tab-delimited table located between two section markers.

    The header row is taken to be 7 lines below the line containing
    `start_marker`; the last row kept is the line just above the first
    following line containing `end_marker`.
    """
    with open(path) as file:
        lines = file.readlines()

    start_hit = next((i for i, line in enumerate(lines) if start_marker in line), None)
    if start_hit is None:
        raise ValueError(f"{path}: section {start_marker!r} not found")

    end_hit = next(
        (i for i in range(start_hit + 1, len(lines)) if end_marker in lines[i]), None
    )
    if end_hit is None:
        raise ValueError(f"{path}: section {end_marker!r} not found after {start_marker!r}")

    header_row = start_hit + 7
    last_row = end_hit - 1

    # Skip every line outside [header_row, last_row].
    lines_to_skip = list(range(0, header_row)) + list(range(last_row + 1, len(lines)))
    return pd.read_csv(path, skiprows=lines_to_skip, header=0, delimiter='\t')


def process(filepath, csvout, color, blank, t_start=0, t_end=18):
    """Convert one exported chromatogram into a normalized MW trace.

    Steps: fit the calibration curve from `filepath`, read the Detector A
    chromatogram of the sample and of the blank, keep the retention-time
    window [t_start, t_end), subtract the blank intensity once, convert
    retention time to molecular weight with the calibration polynomial, and
    min-max normalize the intensity.

    Parameters
    ----------
    filepath : str
        Path to the sample export file (also holds the calibration table).
    csvout : str
        Unused; CSV export is currently disabled (see the note in the body).
    color : object
        Forwarded to calcurve(); not otherwise used here.
    blank : str
        Path to the blank export file.
    t_start, t_end : float, optional
        Retention-time window in minutes; rows with t_start <= time < t_end
        are kept. Defaults (0 and 18) match the previously hard-coded window.

    Returns
    -------
    pandas.DataFrame
        Columns 'MW_PEG' and 'normalized RI' for the selected window.

    Raises
    ------
    ValueError
        If a section marker is missing, the window selects no rows, or the
        blank does not cover the rows selected from the sample.
    """
    poly_function = calcurve(filepath, color)

    # start and end markers of the chromatogram section in the text file
    start_marker = '[LC Chromatogram(Detector A-Ch1)]'
    end_marker = '[LC Chromatogram(Detector B-Ch1)]'

    sample_df = _read_chromatogram(filepath, start_marker, end_marker)
    blank_df = _read_chromatogram(blank, start_marker, end_marker)

    # Select the window by range instead of exact float equality, so a time
    # grid that never hits exactly t_start / t_end still works.
    times = sample_df['R.Time (min)']
    window = (times >= t_start) & (times < t_end)
    if not window.any():
        raise ValueError(
            f"{filepath}: no data points with {t_start} <= R.Time (min) < {t_end}"
        )
    data = sample_df.loc[window, ['R.Time (min)', 'Intensity']].copy()

    # The blank is matched to the sample row by row (same row labels).
    missing = data.index.difference(blank_df.index)
    if len(missing) > 0:
        raise ValueError(
            f"{blank}: blank has no rows matching {len(missing)} of the sample points"
        )

    # Subtract the blank exactly once (it used to be subtracted from the full
    # table and then again from the selected rows).
    data['Intensity'] = data['Intensity'] - blank_df.loc[data.index, 'Intensity'].values

    # The calibration polynomial returns log10(MW); undo the log.
    data['MW_PEG'] = 10 ** poly_function(data['R.Time (min)'].values)

    # Min-max normalize the blank-corrected intensity.
    lowest = data['Intensity'].min()
    highest = data['Intensity'].max()
    data['normalized RI'] = (data['Intensity'] - lowest) / (highest - lowest)

    # CSV export is disabled; re-enable by uncommenting the next line.
    # data.to_csv(csvout, index=False)

    # The peak molecular weight (Mp) can be read from the result as
    # processed.loc[processed['normalized RI'].idxmax(), 'MW_PEG'].
    processed = data[['MW_PEG', 'normalized RI']]
    return processed
    

In [None]:
def plotting(directory, title, x_min, x_max, colors, labels, plotout, csvout, blank):
    """Process every .txt file in `directory` and overlay the traces in one plot.

    Parameters
    ----------
    directory : str
        Folder scanned for '.txt' files; files are plotted in natural-sort order.
    title : str
        Figure title.
    x_min, x_max : float
        Limits of the x axis.
    colors : sequence
        Colors for the traces; reused cyclically when there are more files
        than colors.
    labels : sequence of str or None
        Legend labels, one per trace in plotting order. Traces beyond the
        supplied labels are labelled 'Trace <n> (<path>)'. The sequence is
        not modified.
    plotout : str
        Path of the PNG file to write.
    csvout : str
        Forwarded to process().
    blank : str
        Path to the blank file, forwarded to process().
    """
    file_list = [
        os.path.join(directory, f)
        for f in os.listdir(directory)
        if f.endswith('.txt') and os.path.isfile(os.path.join(directory, f))
    ]
    file_list.sort(key=natural_sort_key)

    supplied_labels = list(labels) if labels is not None else []

    fig, ax = plt.subplots()
    for idx, filename in enumerate(file_list):
        color = colors[idx % len(colors)]
        df = process(filename, csvout, color, blank)

        # Attach the label to the line itself so each legend entry is tied to
        # its own trace. The caller's `labels` list is read, never appended
        # to, so repeated calls do not accumulate stale entries.
        if idx < len(supplied_labels):
            label = supplied_labels[idx]
        else:
            label = f'Trace {idx+1} ({filename})'
        ax.plot(df['MW_PEG'], df['normalized RI'], color=color, label=label)

    ax.set_xlabel('MW$_P$$_E$$_G$ (Da)')
    ax.set_ylabel('normalized RI')
    ax.set_title(title)
    if file_list:
        ax.legend()
    ax.set_xlim(x_min, x_max)

    # Save before showing, so the file is written from the fully drawn figure
    # regardless of how the backend treats figures after show().
    fig.savefig(plotout, format='png')
    plt.show()

In [None]:
def anumlist():
    """Return the 96 labels 'a1', 'a2', ..., 'h12'.

    Letters 'a' through 'h' are combined with the numbers 1 through 12;
    the letter varies slowest, so the list runs a1..a12, b1..b12, and so on.
    """
    return [f"{row}{col}" for row in 'abcdefgh' for col in range(1, 13)]

def natural_sort_key(file_path):
    """Build a sort key that orders file names with embedded numbers numerically.

    Only the base name of `file_path` is used. It is split into alternating
    text and digit runs; digit runs become ints and text runs are lower-cased,
    so 'f2.txt' sorts before 'f10.txt'.
    """
    base_name = os.path.basename(file_path)
    key = []
    for chunk in re.split(r'(\d+)', base_name):
        if chunk.isdigit():
            key.append(int(chunk))
        else:
            key.append(chunk.lower())
    return key

In [None]:
# Directory where the exported files are stored; plotting() processes every .txt file found in it
directory = 'directory/path'

# Path to the blank run, whose intensity is subtracted from each trace
blank = 'blank.txt'

# Plot title
title = ''

# x-axis limits of the plot (the x axis is MW_PEG in Da)
x_min = 0
x_max = 20000

# File path the plot is saved to (PNG)
plot_output_file = 'GPCplot.png'

# Colors for the traces: num_colors samples taken evenly across the tab20 colormap
num_colors = 96
colors = plt.cm.tab20(np.linspace(0, 1, num_colors))

# Legend labels for the traces
labels = ['1', '2', '3']

# Line styles for the traces
# NOTE(review): `lines` is not passed to plotting() below — confirm whether it is still needed
lines = ['-', '--', '-.']

# File path intended for the raw-data CSV
# NOTE(review): the to_csv call in process() is commented out, so no CSV is written at present
csv_output_file = 'outputcsv.csv'

plotting(directory, title, x_min, x_max, colors, labels, plot_output_file, csv_output_file, blank)