In [None]:
"""
Module: Chromatogram Comparison Plotter
Author: Fabio Rodrigues
Version: 2.0
Description: This module provides a function to load, baseline correct,
             and plot two chromatograms for comparison. It handles data
             from CSV files and extracts sample information for labeling
             from a separate text file.
"""

__author__ = "Fabio Rodrigues"
__version__ = "2.0"

# Version 2

2025/04/29

By Fabio Rodrigues

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from scipy import signal
from scipy.optimize import curve_fit

In [None]:
# Notebook-wide display defaults.
# Figure DPI: per the original note, this changes the graph size shown on screen.
plt.rcParams.update({'figure.dpi': 120})

# Seaborn "darkgrid" theme with a few overrides: white solid grid lines,
# black axes edges, and tick marks switched on for the bottom and left axes.
sns.set_style(
    style="darkgrid",
    rc={
        "grid.color": "w",
        "grid.linestyle": "-",
        'axes.edgecolor': 'k',
        'xtick.bottom': True,
        'ytick.left': True,
    },
)


In [None]:

def plotter_chrom_compare(
    folderA, exp_numberA, xminA, xmaxA, degreeA, xlimA, ylimA, colourA,
    folderB, exp_numberB, xminB, xmaxB, degreeB, xlimB, ylimB, colourB,
    data_root=r"C:\Users\Documents\Data\CM"):
    """Load, baseline-correct and overlay two chromatograms in a single plot.

    Each chromatogram (A and B) is loaded, restricted to its own retention-time
    window, baseline-corrected independently, and then drawn on one shared
    figure so the two traces can be compared.

    Expected directory layout, relative to ``data_root``::

        <data_root>/<folder>/<exp_number>/Sample_ID.txt
        <data_root>/<folder>/<exp_number>/<exp_number>_Chrom.csv

    Parameters (the ``A``/``B`` suffix selects the chromatogram):
    folderA, folderB (str or int)         : Folder identifier.
    exp_numberA, exp_numberB (str or int) : Experiment number.
    xminA, xminB (float)     : Lower bound (exclusive) of the time window kept
                               for baseline fitting and plotting, in the units
                               of RT after division by 60.
    xmaxA, xmaxB (float)     : Upper bound (exclusive) of that time window.
    degreeA, degreeB (int)   : Degree of the polynomial used for the second
                               baseline-correction pass.
    xlimA (tuple)            : X-axis limits of the shared plot; also used to
                               place the major x ticks at a step of 1.
    ylimA (tuple)            : Y-axis limits of the shared plot.
    xlimB, ylimB (tuple)     : Accepted so both argument groups have the same
                               shape, but NOT applied: both traces share one
                               axes, which uses the limits given for A.
    colourA, colourB (str)   : Line colour of each chromatogram.
    data_root (str or Path)  : Root directory holding the experiment folders.
                               The default is only illustrative of the expected
                               structure; pass your own location.

    Returns:
    None. The figure is displayed with ``plt.show()``.

    Raises:
    ValueError        : If a time window contains no data points.
    FileNotFoundError : If 'Sample_ID.txt' or the '_Chrom.csv' file is missing.

    Side effects:
    Sets ``plt.rcParams['figure.dpi']`` and the seaborn style globally, and
    prints both sample-information tables to the console.

    Example:
    plotter_chrom_compare(
    '181023', '430', 10, 25, 10, (10, 25), (-1000, 10000),'dodgerblue',
    '181023', '431', 10, 20, 10, (10, 20), (0, 10000), 'orange')
    """
    # Function-scope import so the cell does not depend on an extra
    # notebook-level import.
    from pathlib import Path

    # Set the figure DPI
    plt.rcParams['figure.dpi'] = 120

    # Set the seaborn style
    sns.set_style("darkgrid", {"grid.color": "w",
                               "grid.linestyle": "-",
                               'axes.edgecolor': 'k',
                               'xtick.bottom': True,
                               'ytick.left': True})

    def file_loader(folder, exp_number):
        """Load chromatogram data and sample information for one experiment.

        Reads 'Sample_ID.txt' (colon-separated 'Key:Value' lines, no header)
        and '<exp_number>_Chrom.csv' (space-delimited, no header) from
        ``<data_root>/<folder>/<exp_number>``. The sample-information table is
        printed to the console.

        Args:
            folder (str or int): Folder identifier containing the experiment.
            exp_number (str or int): Experiment number identifier.

        Returns:
            tuple: ``(dfcm, sample_info)`` where
                - dfcm (pd.DataFrame): chromatogram with 'RT' and 'int' columns.
                - sample_info (pd.DataFrame): contents of 'Sample_ID.txt'. The
                  plot label is taken from the second value on the second line
                  (``sample_info.iloc[1, 1]``); e.g. if that line is
                  'Sample_Name:MySample', the label is 'MySample'.

        Example of Sample_ID.txt structure:
            Experiment:Control
            Sample_Name:SampleX
            Condition:Untreated
            Analyst:Fabio
            Date:2023-10-23
        """
        exp_dir = Path(data_root) / str(folder) / str(exp_number)

        # Files are opened explicitly (rather than handing the path to pandas)
        # so the text encoding stays the platform default used by open().
        with open(exp_dir / "Sample_ID.txt", 'r') as info_file:
            sample_info = pd.read_csv(info_file, sep=":", header=None)

        with open(exp_dir / f"{exp_number}_Chrom.csv", 'r') as chrom_file:
            dfcm = pd.read_csv(chrom_file, delimiter=' ', names=["RT", "int"])

        print(sample_info)
        print()
        return dfcm, sample_info

    def df_slicer(dfcm, xmin, xmax):
        """Return the rows with xmin < RT/60 < xmax, with RT already rescaled."""
        rescaled = dfcm.copy()
        # Presumably converts seconds to minutes (the x axis is labelled
        # 'Time/min') - confirm against the instrument export format.
        rescaled['RT'] = rescaled['RT'] / 60
        window = rescaled[(rescaled['RT'] > xmin) & (rescaled['RT'] < xmax)]
        if len(window) == 0:
            raise ValueError(
                f"No chromatogram data points with {xmin} < RT < {xmax}; "
                "check the xmin/xmax arguments.")
        return window

    def baseline_correction4(df3cm, degree):
        """Remove linear drift, then a polynomial baseline, from 'int'.

        Returns ``(x_val, baseline_corrected)``: the RT values and the
        intensities after both correction passes.
        """
        x_val = df3cm['RT']

        def linear_drift(x, a, b):
            # curve_fit passes the independent variable as the FIRST argument
            # and the fit parameters after it, so x must be explicit here.
            return a * x + b

        # First pass: fit and subtract a straight line (linear drift).
        popt, _pcov = curve_fit(linear_drift, x_val, df3cm['int'])
        first_corr = df3cm['int'] - linear_drift(x_val, *popt)

        # Second pass: fit a polynomial of the requested degree to the
        # drift-corrected signal and subtract it as well.
        coefficients = np.polyfit(x_val, first_corr, degree)
        poly_function = np.poly1d(coefficients)
        baseline_corrected = first_corr - poly_function(x_val)

        return x_val, baseline_corrected

    dfAcm, sample_infoA = file_loader(folderA, exp_numberA)
    dfBcm, sample_infoB = file_loader(folderB, exp_numberB)

    dfA3cm = df_slicer(dfAcm, xminA, xmaxA)
    dfB3cm = df_slicer(dfBcm, xminB, xmaxB)

    x_valA, baseline_correctedA = baseline_correction4(dfA3cm, degreeA)
    x_valB, baseline_correctedB = baseline_correction4(dfB3cm, degreeB)

    # Sample names: second value on the second line of each Sample_ID.txt.
    sample_nameA = sample_infoA.iloc[1, 1]
    sample_nameB = sample_infoB.iloc[1, 1]

    # Combine both chromatograms in a single plot
    plt.figure(figsize=(8, 4.5), dpi=300)

    # Plot chromatogram A
    plt.plot(x_valA, baseline_correctedA,
             color=colourA,
             label=sample_nameA,
             lw=0.5)

    # Plot chromatogram B
    plt.plot(x_valB, baseline_correctedB,
             color=colourB,
             label=sample_nameB,
             lw=0.5)

    # Both traces share one axes; the limits given for A define the view.
    plt.xlim(xlimA)
    plt.ylim(ylimA)

    plt.xticks(np.arange(*xlimA, 1),
               rotation=0,
               size=8)

    plt.minorticks_on()
    plt.grid(which='major', linestyle='-', linewidth=0.75)
    plt.grid(which='minor', linestyle=':', linewidth=0.5)

    plt.xlabel('Time/min')
    plt.ylabel('Intensity/a.u.')
    plt.legend()
    plt.title(f"Chromatogram {sample_nameA} x {sample_nameB}")

    plt.show()
