In [None]:
# Line Plot
import pandas as pd
import matplotlib.pyplot as plt

# File paths and labels
# Fill these in before running. Use raw strings (r"...") for Windows paths so
# backslashes are not treated as escape sequences.
data_sets = []  # file paths here

labels = []  # file names here (one legend label per entry in data_sets)

# Colors for the plot lines; reused cyclically when there are more data sets
# than colors, so no data set is silently dropped.
colors = ["cornflowerblue", "hotpink", "mediumturquoise"]

# Set global font settings.
# This must happen BEFORE the figure is created: rcParams such as
# 'figure.facecolor' are only consulted when a figure/axes is instantiated.
plt.rcParams.update({
    'font.size': 30,
    'font.family': 'serif',
    'font.serif': ['Times New Roman'],
    'figure.facecolor': 'none',
    'axes.facecolor': 'none'
})

# Set figure size
fig, ax = plt.subplots(figsize=(10, 6))

# Plotting the data: one line per CSV, restricted to Energy <= 3500
for index, (data_set, label) in enumerate(zip(data_sets, labels)):
    df = pd.read_csv(data_set)
    df = df[df['Energy'] <= 3500]
    ax.plot(
        df['Energy'],
        df['Peak/Baseline Ratio'],
        label=label,
        color=colors[index % len(colors)],
    )

# Set labels and title
ax.set_xlabel('Energy (cm-1)')
ax.set_ylabel('Peak to Baseline Ratio')
ax.set_title('Line Plot: Peak to Baseline Ratios')

# Display legend
ax.legend(fontsize=15)  # Keeping the legend font size smaller

# Save plot with transparent background
save_path = r""  # path to save here
fig.savefig(save_path, format='pdf', transparent=True)

# Show the plot
plt.show()

In [None]:
#Violin plot

#import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#CREATE
def create_violin_plot(data_sets, labels, save_path, max_energy=3500):
    """Draw one violin per dataset of 'Peak/Baseline Ratio' and save it as a PDF.

    Each CSV is read with pandas, restricted to rows whose 'Energy' value is at
    most ``max_energy``, and tagged with its label. The violins are drawn in the
    order the labels first appear in the combined data, and each violin is
    annotated with its median (rounded to two decimals).

    Parameters
    ----------
    data_sets : sequence of str or path-like
        CSV files to read. Each must contain 'Energy' and
        'Peak/Baseline Ratio' columns.
    labels : sequence
        Display label for each entry of ``data_sets`` (paired positionally).
    save_path : str or path-like
        Destination handed to ``savefig``; the figure is written in PDF format.
    max_energy : float, optional
        Upper bound (inclusive) applied to the 'Energy' column. Defaults to
        3500, the value that was previously hard-coded.

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when no dataset is supplied.
    """
    value_col = 'Peak/Baseline Ratio'

    frames = []
    for data_set, label in zip(data_sets, labels):
        raw = pd.read_csv(data_set)
        # .assign returns a new frame, so the label column is never written
        # onto a filtered slice (avoids SettingWithCopyWarning).
        frames.append(raw[raw['Energy'] <= max_energy].assign(Dataset=label))

    combined_df = pd.concat(frames, ignore_index=True)

    # Fix the violin order explicitly (order of first appearance, which is what
    # seaborn would pick for string labels) so the medians below can be
    # aligned with it.
    order = combined_df['Dataset'].unique().tolist()

    custom_colors = ["cornflowerblue", "mediumturquoise", "hotpink"]
    # Give exactly one color per violin, cycling when there are more than three.
    palette = [custom_colors[i % len(custom_colors)] for i in range(len(order))]

    fig, ax = plt.subplots(figsize=(10, 6), facecolor='lightpink')
    sns.violinplot(
        x='Dataset',
        y=value_col,
        data=combined_df,
        order=order,
        palette=palette,
        ax=ax,
    )

    # groupby sorts its keys, which generally differs from the plotting order;
    # reindex so that position i really is the median of the i-th violin.
    medians = combined_df.groupby('Dataset')[value_col].median().reindex(order)

    for position, median in enumerate(medians):
        if pd.isna(median):
            continue  # nothing sensible to annotate
        ax.text(
            position + 0.05,
            median,
            f'{round(median, 2)}',
            ha='left',
            va='center',
            fontweight='bold',
            size=12,
            color='white',
        )

    ax.set_xlabel('Datasets')
    ax.set_ylabel('Peak to Baseline Ratio')
    ax.set_title('Violin Plot: Distribution and Density of Ratios')
    fig.tight_layout()
    fig.savefig(save_path, format='pdf')
    plt.show()

# Fill these in before running. Use raw strings (r"...") for Windows paths so
# backslashes are not treated as escape sequences.
data_sets = []  # file paths here

# One label per entry in data_sets, in the same order.
labels = ["file names here"]

save_path = r""  # path to save here
create_violin_plot(data_sets, labels, save_path)


In [None]:
#Ridgeline and derivatives plots

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import os  # file path library

# Define the directory containing the CSV files
# (each file is read for its 'Energy' and 'Peak/Baseline Ratio' columns)
csv_dir = r""  # path to the directory containing the CSV files here

# List all CSV files in the directory.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# label/color assignment identical from run to run.
csv_files = [os.path.join(csv_dir, file) for file in sorted(os.listdir(csv_dir)) if file.endswith('.csv')]
labels = [os.path.basename(file) for file in csv_files]

# Define the path where you want to save the plots
save_path = r""  # path location here

def _kde_curve(subset, column):
    """Return the (x, y) samples of seaborn's KDE line for ``subset[column]``.

    The curve is drawn on a throwaway figure that is always closed again, so
    no stray blank figure is left open. Returns ``None`` when there is no data
    or when seaborn does not draw a line for it.
    """
    if subset[column].dropna().empty:
        return None

    scratch_fig, scratch_ax = plt.subplots()
    try:
        sns.kdeplot(data=subset, x=column, fill=False, ax=scratch_ax)
        drawn = scratch_ax.get_lines()
        if not drawn:
            return None
        return (
            np.asarray(drawn[0].get_xdata(), dtype=float),
            np.asarray(drawn[0].get_ydata(), dtype=float),
        )
    finally:
        # Close (not just clear) the scratch figure so plt.show() only
        # displays the real one and nothing accumulates in memory.
        plt.close(scratch_fig)


def plot_ridgeline(csv_files, labels, save_path, max_energy=3500, xlim=(0, 10)):
    """Plot KDEs of 'Peak/Baseline Ratio' plus their first and second derivatives.

    The figure has three stacked panels sharing the same data:

    1. filled kernel-density estimates, one per dataset;
    2. the numerical first derivative of each density curve;
    3. the numerical second derivative of each density curve.

    Each CSV is read with pandas and restricted to rows whose 'Energy' value
    is at most ``max_energy``. Derivatives are computed with ``np.gradient`` on
    the curve seaborn draws for the dataset. Datasets for which no density
    curve can be obtained are skipped with a warning instead of raising.

    Note: this sets ``font.family`` and ``font.size`` in the global
    ``plt.rcParams``, which also affects figures created afterwards.

    Parameters
    ----------
    csv_files : sequence of str or path-like
        CSV files to read. Each must contain 'Energy' and
        'Peak/Baseline Ratio' columns.
    labels : sequence
        Display label for each entry of ``csv_files`` (paired positionally).
    save_path : str or path-like
        Where to save the figure. If it is an existing directory the figure is
        written there as 'ridgeline_plots.pdf'; otherwise it is used as the
        file path and the format is taken from its extension. A falsy value
        (``None`` or ``""``) skips saving.
    max_energy : float, optional
        Upper bound (inclusive) applied to the 'Energy' column. Defaults to
        3500, the value that was previously hard-coded.
    xlim : tuple of (float, float), optional
        x-axis limits applied to all three panels. Defaults to ``(0, 10)``.

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when no dataset is supplied.
    """
    import warnings

    value_col = 'Peak/Baseline Ratio'
    custom_colors = ["mediumturquoise", "cornflowerblue", "hotpink"]

    frames = []
    for data_set, label in zip(csv_files, labels):
        raw = pd.read_csv(data_set)
        # .assign returns a new frame (no write onto a filtered slice).
        frames.append(raw[raw['Energy'] <= max_energy].assign(Dataset=label))

    # Combine all data into a single DataFrame
    combined_df = pd.concat(frames, ignore_index=True)

    # Compute every density curve once, up front; both derivative panels reuse
    # it. Colors cycle so that more than three datasets are still all plotted.
    entries = []
    for index, label in enumerate(labels):
        subset = combined_df[combined_df['Dataset'] == label]
        curve = _kde_curve(subset, value_col)
        if curve is None:
            warnings.warn(f"No density curve could be computed for {label!r}; skipping it.")
            continue
        color = custom_colors[index % len(custom_colors)]
        entries.append((label, color, subset, curve))

    # Set the font type to Times New Roman and the font size for the entire plot
    plt.rcParams['font.family'] = 'Times New Roman'
    plt.rcParams['font.size'] = 30

    # Set up the figure and its three panels with transparent backgrounds
    fig, (ax_density, ax_first, ax_second) = plt.subplots(
        3, 1, figsize=(10, 20), facecolor='none', subplot_kw={'facecolor': 'none'}
    )

    for label, color, subset, (x, y) in entries:
        # First panel: Ridgeline Plot
        sns.kdeplot(
            data=subset, x=value_col, fill=True, color=color, alpha=0.1,
            label=label, ax=ax_density,
        )

        # Second and third panels: numerical derivatives of the density curve
        first_derivative = np.gradient(y, x)
        second_derivative = np.gradient(first_derivative, x)
        ax_first.plot(
            x, first_derivative, linestyle='--', color=color, alpha=0.8,
            label=f"{label} derivative",
        )
        ax_second.plot(
            x, second_derivative, linestyle='--', color=color, alpha=0.8,
            label=f"{label} second derivative",
        )

    ax_density.set_xlabel('Peak to Baseline Ratio', fontsize=20)
    ax_density.set_ylabel('Density', fontsize=20)
    ax_density.set_title('Ridgeline Plot: Distribution and Density of Ratios \n', fontsize=30)

    ax_first.set_xlabel('Peak to Baseline Ratio', fontsize=20)
    ax_first.set_ylabel('Derivative of Ridgeline Plot: Density', fontsize=20)
    ax_first.set_title('Derivative of Ridgeline Plot: \n Peak to Baseline Ratio', fontsize=30)

    ax_second.set_xlabel('Peak to Baseline Ratio', fontsize=20)
    ax_second.set_ylabel('Second Derivative of Ridgeline Plot: Density', fontsize=20)
    ax_second.set_title('Second Derivative of Ridgeline Plot: \n Peak to Baseline Ratio', fontsize=30)

    for ax in (ax_density, ax_first, ax_second):
        ax.legend(loc='upper right', fontsize=15)
        ax.set_xlim(*xlim)

    fig.tight_layout()

    # Save the figure (previously save_path was accepted but never used).
    if save_path:
        target = os.fspath(save_path)
        if os.path.isdir(target):
            target = os.path.join(target, 'ridgeline_plots.pdf')
        fig.savefig(target)

    # Show the plots
    plt.show()

# Build and show the KDE ridgeline plot and its first/second derivative panels
# for the CSV files collected in csv_files (labelled by their file names).
plot_ridgeline(csv_files, labels, save_path)
