In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import numpy as np
from scipy.optimize import curve_fit

In [52]:
# ---------------------------------------------------------------------------
# Figure appearance
# ---------------------------------------------------------------------------
color_hex1 = '#CC0078'  # Histogram color
color_hex2 = '#667C85'  # CDF color
font_size = 20  # used for titles, axis labels and tick labels
plot_title = "HOTAIR H18"  # prefix of every figure title and output file name
line_thickness = 2
# Axis limits; not referenced by the functions shown in this notebook section.
min_x_value = 800
max_x_value = 1449
min_y_value = -1
max_y_value = 44

# ---------------------------------------------------------------------------
# Input data and output location
# NOTE(review): both paths are absolute and machine-specific; consider a
# configurable data directory so the notebook runs on other machines.
# ---------------------------------------------------------------------------
file_path = "C:/Users/lupe184g/Desktop/Postdoc/01_Projects/OT_data/Fiona/total_results_H18 fitted_final_LP_2.xlsx"
output_dir = "C:/Users/lupe184g/Desktop/Postdoc/01_Projects/OT_data/Fiona"
column_name = "Force, pN"  # column read from the Excel file; also the x-axis label

# ---------------------------------------------------------------------------
# Histogram settings
# ---------------------------------------------------------------------------
bin_width = 2  # bin width of the displayed histogram
min_x = 5      # lower end of the plotted / fitted x-range
max_x = 40     # upper end of the plotted / fitted x-range

# ---------------------------------------------------------------------------
# Gaussian-mixture fit: starting values and bounds
# One row per Gaussian: amplitude, mean, stddev. Adjust to your data.
# ---------------------------------------------------------------------------
initial_guesses = [
    0.15, 11, 2,     # Gaussian 1
    0.08, 25, 2,     # Gaussian 2
    0.015, 30, 0.5,  # Gaussian 3
]
# Each parameter may vary between 50 % and 120 % of its starting value.
lower_bounds = [guess / 2 for guess in initial_guesses]
upper_bounds = [guess * 1.2 for guess in initial_guesses]
num_gaussians = 3


In [50]:
def gaussian(x, amplitude, mean, stddev):
    """Evaluate a single Gaussian peak at ``x``.

    Computes ``amplitude * exp(-(x - mean)**2 / (2 * stddev**2))``, so
    ``amplitude`` is the height of the peak at ``x == mean`` (the curve is
    not normalised to unit area).

    Parameters
    ----------
    x : scalar or array-like supporting NumPy arithmetic
        Position(s) at which to evaluate the peak.
    amplitude, mean, stddev : float
        Peak height, centre and width parameter.

    Returns
    -------
    Same shape as ``x``.
    """
    return amplitude * np.exp(-((x - mean) ** 2) / (2 * stddev ** 2))

def multi_gaussian(x, *params):
    """Evaluate a sum of Gaussian peaks at ``x``.

    Parameters
    ----------
    x : scalar or array-like
        Position(s) at which to evaluate the model.
    *params : float
        Flat sequence ``amplitude1, mean1, stddev1, amplitude2, ...`` with
        three values per Gaussian. This flat layout is what
        ``scipy.optimize.curve_fit`` passes to the model function.

    Returns
    -------
    numpy.ndarray
        Float array with the shape of ``x`` holding the summed peaks
        (all zeros when no parameters are given).

    Raises
    ------
    ValueError
        If the number of parameters is not a multiple of three.
    """
    if len(params) % 3 != 0:
        raise ValueError(
            f"Expected 3 parameters per Gaussian (amplitude, mean, stddev), "
            f"got {len(params)} values."
        )

    # Force a float accumulator: np.zeros_like on an integer ``x`` would give an
    # integer array, and the in-place ``+=`` of float values would then fail.
    x = np.asarray(x, dtype=float)
    total = np.zeros_like(x)
    for amplitude, mean, stddev in zip(params[0::3], params[1::3], params[2::3]):
        total += gaussian(x, amplitude, mean, stddev)
    return total

def load_excel_data(file_path, column_name):
    """Read an Excel file and return one column with missing values removed.

    The workbook is read with ``pd.read_excel`` using its default options.

    Parameters
    ----------
    file_path : path to the Excel file.
    column_name : label of the column to extract.

    Returns
    -------
    pandas.Series
        The requested column after ``dropna()``; the original row index of
        the surviving entries is kept.

    Raises
    ------
    ValueError
        If ``column_name`` is not among the columns of the loaded table.
    """
    table = pd.read_excel(file_path)
    if column_name in table.columns:
        return table[column_name].dropna()
    raise ValueError(f"Column '{column_name}' not found in the Excel file.")

def format_plot(ax, title):
    """Apply the notebook's shared look to a Matplotlib Axes (modified in place).

    Sets the title, gives all four spines a line width of 2, colors the top
    and right spines white, and configures inward-pointing major ticks. Title
    and tick-label sizes come from the module-level ``font_size``.

    Parameters
    ----------
    ax : matplotlib Axes to style.
    title : text shown as the Axes title.
    """
    ax.set_title(title, fontsize=font_size)

    # Same border weight on every side ...
    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_linewidth(2)
    # ... but the top and right borders are drawn in white.
    for side in ('top', 'right'):
        ax.spines[side].set_color('white')

    ax.tick_params(
        axis='both',
        which='major',
        labelsize=font_size,
        direction='in',
        length=6,
        width=2,
    )

def fit_gaussians(data, num_gaussians, x_range, num_bin_edges=20):
    """Fit a sum of ``num_gaussians`` Gaussians to the density histogram of ``data``.

    The data are binned between ``x_range[0]`` and ``x_range[1]`` and the
    model ``multi_gaussian`` is fitted to the bin centers with
    ``scipy.optimize.curve_fit``. Starting values are the first
    ``3 * num_gaussians`` entries of the module-level ``initial_guesses``;
    when both ``lower_bounds`` and ``upper_bounds`` are non-empty, the
    matching entries are used as box constraints.

    Parameters
    ----------
    data : array-like
        Samples to histogram.
    num_gaussians : int
        Number of Gaussian components to fit.
    x_range : (float, float)
        Lower and upper edge of the binned region.
    num_bin_edges : int, optional
        Number of equally spaced bin edges (``num_bin_edges - 1`` bins).
        Defaults to 20.

    Returns
    -------
    numpy.ndarray
        Fitted parameters, flat, as ``amplitude, mean, stddev`` per Gaussian.

    Raises
    ------
    ValueError
        If ``num_gaussians`` is smaller than 1, if fewer than
        ``3 * num_gaussians`` initial guesses or bounds are configured, or if
        the histogram over ``x_range`` is not finite (for example because no
        sample falls inside the range).
    """
    if num_gaussians < 1:
        raise ValueError("num_gaussians must be at least 1.")

    n_params = 3 * num_gaussians
    if len(initial_guesses) < n_params:
        raise ValueError(
            f"{num_gaussians} Gaussians need {n_params} initial guesses, "
            f"but only {len(initial_guesses)} are configured."
        )

    # Only the parameters of the requested components take part in the fit, so
    # the returned array always holds exactly ``num_gaussians`` triples.
    fit_kwargs = {"p0": list(initial_guesses[:n_params])}
    if lower_bounds and upper_bounds:
        if len(lower_bounds) < n_params or len(upper_bounds) < n_params:
            raise ValueError(
                f"{num_gaussians} Gaussians need {n_params} lower and upper bounds."
            )
        fit_kwargs["bounds"] = (list(lower_bounds[:n_params]), list(upper_bounds[:n_params]))

    bin_edges = np.linspace(x_range[0], x_range[1], num_bin_edges)
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

    # NOTE(review): this binning is independent of the bin width used for the
    # displayed histogram, and with explicit edges np.histogram ignores samples
    # outside them when normalising. Confirm this is the scale intended for
    # the overlay.
    with np.errstate(divide='ignore', invalid='ignore'):
        hist_values, _ = np.histogram(data, bins=bin_edges, density=True)
    if not np.all(np.isfinite(hist_values)):
        raise ValueError(
            f"Histogram over x_range={tuple(x_range)} is not finite; check that the "
            f"data contain values inside that range and that the range is not empty."
        )

    params, _covariance = curve_fit(multi_gaussian, bin_centers, hist_values, **fit_kwargs)
    return params

def plot_histogram(data, output_dir, column_name, bin_width=0.5, x_range=(0, 100), num_gaussians=3):
    """Plot a density histogram with fitted Gaussian components and save the results.

    Draws ``data`` as a seaborn density histogram, fits ``num_gaussians``
    Gaussians via ``fit_gaussians`` and overlays each fitted component as a
    dashed black line. The figure is written as PNG and SVG and the fitted
    parameters as CSV, all into ``output_dir`` with file names starting with
    ``"{plot_title}_{column_name}_"``.

    Parameters
    ----------
    data : array-like or pandas.Series
        Samples to plot and fit.
    output_dir : str
        Directory for the output files; created if it does not exist.
    column_name : str
        Used as x-axis label, in the title and in the output file names.
    bin_width : float, optional
        Bin width of the displayed histogram.
    x_range : (float, float), optional
        x-axis limits; also the range passed to the fit.
    num_gaussians : int, optional
        Number of Gaussian components requested from ``fit_gaussians``.

    Returns
    -------
    None
    """
    if output_dir:
        # savefig / to_csv do not create missing directories themselves.
        os.makedirs(output_dir, exist_ok=True)

    png_path = os.path.join(output_dir, f"{plot_title}_{column_name}_histogram.png")
    svg_path = os.path.join(output_dir, f"{plot_title}_{column_name}_histogram.svg")
    csv_path = os.path.join(output_dir, f"{plot_title}_{column_name}_fitted_gaussians.csv")

    fig, ax = plt.subplots(figsize=(8, 6), dpi=300)
    try:
        sns.histplot(data, binwidth=bin_width, kde=False, color=color_hex1, ax=ax, stat="density")
        ax.set_xlim(x_range)
        ax.set_xlabel(column_name, fontsize=font_size)
        ax.set_ylabel("Probability density, pN-1", fontsize=font_size)
        format_plot(ax, plot_title + " - " + column_name)

        # Fit Gaussian models
        params = fit_gaussians(data, num_gaussians, x_range)
        x_fit = np.linspace(x_range[0], x_range[1], 1000)

        fitted_amplitudes = params[::3]
        fitted_means = params[1::3]
        fitted_stddevs = params[2::3]
        # Label the components that were actually fitted, so plotting and the
        # CSV table can never disagree with the length of the fit result.
        labels = [f'Gaussian {i+1}' for i in range(len(fitted_amplitudes))]

        # Only the individual components are drawn, not their sum.
        for label, amplitude, mean, stddev in zip(labels, fitted_amplitudes, fitted_means, fitted_stddevs):
            ax.plot(x_fit, gaussian(x_fit, amplitude, mean, stddev),
                    color='black', linestyle='dashed', linewidth=3, label=label)

        ax.legend()

        # Save through the figure object rather than pyplot's "current figure".
        fig.savefig(png_path)
        fig.savefig(svg_path, format='svg')
    finally:
        # Release the figure even when the fit or the export fails.
        plt.close(fig)

    # Save fitted values to CSV
    df_fitted = pd.DataFrame({
        "Gaussian": labels,
        "Amplitude": fitted_amplitudes,
        "Mean": fitted_means,
        "StdDev": fitted_stddevs
    })
    df_fitted.to_csv(csv_path, index=False)
    print(f"Histogram with Gaussian fits saved as {png_path} and {svg_path}")
    print(f"Fitted values saved as {csv_path}")

def plot_cdf(data, output_dir, column_name):
    """Plot the empirical cumulative distribution of ``data`` and save it.

    The curve is drawn with seaborn's ``ecdfplot`` and written as PNG and SVG
    into ``output_dir`` with file names
    ``"{plot_title}_{column_name}_cdf.png"`` / ``".svg"``.

    Parameters
    ----------
    data : array-like or pandas.Series
        Samples whose cumulative distribution is plotted.
    output_dir : str
        Directory for the output files; created if it does not exist.
    column_name : str
        Used as x-axis label, in the title and in the output file names.

    Returns
    -------
    None
    """
    if output_dir:
        # savefig does not create missing directories itself.
        os.makedirs(output_dir, exist_ok=True)

    png_path = os.path.join(output_dir, f"{plot_title}_{column_name}_cdf.png")
    svg_path = os.path.join(output_dir, f"{plot_title}_{column_name}_cdf.svg")

    fig, ax = plt.subplots(figsize=(8, 6), dpi=300)
    try:
        # Use the color the configuration declares for the CDF (color_hex2);
        # this function previously drew the curve in the histogram color.
        # line_thickness replaces the literal line width used before.
        sns.ecdfplot(data, color=color_hex2, ax=ax, linewidth=line_thickness)
        ax.set_xlabel(column_name, fontsize=font_size)
        ax.set_ylabel("Cumulative Probability", fontsize=font_size)
        format_plot(ax, plot_title + " - " + column_name)

        # Save through the figure object rather than pyplot's "current figure".
        fig.savefig(png_path)
        fig.savefig(svg_path, format='svg')
    finally:
        # Release the figure even when plotting or the export fails.
        plt.close(fig)

    print(f"CDF saved as {png_path} and {svg_path}")



In [53]:

# Load the configured column once and reuse it for both figures.
data = load_excel_data(file_path, column_name)
# Pass num_gaussians explicitly; otherwise plot_histogram silently falls back
# to its own default and the configured value has no effect.
plot_histogram(data, output_dir, column_name, bin_width, (min_x, max_x), num_gaussians)
plot_cdf(data, output_dir, column_name)

Histogram with Gaussian fits saved as C:/Users/lupe184g/Desktop/Postdoc/01_Projects/OT_data/Fiona\HOTAIR H18_Force, pN_histogram.png and C:/Users/lupe184g/Desktop/Postdoc/01_Projects/OT_data/Fiona\HOTAIR H18_Force, pN_histogram.svg
Fitted values saved as C:/Users/lupe184g/Desktop/Postdoc/01_Projects/OT_data/Fiona\HOTAIR H18_Force, pN_fitted_gaussians.csv
CDF saved as C:/Users/lupe184g/Desktop/Postdoc/01_Projects/OT_data/Fiona\HOTAIR H18_Force, pN_cdf.png and C:/Users/lupe184g/Desktop/Postdoc/01_Projects/OT_data/Fiona\HOTAIR H18_Force, pN_cdf.svg
