In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Function to process a single .xvg file
def process_xvg(file_path, group_name):
    """Load a two-column .xvg file into a DataFrame.

    Lines that are blank or whose first non-whitespace character is '#' or
    '@' are skipped; the remaining lines are parsed as whitespace-separated
    numbers.

    Parameters
    ----------
    file_path : str or path-like
        Location of the .xvg file to read.
    group_name : str
        Name given to the second (value) column of the result.

    Returns
    -------
    pandas.DataFrame or None
        A frame with the columns "Time" and ``group_name``, or None when the
        file cannot be read or parsed (the error is printed, not raised).
    """
    try:
        # The context manager closes the handle even when parsing fails.
        with open(file_path) as handle:
            rows = [
                line for line in handle
                # lstrip() so indented header lines are filtered out as well.
                if line.strip() and not line.lstrip().startswith(('#', '@'))
            ]
        if not rows:
            raise ValueError("no data rows found")

        data = np.genfromtxt(rows)
        if len(rows) == 1:
            # genfromtxt collapses a single row to 1-D; restore the
            # (rows, columns) layout that the DataFrame constructor expects.
            data = data.reshape(1, -1)

        return pd.DataFrame(data, columns=["Time", f"{group_name}"])
    except Exception as e:
        # Deliberately best-effort: report the problem and return None so the
        # caller can carry on with the remaining files.
        print(f"Error processing file {file_path}: {e}")
        return None

In [None]:
# Function to combine multiple .xvg files into a single DataFrame
def combine_xvg_files(xvg_files):
    """Merge several .xvg files into one DataFrame keyed on "Time".

    Each file is loaded with ``process_xvg`` and its value column is named
    "Replicate_<n>", where n is the 1-based position of the file in
    ``xvg_files``. Files that fail to load are skipped. The loaded frames
    are joined with an outer merge on "Time".

    Returns the merged DataFrame, or None when no file could be loaded.
    """
    loaded_frames = []
    for position, path in enumerate(xvg_files, start=1):
        frame = process_xvg(path, f"Replicate_{position}")
        if frame is not None:
            loaded_frames.append(frame)

    if not loaded_frames:
        return None

    # Fold the remaining frames into the first one, left to right.
    merged = loaded_frames[0]
    for frame in loaded_frames[1:]:
        merged = pd.merge(merged, frame, on="Time", how="outer")
    return merged

In [None]:
# Function to plot density for multiple datasets
def plot_density(xvg_files, title, output_png, x_label, data_type):
    """Draw one filled KDE curve per .xvg file, then save and show the figure.

    Parameters
    ----------
    xvg_files : sequence of str
        Paths handed to ``combine_xvg_files``; every value column of the
        combined frame becomes one density curve.
    title : str
        Figure title.
    output_png : str
        Path the figure is written to.
    x_label : str
        Label for the x axis.
    data_type : str
        Short name of the quantity, used only in the confirmation message.

    Returns
    -------
    None
        Prints "No data to plot." and returns when nothing could be loaded.
    """
    # Combine all .xvg files into a single DataFrame
    combined_df = combine_xvg_files(xvg_files)
    if combined_df is None:
        print("No data to plot.")
        return

    # Base palette; reused cyclically when there are more curves than colors.
    colors = ["red", "blue", "green", "purple", "orange"]

    fig, ax = plt.subplots(figsize=(8, 6))

    # Plot density for each replicate (skip the "Time" column)
    for i, column in enumerate(combined_df.columns[1:]):
        sns.kdeplot(
            # The outer merge leaves NaN where a replicate has no sample for
            # a time point; drop those before estimating the density.
            combined_df[column].dropna(),
            fill=True,
            # Modulo keeps the index in range for more than five replicates.
            color=colors[i % len(colors)],
            label=column,
            ax=ax,
        )

    # Customize the plot
    ax.set_title(title, fontsize=12, weight='bold')
    ax.set_xlabel(x_label, fontsize=12, weight='bold')
    ax.set_ylabel('Density', fontsize=12, weight='bold')

    # `fig` was just created, so it is the current figure for these calls.
    plt.xticks(fontsize=10, weight='bold')
    plt.yticks(fontsize=10, weight='bold')

    ax.legend(loc='upper right')
    fig.tight_layout()

    # Save the plot
    fig.savefig(output_png, bbox_inches='tight')
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print(f"Saved {data_type} density plot as {output_png}")

In [None]:
# Example usage for RMSD data
# Input files for the RMSD plot, one .xvg path per replicate (rep1-rep5).
xvg_files_rmsd = [
    "sim_ana/rmsd/pro_BB_rmsd_rep1.xvg",
    "sim_ana/rmsd/pro_BB_rmsd_rep2.xvg",
    "sim_ana/rmsd/pro_BB_rmsd_rep3.xvg",
    "sim_ana/rmsd/pro_BB_rmsd_rep4.xvg",
    "sim_ana/rmsd/pro_BB_rmsd_rep5.xvg",
]

# Overlay the per-replicate RMSD densities and write the figure to disk.
plot_density(
    xvg_files=xvg_files_rmsd,
    title="RMSD Density Plot",
    output_png="img_ana/rmsd_density_plot.png",
    x_label="RMSD (nm)",
    data_type="RMSD",
)

In [None]:
# Example usage for distance data
# Input files for the distance plot, one .xvg path per replicate (rep1-rep5).
xvg_files_distance = [
    "sim_ana/mindist/ligand_Y37_rep1.xvg",
    "sim_ana/mindist/ligand_Y37_rep2.xvg",
    "sim_ana/mindist/ligand_Y37_rep3.xvg",
    "sim_ana/mindist/ligand_Y37_rep4.xvg",
    "sim_ana/mindist/ligand_Y37_rep5.xvg",
]

# Overlay the per-replicate distance densities and write the figure to disk.
plot_density(
    xvg_files=xvg_files_distance,
    title="Distance Density Plot",
    output_png="img_ana/mindist_density_plot.png",
    x_label="Distance (nm)",
    data_type="Distance",
)