In [None]:
# # Example: Calculating Basic MD Analysis Metrics

# This notebook demonstrates how to use functions from the `md_analysis_tools` library to calculate fundamental metrics often analyzed from molecular dynamics trajectories:
# 
# *   **Distances:** Between specific atoms or groups over time.
# *   **RMSF:** Root Mean Square Fluctuation per atom/residue.
# *   **Rg:** Radius of Gyration over time.
# *   **RMSD:** Root Mean Square Deviation relative to a reference structure.
# 
# **Workflow:**
# 1. Import libraries.
# 2. Load simulation data (topology/trajectory).
# 3. Define selections and parameters for each analysis.
# 4. Call the corresponding functions from `md_analysis_tools`.
# 5. Plot the results (e.g., metrics vs. frame index, or per residue).


In [None]:
# Import necessary libraries
import md_analysis_tools # Our custom library
import MDAnalysis as mda
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import sys # For checking errors

# Configure plotting style (optional).
# The 'seaborn-v0_8-*' style names were introduced in matplotlib 3.6; older
# releases ship the same sheet as 'seaborn-poster'. Use whichever name this
# matplotlib installation provides, and keep the default style if neither is
# available, so that a purely cosmetic setting cannot abort the notebook.
plot_style = next(
    (name for name in ('seaborn-v0_8-poster', 'seaborn-poster') if name in plt.style.available),
    None,
)
if plot_style is not None:
    plt.style.use(plot_style)


In [None]:
# ## 1. Load Simulation Data

# We need the topology and trajectory files for the simulation we want to analyze. For RMSD calculations, we also need a reference structure file.

# **ACTION:** Replace placeholder paths with your actual file paths.


In [None]:
# --- User Input: Define File Paths ---
# Every "placeholder" value below must be replaced with a real path.
topology_file = "placeholder.prmtop"        # <-- REPLACE: topology file
trajectory_file = "placeholder.dcd"         # <-- REPLACE: trajectory file
reference_pdb_file = "placeholder_ref.pdb"  # <-- REPLACE: reference PDB (used for RMSD only)
output_dir = "basic_metrics_output"         # CSV files and figures are written here

# Make sure the output directory is present (no error if it already exists)
os.makedirs(output_dir, exist_ok=True)

# --- Load Universe ---
# `u` remains None unless the Universe is constructed; the analysis cells
# further down test it before doing any work.
u = None
print("Loading trajectory...")
try:
    if not (os.path.exists(topology_file) and os.path.exists(trajectory_file)):
        raise FileNotFoundError(
            f"Ensure topology ('{topology_file}') and trajectory ('{trajectory_file}') files exist."
        )
    u = mda.Universe(topology_file, trajectory_file)
    print(f"Successfully loaded Trajectory Universe with {len(u.trajectory)} frames.")
except Exception as load_error:
    # A failed load is reported but is not fatal: the notebook keeps running
    # and later cells skip themselves.
    print(f"Error loading Trajectory Universe: {load_error}", file=sys.stderr)

# --- Load Reference Universe (for RMSD) ---
# Same pattern as above: `u_ref` remains None unless the reference is loaded.
print("\nLoading reference structure...")
u_ref = None
try:
    if not os.path.exists(reference_pdb_file):
        raise FileNotFoundError(f"Reference PDB file ('{reference_pdb_file}') not found.")
    u_ref = mda.Universe(reference_pdb_file)
    print("Successfully loaded Reference Universe.")
except Exception as ref_error:
    print(f"Error loading Reference Universe: {ref_error}", file=sys.stderr)
    print("RMSD calculation relative to reference will be skipped.")



In [None]:
# ## 2. Calculate Distances

# We use `calculate_distances` to measure the distance between specified pairs of atom groups (or single atoms) over time. Define pairs using MDAnalysis selection strings.


In [None]:
# --- Define Distance Pairs ---
# Each entry is a (selection_a, selection_b) tuple of MDAnalysis selection strings.
# Example pairs (REPLACE with selections relevant to your system):
# Pair 1: C-alpha distance between residue 10 and 50
# Pair 2: COM distance between protein chain A and a ligand named LIG
# Pair 3: Distance between atom O in residue 20 and atom N in residue 25
selection_pairs_for_dist = [
    ("resid 10 and name CA", "resid 50 and name CA"),
    ("protein and chainID A", "resname LIG"),
    ("resid 20 and name O", "resid 25 and name N"),
]

# --- Calculate Distances ---
# df_distances stays None when the calculation is skipped or fails; the
# plotting cell checks for that.
df_distances = None
if u is not None:  # explicit None check: do not rely on the truthiness of a Universe object
    print("\nCalculating distances...")
    df_distances = md_analysis_tools.calculate_distances(
        universe=u,
        selection_pairs=selection_pairs_for_dist,
        # Optional: Adjust frame range/step
        # start_frame=0, stop_frame=-1, step=10
    )

    if df_distances is not None:
        print("\nDistance calculation summary:")
        print(df_distances.head())
        # Save results
        dist_out_file = os.path.join(output_dir, "distances.csv")
        df_distances.to_csv(dist_out_file, index=False)
        print(f"Saved distances to {dist_out_file}")
    else:
        print("Distance calculation failed.", file=sys.stderr)
else:
    # Report the skip instead of staying silent, as the RMSD cell does.
    print("\nSkipping distance calculation because trajectory Universe failed to load.", file=sys.stderr)


In [None]:
# ### Plot Distances vs. Time


In [None]:
# --- Plot Distances ---
if df_distances is not None:
    # Pick the value columns by name rather than by position, so the plot does
    # not depend on 'Frame' being the first column of the DataFrame.
    distance_columns = [col for col in df_distances.columns if col != 'Frame']

    if distance_columns:
        print("\nPlotting distances...")
        fig, ax = plt.subplots(figsize=(12, 6))

        # One line per distance pair, all against the frame index
        for col in distance_columns:
            ax.plot(df_distances['Frame'], df_distances[col], label=col, alpha=0.8)

        ax.set_title('Distances Over Time')
        ax.set_xlabel('Frame Index')
        ax.set_ylabel('Distance (Å)')
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))  # Legend outside plot
        ax.grid(True, linestyle=':')
        fig.tight_layout(rect=[0, 0, 0.85, 1])  # Leave room on the right for the legend
        fig.savefig(os.path.join(output_dir, "distances_plot.png"), dpi=300)
        plt.show()
    else:
        # Nothing to draw; avoids an empty figure and an empty-legend warning.
        print("Skipping distance plotting: no distance columns found.")
else:
    print("Skipping distance plotting.")


In [None]:
# ## 3. Calculate Radius of Gyration (Rg)

# We use `calculate_radius_of_gyration` to measure the overall compactness of a selected group (e.g., all C-alpha atoms of the protein) over time.


In [None]:
# --- Define Rg Selection ---
rg_selection = "protein and name CA"  # MDAnalysis selection string for the Rg calculation

# --- Calculate Rg ---
# df_rg stays None when the calculation is skipped or fails; the plotting
# cell checks for that.
df_rg = None
if u is not None:  # explicit None check: do not rely on the truthiness of a Universe object
    print(f"\nCalculating Radius of Gyration (Rg) for selection: '{rg_selection}'...")
    df_rg = md_analysis_tools.calculate_radius_of_gyration(
        universe=u,
        selection=rg_selection,
        # Optional: Adjust frame range/step
        # start_frame=0, stop_frame=-1, step=10
    )

    if df_rg is not None:
        print("\nRg calculation summary:")
        print(df_rg.head())
        # Save results
        rg_out_file = os.path.join(output_dir, "radius_of_gyration.csv")
        df_rg.to_csv(rg_out_file, index=False)
        print(f"Saved Rg to {rg_out_file}")
    else:
        print("Rg calculation failed.", file=sys.stderr)
else:
    # Report the skip instead of staying silent, as the RMSD cell does.
    print("\nSkipping Rg calculation because trajectory Universe failed to load.", file=sys.stderr)


In [None]:
# ### Plot Rg vs. Time


In [None]:
# --- Plot Rg ---
# Draws Rg against the frame index and writes the figure to output_dir.
if df_rg is None:
    print("Skipping Rg plotting.")
else:
    print("\nPlotting Radius of Gyration...")
    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(df_rg['Frame'], df_rg['Rg'], label=rg_selection)
    ax.set_title('Radius of Gyration Over Time')
    ax.set_xlabel('Frame Index')
    ax.set_ylabel('Radius of Gyration (Å)')
    ax.legend()
    ax.grid(True, linestyle=':')
    fig.tight_layout()
    rg_plot_path = os.path.join(output_dir, "radius_of_gyration_plot.png")
    fig.savefig(rg_plot_path, dpi=300)
    plt.show()


In [None]:
# ## 4. Calculate Root Mean Square Fluctuation (RMSF)

# We use `calculate_rmsf` to determine the fluctuation of each atom in a selection around its average position after aligning the trajectory. This indicates flexibility.


In [None]:
# --- Define RMSF Selections ---
rmsf_selection = "protein and name CA"        # Atoms for RMSF calculation
rmsf_align_selection = "protein and name CA"  # Atoms for alignment (or None to skip align)

# --- Calculate RMSF ---
# df_rmsf stays None when the calculation is skipped or fails; the plotting
# cell checks for that.
df_rmsf = None
if u is not None:  # explicit None check: do not rely on the truthiness of a Universe object
    print(f"\nCalculating RMSF for selection: '{rmsf_selection}'...")
    # Note: calculate_rmsf returns a DataFrame directly now
    df_rmsf = md_analysis_tools.calculate_rmsf(
        universe=u,
        selection=rmsf_selection,
        # Alignment is requested only when an alignment selection is given
        align=(rmsf_align_selection is not None),
        align_selection=rmsf_align_selection,
        # Optional: Adjust frame range/step
        # start_frame=100, stop_frame=-1, step=5 # Example: skip equilibration
    )

    if df_rmsf is not None:
        print("\nRMSF calculation summary:")
        print(df_rmsf.head())
        # Save results
        rmsf_out_file = os.path.join(output_dir, "rmsf_results.csv")
        df_rmsf.to_csv(rmsf_out_file, index=False)
        print(f"Saved RMSF to {rmsf_out_file}")
    else:
        print("RMSF calculation failed.", file=sys.stderr)
else:
    # Report the skip instead of staying silent, as the RMSD cell does.
    print("\nSkipping RMSF calculation because trajectory Universe failed to load.", file=sys.stderr)


In [None]:
# ### Plot RMSF vs. Residue


In [None]:
# --- Plot RMSF ---
# Draws RMSF per residue and writes the figure to output_dir.
if df_rmsf is None:
    print("Skipping RMSF plotting.")
else:
    print("\nPlotting RMSF...")
    # The x-axis uses the 'Resid' column when present, otherwise 'Resnum'
    if 'Resid' in df_rmsf.columns:
        x_axis = 'Resid'
    else:
        x_axis = 'Resnum'

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(df_rmsf[x_axis], df_rmsf['RMSF'], marker='.', linestyle='-', label=rmsf_selection)
    # Alternative: a bar plot
    # ax.bar(df_rmsf[x_axis], df_rmsf['RMSF'], width=0.8, label=rmsf_selection)
    ax.set_title('RMSF per Residue')
    ax.set_xlabel('Residue Identifier')  # Generic label: the column may be Resid or Resnum
    ax.set_ylabel('RMSF (Å)')
    ax.legend()
    ax.grid(True, linestyle=':')
    fig.tight_layout()
    fig.savefig(os.path.join(output_dir, "rmsf_plot.png"), dpi=300)
    plt.show()


In [None]:
# ## 5. Calculate Root Mean Square Deviation (RMSD)

# We use `calculate_rmsd_relative_to_ref` to measure the deviation of the trajectory structures from a given reference structure, after alignment.


In [None]:
# --- Define RMSD Selections ---
rmsd_selection = "protein and name CA"        # Atoms for RMSD calculation
rmsd_align_selection = "protein and name CA"  # Atoms for alignment (or None to use rmsd_selection)

# --- Calculate RMSD ---
# df_rmsd stays None when the calculation is skipped or fails; the plotting
# cell checks for that.
df_rmsd = None
# Explicit None checks: do not rely on the truthiness of Universe objects.
if u is not None and u_ref is not None:
    print(f"\nCalculating RMSD relative to '{reference_pdb_file}'...")
    df_rmsd = md_analysis_tools.calculate_rmsd_relative_to_ref(
        universe=u,
        reference_universe=u_ref,
        selection=rmsd_selection,
        align_selection=rmsd_align_selection,
        # Optional: Adjust frame range/step
        # start_frame=0, stop_frame=-1, step=10
    )

    if df_rmsd is not None:
        print("\nRMSD calculation summary:")
        print(df_rmsd.head())
        # Save results
        rmsd_out_file = os.path.join(output_dir, "rmsd_results.csv")
        df_rmsd.to_csv(rmsd_out_file, index=False)
        print(f"Saved RMSD to {rmsd_out_file}")
    else:
        print("RMSD calculation failed.", file=sys.stderr)
elif u is None:
    print("\nSkipping RMSD calculation because trajectory Universe failed to load.", file=sys.stderr)
else:
    # Only remaining case: the trajectory loaded but the reference did not.
    print("\nSkipping RMSD calculation because Reference Universe failed to load.", file=sys.stderr)



In [None]:
# ### Plot RMSD vs. Time


In [None]:
# --- Plot RMSD ---
# Draws RMSD against the frame index and writes the figure to output_dir.
if df_rmsd is None:
    print("Skipping RMSD plotting.")
else:
    print("\nPlotting RMSD...")
    # The legend entry names the reference file (basename only)
    rmsd_label = f"RMSD to {os.path.basename(reference_pdb_file)}"

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(df_rmsd['Frame'], df_rmsd['RMSD'], label=rmsd_label)
    ax.set_title('RMSD Over Time')
    ax.set_xlabel('Frame Index')
    ax.set_ylabel('RMSD (Å)')
    ax.legend()
    ax.grid(True, linestyle=':')
    fig.tight_layout()
    fig.savefig(os.path.join(output_dir, "rmsd_plot.png"), dpi=300)
    plt.show()



In [None]:
#%% md