# Hydrogen Bond Energy Analysis

This notebook calculates a residue-residue hydrogen bond energy map for a molecular dynamics trajectory. The analysis uses functions from the `native_contacts.py` script, which replicates the smooth energy potentials from the `hbond.cpp` implementation in the upside2 force field.

In [None]:
import os
import sys
import warnings

import matplotlib.pyplot as plt
import nglview as nv
import numpy as np
import pandas as pd
import seaborn as sns

warnings.filterwarnings('ignore')

# Import the new analysis functions
import native_contacts as nc

# Add utils to path for mdtraj_upside
sys.path.append('../ff2_rg/utils')
import mdtraj_upside as mu
import plotting_utils as pu

%matplotlib inline

## 1. Run Analysis

Set the path to your trajectory file and run the analysis. The script will compute the hydrogen bond energy map for each frame.

In [None]:
# --- Parameters ---
traj_file = "/project/trsosnic/okleinmann/oliver/04.HDX/outputs/REMD/Kmarx_Pab1/original/Kmarx_Pab1.run.47.up"
stride = 100  # Larger values make the analysis faster

# --- Run Analysis ---
# `results` is only bound when the trajectory file exists; otherwise the
# missing path is reported and no analysis is run.
if os.path.exists(traj_file):
    results = nc.run_hbond_analysis(traj_file, stride=stride)
else:
    print(f"Error: Trajectory file not found at {traj_file}")

## 2. Visualize Average Energy Map

The following function averages the per-frame energy maps over all frames analyzed in the previous step (the trajectory is subsampled by `stride`) and plots the result as a heatmap. This visualization highlights the most stable and significant hydrogen bond interactions between residues.

In [None]:
def plot_average_hbond_energy_map(energy_maps, n_residues):
    """
    Calculates and plots the time-averaged hydrogen bond energy map.

    Parameters
    ----------
    energy_maps : array-like or None
        Stack of per-frame energy maps; the average is taken along axis 0.
        Anything `np.asarray` accepts (ndarray, list of 2D maps, ...) works.
        `None` or an empty input prints a message and returns without plotting.
    n_residues : int
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    None
        The heatmap is shown via matplotlib and summary statistics are printed.
    """
    # Coerce to an ndarray so lists (and None) do not fail on `.size`.
    maps = np.asarray([] if energy_maps is None else energy_maps)
    if maps.size == 0:
        print("No energy data to plot.")
        return

    # Calculate the average map over all frames
    average_map = np.mean(maps, axis=0)

    # Explicit figure/axes instead of the implicit pyplot "current axes" state
    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(average_map, cmap="viridis", linewidths=.5, ax=ax)
    ax.set_title('Average Hydrogen Bond Energy Map')
    ax.set_xlabel('Residue Index')
    ax.set_ylabel('Residue Index')
    plt.show()

    # Print some statistics
    total_avg_energy = np.sum(average_map)
    max_interaction = np.max(average_map)
    print(f"Total average energy in map: {total_avg_energy:.4f}")
    print(f"Maximum average interaction energy: {max_interaction:.4f}")

In [None]:
# Plot the results.
# `results` is only defined when the trajectory file was found in the analysis
# step, so skip with a message instead of raising NameError when it is missing.
if 'results' in locals():
    plot_average_hbond_energy_map(results['energy_maps'], results['n_residues'])
else:
    print("Skipping average energy map because analysis results are not available.")

## 3. Trajectory Visualization

Interactive 3D visualization of the molecular trajectory using NGLView. The protein is drawn as a gray cartoon, with the Cα atoms of acidic residues (ASP, GLU) shown as red spheres and those of basic residues (LYS, ARG) shown as blue spheres.

In [None]:
# Load trajectory for visualization
# NOTE(review): stride=1 is hard-coded here and presumably loads every frame,
# unlike the `stride` variable used for the energy analysis — confirm this is intended.
print(f"Loading trajectory for visualization: {traj_file}")
traj = mu.load_upside_traj(traj_file, stride=1)

# Create atom selections: C-alpha atoms of acidic (ASP/GLU) and basic (LYS/ARG) residues
# NOTE(review): presumably `select` returns atom indices (mdtraj-style topology) — confirm
acid_sele = traj.top.select("(resname ASP or resname GLU) and name CA")
base_sele = traj.top.select("(resname LYS or resname ARG) and name CA")

# Create NGLView widget
view = nv.NGLWidget()
view.add_trajectory(traj[:])
# Clear existing representations before adding the custom ones below
view.clear_representations()
view.add_cartoon(color='gray')
view.add_spacefill(selection=acid_sele, color='red')
view.add_spacefill(selection=base_sele, color='blue')
view.camera = 'orthographic'

# Bare last expression so the notebook renders the widget as the cell output
view


In [None]:
def make_dummy_timeseries(n_frames, run_ids=(0, 1, 2, 3, 4), seed=0):
    """
    Build placeholder per-run timeseries data for the plotting demonstrations.

    Each run gets an exponential decay over the frame index plus uniform noise.
    This is dummy data, not derived from the trajectory.

    Parameters
    ----------
    n_frames : int
        Number of frames per run.
    run_ids : sequence of int
        Run identifiers; larger ids decay more slowly.
    seed : int
        Seed for the random generator so a re-run reproduces the same data.

    Returns
    -------
    pandas.DataFrame
        Columns 'frame', 'run_id', 'count_fraction', 'energy_fraction',
        with one row per (run, frame) pair, ordered by run and then frame.
    """
    columns = ['frame', 'run_id', 'count_fraction', 'energy_fraction']
    rng = np.random.default_rng(seed)
    frames = np.arange(n_frames)

    per_run = []
    for run_id in run_ids:
        # Simulate some data decay (whole-array operations, no per-frame loop)
        count_fraction = np.exp(-frames / (n_frames * (1 + run_id * 0.2))) + rng.random(n_frames) * 0.1
        energy_fraction = np.exp(-frames / (n_frames * (1 + run_id * 0.25))) + rng.random(n_frames) * 0.08
        per_run.append(pd.DataFrame({
            'frame': frames,
            'run_id': run_id,
            'count_fraction': count_fraction,
            'energy_fraction': energy_fraction,
        }, columns=columns))

    if not per_run:
        # pd.concat raises on an empty list; return an empty frame with the same schema
        return pd.DataFrame(columns=columns)
    return pd.concat(per_run, ignore_index=True)


# Calculate time series data for plotting
if 'results' in locals() and results['energy_maps'].size > 0:
    # This is a placeholder for the actual timeseries calculation.
    # In a real scenario, this would involve running the NativeContactsAnalyzer logic.
    print("Creating dummy timeseries data for plotting demonstrations.")
    n_frames = results['energy_maps'].shape[0]
    timeseries_data = make_dummy_timeseries(n_frames)  # Assuming 5 runs (default run_ids)

    # Generate the new plots
    print("\n--- Generating New Plots ---")
    pu.plot_individual_timeseries(timeseries_data)
    pu.plot_all_timeseries_subplots(timeseries_data)
    pu.plot_overlaid_timeseries(timeseries_data)
else:
    print("Skipping new plots because analysis results are not available.")