# In [None]:
import MDAnalysis as mda
import numpy as np
import pandas as pd
from collections import defaultdict
import sys # Import sys to exit if file not found

# --- Configuration ---
# Structure file used to build the MDAnalysis Universe (e.g. .pdb or .gro).
# The file must already exist at this location.
# Alternative inputs (swap the assignment below to use one of them):
#   "/scratch/angiod/HEroBM/A2A_A2B/a2a.cg.gro"
#   "/home/angiod@usi.ch/HEroBM/backmapped/pdb.6k/martini2/3kg9.pdb.CG_0.pdb"
pdb_file = "/scratch/angiod/HEroBM/A2A_A2B/a2a.cg.elnedyn21.pdb"

# Bead names of the Martini coarse-grained model that are inspected for
# connections. Order matters: the measurement loop pairs each name with the
# one that follows it (BB-SC1, SC1-SC2, ...).
relevant_bead_names = [
    "BB",
    "SC1",
    "SC2",
    "SC3",
    "SC4",
]

# --- Load the Molecular Dynamics Universe ---
# Build the Universe from the configured structure file. Any failure is
# reported on stdout and the script stops with exit status 1.
print(f"Loading universe from {pdb_file}...")
try:
    u = mda.Universe(pdb_file)
    print("Universe loaded successfully.")
except FileNotFoundError:
    # A missing input gets its own message so the path can be corrected.
    print(
        f"Error: PDB file not found at {pdb_file}",
        "Please check the file path and ensure the file exists.",
        sep="\n",
    )
    sys.exit(1)
except Exception as load_error:
    # Every other loading problem: show the underlying error, then stop.
    print(f"An error occurred while loading the universe: {load_error}")
    sys.exit(1)

# --- Data Structure to Store Distances ---
# Maps a connection type to every distance measured for it in the structure.
# Key format: (resname1, beadname1, resname2, beadname2)
# Value: list of distances, one per occurrence of that connection type.
distances = defaultdict(list)


def _first_atom_named(residue, bead_name):
    """Return the first atom of *residue* called *bead_name*, or None.

    ``select_atoms`` yields an empty group when nothing matches, so the group
    is checked before indexing instead of letting ``[0]`` raise IndexError.
    """
    matches = residue.atoms.select_atoms(f"name {bead_name}")
    if len(matches) == 0:
        return None
    return matches[0]


def _bead_distance(atom_a, atom_b):
    """Return the Euclidean distance between the positions of two atoms."""
    return np.linalg.norm(atom_b.position - atom_a.position)


# --- Iterate Through the Structure to Find Connections and Measure Distances ---
print("Calculating distances for connected beads...")

# Consecutive bead-name pairs checked inside each residue
# (BB-SC1, SC1-SC2, SC2-SC3, SC3-SC4 for the default configuration).
# NOTE(review): a pair is treated as "connected" purely because both bead
# names are present in the residue; no bond/topology information is read.
# Side chains whose beads are bonded in a different pattern are therefore not
# described exactly -- confirm against the force-field topology if needed.
intra_residue_pairs = list(zip(relevant_bead_names, relevant_bead_names[1:]))

for segment in u.segments:
    # Fetch the residues once per segment rather than on every iteration.
    residues = list(segment.residues)

    # --- Intra-residue connections (within the same residue) ---
    for residue in residues:
        for bead1_name, bead2_name in intra_residue_pairs:
            atom1 = _first_atom_named(residue, bead1_name)
            atom2 = _first_atom_named(residue, bead2_name)
            if atom1 is None or atom2 is None:
                continue  # this residue does not carry both beads
            # Same resname on both sides because the pair is intra-residue;
            # bead order follows relevant_bead_names.
            key = (residue.resname, bead1_name, residue.resname, bead2_name)
            distances[key].append(_bead_distance(atom1, atom2))

    # --- Inter-residue connections (between consecutive residues) ---
    # BB-BB distance between each residue and the next one in the segment.
    for res1, res2 in zip(residues, residues[1:]):
        # Neighbours in the list are only counted when their resids are
        # consecutive, so a gap from missing residues is not measured.
        if res2.resid != res1.resid + 1:
            continue

        bb_atom1 = _first_atom_named(res1, "BB")
        bb_atom2 = _first_atom_named(res2, "BB")
        if bb_atom1 is None or bb_atom2 is None:
            # Residue without a backbone bead: skip the pair
            # instead of failing with IndexError.
            continue

        # Sort the resnames so that e.g. ALA->GLY and GLY->ALA neighbours are
        # pooled under a single key.
        first_resname, second_resname = sorted((res1.resname, res2.resname))
        key = (first_resname, 'BB', second_resname, 'BB')
        distances[key].append(_bead_distance(bb_atom1, bb_atom2))

print("Finished calculating distances.")

# --- Aggregate and Compute Statistics ---
# Turn the per-connection distance lists into one summary record each.
print("Aggregating results and computing statistics...")
rows = []

# Visit the connection types in sorted order so the table is reproducible.
for connection in sorted(distances):
    measured = distances[connection]

    # Connection types without any measurement are left out of the table.
    if not measured:
        continue

    first_res, first_bead, second_res, second_bead = connection
    summary = {
        # Bead identifiers are written as resname.beadname
        "resname1.beadname1": f"{first_res}.{first_bead}",
        "resname2.beadname2": f"{second_res}.{second_bead}",
        # How many times this connection type occurred
        "count": len(measured),
        "mean_distance": np.mean(measured),
        "std_distance": np.std(measured),
        "min_distance": np.min(measured),
        "max_distance": np.max(measured),
    }
    rows.append(summary)

# --- Create and Save DataFrame ---
# Column layout of the CSV, in the order the columns are written.
output_columns = [
    "resname1.beadname1",
    "resname2.beadname2",
    "count",
    "mean_distance",
    "std_distance",
    "min_distance",
    "max_distance",
]

# Build the table from the collected records and force the column order above.
df = pd.DataFrame(rows).reindex(columns=output_columns)

# Destination file for the statistics table.
output_csv_file = "connected_bead_distance_stats.elnedyn21.csv"

# Write the table; the DataFrame index is not included in the file.
df.to_csv(output_csv_file, index=False)

print(f"Distance statistics saved to {output_csv_file}")
print("Script finished.")