In [3]:
import pandas as pd
import numpy as np
from MDAnalysis import Universe
from MDAnalysis.analysis.distances import distance_array
from calc_df import *
from utils import *


In [6]:
# (tpr, xtc) path pairs for replicas II-IV; both files of a replica share the
# same stem and differ only in extension.
file_pairs = [
    tuple(f'../../resub_traj/n17_k10_{replica}.red.{ext}' for ext in ('tpr', 'xtc'))
    for replica in ('II', 'III', 'IV')
]

# Create Skeleton DataFrame

Each row of the DataFrame contains the following columns:

- **`global_pep_ID`**  
  Peptide count across replicas (ranging from 0 to 35*3).

- **`local_pep_ID`**  
  Peptide index within a single replica (ranges from 0 to 35).

- **`frame`**  
  Frame number (ranges from 0 to 5000).

- **`trajectory`**  
  Identifier for the trajectory the peptide-frame pair comes from.

- **`target_status`**  
  Categorical label (0–3) indicating peptide binding type:

  | Value | Description                    |
  |-------|--------------------------------|
  | 0     | Directly bound to **Mc**       |
  | 1     | Directly bound to **Mp**       |
  | 2     | Indirectly bound to **Mc**     |
  | 3     | Indirectly bound to **Mp**     |


In [7]:
# Build one template DataFrame per (tpr, xtc) pair; the pair's position in
# file_pairs is passed as its trajectory index.
dfs = [
    build_init_df(tpr, xtc, traj_index=i, start=0, stop=5000)
    for i, (tpr, xtc) in enumerate(file_pairs)
]

# Stack the per-trajectory frames into one table and persist it as the skeleton CSV
final_df = pd.concat(dfs, ignore_index=True)
final_df.to_csv("skeleton_df.csv", index=False)

In [8]:
# Reload the skeleton from the CSV on disk and report which columns it currently has
df_template = pd.read_csv('skeleton_df.csv')
print_df_status(df_template)

Current version of the df has the following columns
Index(['global_pep_ID', 'local_pep_ID', 'frame', 'trajectory',
       'target_status'],
      dtype='object')


## Peptide-peptide heavy atom contacts

Calculates the number of heavy-atom contacts (within 7 Å) between a selected peptide and all other peptides across the specified frames and trajectories, returning a DataFrame of contact counts.



In [12]:
# Run the peptide-peptide contact analysis
contact_df = pep_pep_contacts(df_template, file_pairs)

# Merge the new contact data back into the base DataFrame.
# If this cell was already run, df_template still carries a pp_contacts column;
# merging on top of it would make pandas emit pp_contacts_x / pp_contacts_y
# instead of a single column, so any stale copy is dropped first
# (errors="ignore" makes this a no-op on the first run).
df_template = pd.merge(
    df_template.drop(columns=["pp_contacts"], errors="ignore"),
    contact_df[["trajectory", "frame", "local_pep_ID", "pp_contacts"]],
    on=["trajectory", "frame", "local_pep_ID"],
    how="left"  # keep every template row even if no contact row matches
)

save_new_df(df_template)
print_df_status(df_template)

Saving current version of the df. 
 The following columns are present: 
Index(['global_pep_ID', 'local_pep_ID', 'frame', 'trajectory', 'target_status',
       'pp_contacts'],
      dtype='object')
Saving to current_features.csv
Current version of the df has the following columns
Index(['global_pep_ID', 'local_pep_ID', 'frame', 'trajectory', 'target_status',
       'pp_contacts'],
      dtype='object')


## Peptide-lipid interactions


Calculates hydrogen-like contacts (within 7 Å) between a selected peptide and all lipids (PO4) across the specified frames and trajectories, returning a DataFrame of contact counts.


In [16]:
# Run the peptide-lipid contact analysis
contact_df = pep_lip_contacts(df_template, file_pairs)

# Merge the new contact data back into the base DataFrame.
# If this cell was already run, df_template still carries a pl_contacts column;
# merging on top of it would make pandas emit pl_contacts_x / pl_contacts_y
# instead of a single column, so any stale copy is dropped first
# (errors="ignore" makes this a no-op on the first run).
df_template = pd.merge(
    df_template.drop(columns=["pl_contacts"], errors="ignore"),
    contact_df[["trajectory", "frame", "local_pep_ID", "pl_contacts"]],
    on=["trajectory", "frame", "local_pep_ID"],
    how="left"  # keep every template row even if no contact row matches
)

save_new_df(df_template)
print_df_status(df_template)

Saving current version of the df. 
 The following columns are present: 
Index(['global_pep_ID', 'local_pep_ID', 'frame', 'trajectory', 'target_status',
       'pp_contacts', 'pl_contacts'],
      dtype='object')
Saving to current_features.csv
Current version of the df has the following columns
Index(['global_pep_ID', 'local_pep_ID', 'frame', 'trajectory', 'target_status',
       'pp_contacts', 'pl_contacts'],
      dtype='object')


## Dipole peptide-peptide interactions

In [None]:
# Load the latest saved version of the df and run the dipole peptide-peptide contact analysis
df_template = pd.read_csv('current_features.csv')
contact_df = pep_dip_dip_contacts(df_template, file_pairs)

# Merge the new contact data back into the base DataFrame.
# current_features.csv already contains dip_pp_contacts once this cell has been
# run, and merging on top of it would make pandas emit dip_pp_contacts_x /
# dip_pp_contacts_y. Drop any stale copy first (errors="ignore" makes this a
# no-op on the first run).
df_template = pd.merge(
    df_template.drop(columns=["dip_pp_contacts"], errors="ignore"),
    contact_df[["trajectory", "frame", "local_pep_ID", "dip_pp_contacts"]],
    on=["trajectory", "frame", "local_pep_ID"],
    how="left"  # keep every template row even if no contact row matches
)
save_new_df(df_template)
# Re-read the feature CSV so df_template matches what is on disk
# (save_new_df reports saving to current_features.csv in the earlier cell outputs)
df_template = pd.read_csv('current_features.csv')