**Read models in RMF3 format together with crosslinking data, then either map the crosslinks onto the structure in the RMF3 file or generate a script that ChimeraX can read to map and visualize the crosslinks.**

In [6]:
import os
import IMP
import RMF
import IMP.pmi.output
from Bio.PDB import PDBParser, Superimposer, PDBExceptions
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import logging
from multiprocessing import Pool, cpu_count
from functools import partial
import shutil

In [None]:
# Cell purpose: read an rmf3 file and extract its hierarchy.
# Logging setup: INFO-level records formatted as "<time> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

def write_pdb(hier, pdb_head, first_chain_index=60):
    """
    Write the chains of a hierarchy into a single combined PDB file.

    Each selected chain is first written to its own temporary PDB file with
    IMP.pmi.output.Output. The ATOM records of those files are then
    concatenated, in chain order, into ``output_pdbs/<pdb_head>.pdb``. The
    temporary per-chain files are removed afterwards, even if writing or
    merging fails.

    Args:
        hier (IMP.atom.Hierarchy): The hierarchy containing the chains. The
            chains are taken to be the children of its first child.
        pdb_head (str): The prefix for the output PDB file names.
        first_chain_index (int): Index of the first chain to write; chains
            with a lower index are skipped. Defaults to 60, the value that
            used to be hard-coded here. Pass 0 to write every chain.
            NOTE(review): the reason for skipping the first 60 chains is not
            visible in this file - confirm the default is still wanted.

    Returns:
        str: Path to the combined PDB file.
    """
    output_dir = 'output_pdbs'
    os.makedirs(output_dir, exist_ok=True)

    chains = hier.get_children()[0].get_children()
    final_pdb_path = os.path.join(output_dir, f'{pdb_head}.pdb')
    pdb_files = []

    try:
        # Write individual PDB files for each selected chain
        for i, ch in enumerate(chains):
            if i < first_chain_index:
                continue
            output_pdb_path = os.path.join(output_dir, f'{pdb_head}_{i}.pdb')
            # Register the path before writing so that a partially written
            # file is still cleaned up if the writer raises.
            pdb_files.append(output_pdb_path)
            o = IMP.pmi.output.Output()
            o.init_pdb(output_pdb_path, ch)
            o.write_pdb(output_pdb_path)

        # Combine the per-chain files, keeping only the ATOM records
        with open(final_pdb_path, 'w') as outfile:
            for fname in pdb_files:
                with open(fname) as infile:
                    outfile.writelines(
                        line for line in infile if line.startswith('ATOM')
                    )
    finally:
        # Delete individual chain PDB files, whether or not the merge succeeded
        for fname in pdb_files:
            if os.path.exists(fname):
                os.remove(fname)

    return final_pdb_path

def process_rmf3_frame(rmf_filename, frame_number):
    """
    Load one frame of an RMF3 file and write it out as a combined PDB file.

    The hierarchies are created in a fresh IMP.Model, the requested frame is
    loaded, and the first hierarchy is passed to write_pdb with the file
    prefix ``test_frame_<frame_number>``.

    Args:
        rmf_filename (str): Path to the RMF3 file, opened read-only.
        frame_number (int): Frame to load; it is wrapped in RMF.FrameID.

    Returns:
        str: Path of the combined PDB file produced by write_pdb.
    """
    # IMP.rmf is used below but is not imported by name at the top of the
    # file; import it here instead of relying on another module pulling it in.
    import IMP.rmf

    imp_model = IMP.Model()
    with RMF.open_rmf_file_read_only(rmf_filename) as rmf_file:
        hier = IMP.rmf.create_hierarchies(rmf_file, imp_model)
        IMP.rmf.load_frame(rmf_file, RMF.FrameID(frame_number))
        # Return the path so callers can locate the written file.
        return write_pdb(hier[0], f'test_frame_{frame_number}')

## Crosslink Mapping Strategy

### Handling Ambiguous Crosslinks

When mapping crosslinks to structural models, we need to account for protein copy numbers and the resulting ambiguity in crosslink assignments.

#### Scenarios:

**Inter-protein crosslinks (p1 ↔ p2)**
- If protein p1 has copy number = 2 and protein p2 has copy number = 2
- A crosslink between p1 and p2 can have **4 possible mappings**:
    - p1_0 ↔ p2_0
    - p1_0 ↔ p2_1  
    - p1_1 ↔ p2_0
    - p1_1 ↔ p2_1

**Intra-protein crosslinks (p1 ↔ p1)**

*Same residue numbers:*
- Must be between **different copies only**
- Mapping: p1_0 ↔ p1_1

*Different residue numbers:*
- Can be between **same copy or different copies**
- Possible mappings:
    - p1_0 ↔ p1_0 (intra-copy)
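    - p1_0 ↔ p1_1 (inter-copy: first residue on copy 0, second residue on copy 1)
    - p1_1 ↔ p1_0 (inter-copy: first residue on copy 1, second residue on copy 0)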
    - p1_1 ↔ p1_1 (intra-copy)

In [None]:
# Chain ID -> protein name lookup, built one protein at a time:
# chains A and B map to DDI1, chains C and D map to DDI2.
chain_to_protein = {
    **dict.fromkeys('AB', 'DDI1'),
    **dict.fromkeys('CD', 'DDI2'),
}