# Convert CIF to PDB

In [None]:
import pandas as pd
from Bio import PDB
import os

In [None]:
## Read the previously processed competition-submission table into a DataFrame
collection_df = pd.read_csv(
    '../data/proteinbase_collection_nipah-binder-competition-all-submissions_processed.csv'
)

In [None]:
## Function to convert .CIF files to .PDB format
def convert_cif_to_pdb(cif_file_path):
    """
    Convert a .cif (mmCIF) protein structure file to .pdb format.

    The output is written alongside the input, using the same path with the
    final extension replaced by ``.pdb``. A one-line confirmation is printed
    after the file has been saved.

    Parameters
    ----------
    cif_file_path : str
        Path to the input .cif file.

    Returns
    -------
    str
        Path to the generated .pdb file.

    Raises
    ------
    ValueError
        If ``cif_file_path`` does not end in ``.cif`` (case-insensitive).
    """
    ## Validate file extension before any parsing is attempted
    if not cif_file_path.lower().endswith(".cif"):
        raise ValueError("Input file must have a .cif extension.")

    ## Determine output file path (input path with the final extension swapped)
    path_stem = os.path.splitext(cif_file_path)[0]
    pdb_file_path = path_stem + ".pdb"

    ## Extract structure ID from filename; derive it from the same stem as the
    ## output path so a name containing extra dots is not cut at the first one
    structure_id = os.path.basename(path_stem)

    ## Initialize parser and writer
    parser = PDB.MMCIFParser(QUIET=True)
    writer = PDB.PDBIO()

    ## Parse structure
    structure = parser.get_structure(structure_id, cif_file_path)

    ## Write to PDB format
    writer.set_structure(structure)
    writer.save(pdb_file_path)

    print(f"Converted: {cif_file_path} → {pdb_file_path}")
    return pdb_file_path


In [None]:
## Convert the predicted complex structure of every entry in the collection.
## Iterate the 'id' column directly: DataFrame.iterrows() builds a Series per
## row, which can upcast values (e.g. an integer id becoming a float) and so
## change the file name that gets built. The loop variable also avoids the
## name `id`, which would shadow the builtin for the rest of the notebook.
for entry_id in collection_df['id']:
    print(f"Making a PDB file for: {entry_id}")
    structure_file_path = f'../data/structures/{entry_id}_boltz2_complex.cif'

    pdb_file_path = convert_cif_to_pdb(structure_file_path)