In [None]:
""" Clean pdbs | This cell is designed to clean PDB.ent files
from non-protein components and save them in pdb format

NOTE: The module imports are kept in this cell so the code can be reused on its own in the future.
"""

## libraries 
import os
from Bio.PDB import PDBParser, PDBIO
from tqdm import tqdm
import sys
import warnings

# Point stderr at the OS null device so anything written to it is discarded.
# NOTE(review): stderr is switched back further down in this cell, before
# clean_pdb_files() is actually called, so this redirect does not cover the
# cleaning run itself -- confirm whether that is intended.
sys.stderr = null_file = open(os.devnull, 'w')

def _pdb_id_from_ent_name(ent_file):
    """Return the PDB id for an ``.ent`` file name, e.g. ``pdb1abc.ent`` -> ``1abc``."""
    # Drop only the final extension so dotted names do not collapse onto
    # the same output file.
    stem = os.path.splitext(ent_file)[0]
    # Strip only the leading 'pdb' prefix; an id that itself contains 'pdb'
    # (e.g. 'pdb1pdb.ent') must keep its own characters.
    if stem.startswith('pdb') and len(stem) > len('pdb'):
        stem = stem[len('pdb'):]
    return stem


def clean_pdb_files(folder_path):
    """Strip non-protein residues from every ``.ent`` file in a folder.

    Each ``*.ent`` file directly inside ``folder_path`` is parsed with
    Biopython, every residue whose hetero flag is not blank (e.g. waters and
    other hetero residues such as ligands and metal ions) is detached, and the
    result is written next to the original as ``<id>.pdb``. The source
    ``.ent`` file is deleted only after its cleaned copy has been saved.

    Args:
        folder_path: Directory containing the ``.ent`` files; the cleaned
            ``.pdb`` files are written to the same directory.

    Returns:
        None. Progress is shown with a tqdm bar and a summary line is printed.
    """
    # Get a list of all .ent files in the folder
    ent_files = [f for f in os.listdir(folder_path) if f.endswith('.ent')]

    # The parser and writer are reused for every file instead of being
    # rebuilt on each iteration.
    parser = PDBParser()
    writer = PDBIO()

    # The context manager closes the bar even if a file fails mid-run.
    with tqdm(total=len(ent_files), unit='file(s)') as progress_bar:
        for ent_file in ent_files:
            # Label the bar before the work starts so it names the file
            # that is currently being processed.
            progress_bar.set_description(f'Cleaning {ent_file}')
            ent_path = os.path.join(folder_path, ent_file)

            # Parsing has to happen inside the catch_warnings block:
            # the 'ignore' filter is discarded as soon as the block exits.
            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                structure = parser.get_structure('pdb', ent_path)

            # Remove ligands, metals, and water molecules
            for model in structure:
                for chain in model:
                    # Iterate over a copy because detaching mutates the chain.
                    for residue in list(chain):
                        hetfield = residue.get_id()[0]
                        if hetfield != ' ':
                            chain.detach_child(residue.id)

            # Save the cleaned PDB file
            clean_pdb_file = _pdb_id_from_ent_name(ent_file) + '.pdb'
            clean_pdb_path = os.path.join(folder_path, clean_pdb_file)
            writer.set_structure(structure)
            writer.save(clean_pdb_path)

            # Delete the .ent file (only reached once the save succeeded)
            os.remove(ent_path)

            progress_bar.update(1)

    print('Cleaned PDB files saved in', folder_path)

# Reset the warning output, then release the os.devnull handle that was
# standing in for stderr so the open file is not leaked.
# NOTE(review): sys.__stderr__ is the interpreter's start-up stderr; if this
# runs where sys.stderr had been replaced beforehand (e.g. a notebook kernel),
# that earlier stream is not what gets restored -- confirm.
sys.stderr = sys.__stderr__
null_file.close()

# usage of function
# `new_folder` is not defined in this cell; it must already exist in the session.
folder_path = os.path.join(str(new_folder), 'pdbs')
clean_pdb_files(folder_path)