# Step 1: File Conversion Toolkit
Convert MD trajectories in PDB (.pdb) or XYZ (.xyz) format into single-frame PDB or XYZ files for extended analysis.

##### Toolkit Imports

In [None]:
# Import and run the setup script
import sys, os
import numpy as np

# Make the project root (the parent of the current working directory)
# importable so that the setup_env helper module can be located.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:
    sys.path.append(project_root)

# This import must come after the sys.path adjustment above.
from setup_env import setup_environment

# setup_environment() hands back the toolkit classes; unpack them into
# notebook-level names in the order they are returned.
(
    BulkVolumeParams,
    BulkVolume,
    RadiusOfGyrationCalculator,
    PDBEditor,
    TrajectoryProcessor,
    PDBFileHandler,
    Atom,
    ClusterNetwork,
    ClusterBatchAnalyzer,
) = setup_environment()

## Split MD Trajectory (XYZ)

In [None]:
# Output location: a folder with the custom name below is created inside
# this base directory.
base_dir = "/Users/keithwhite/repos/MDScatter/data/PbI2_DMSO_0p8M_cr_nb"
output_folder_name = "split_xyz"

# XYZ trajectory file to be processed
input_file = "/Users/keithwhite/repos/MDScatter/data/PbI2_DMSO_0p8M_cr_nb/trajectory/PbI2_DMSO_0.8M_cr_nb_traj.xyz"

# Build a TrajectoryProcessor for this trajectory, then split and
# preprocess the XYZ file.
processor = TrajectoryProcessor(input_file, base_dir, output_folder_name)
processor.split_and_preprocess_xyz()

## Split MD Trajectory (PDB)

In [None]:
# Output location: a folder with the custom name below is created inside
# this base directory.
base_dir = "/Users/keithwhite/repos/MDScatter/data/PbI2_DMSO_0p8M_cr_nb"
output_folder_name = "split_pdb"

# PDB trajectory file to be split
pdb_input_file = "/Users/keithwhite/repos/MDScatter/data/PbI2_DMSO_0p8M_cr_nb/trajectory/PbI2_DMSO_0.8M_cr_nb_traj.pdb"

# Build a TrajectoryProcessor for the PDB trajectory and split it.
processor = TrajectoryProcessor(pdb_input_file, base_dir, output_folder_name)
processor.split_pdb_trajectory()

## PDB Reformatting Tools
Tools for modifying the names of molecules and atom indices in PDB files. My suggestion is to preprocess the trajectory file before splitting the trajectory.

#### Check the current residue (molecule) name formats.
You may do this to verify which molecules are present in your file, or to grab their names for renaming.

In [None]:
# Example 1: list the residue names currently present in a PDB file
pdb_file = "/Users/keithwhite/repos/MDScatter/data/example/frame_0001.pdb"

# Ask PDBEditor for the unique residue names in the file, then show them.
residue_names = PDBEditor.read_residue_names(pdb_file)
print(
    "Unique residue names in the PDB file:",
    residue_names,
)


#### Update or add a residue (molecule) name.
You may want to do this to name a new molecule that is to be introduced into a larger structure file.

In [None]:
# Example 2: rename (or add) residue names in a PDB file

# Old residue name -> new residue name. Add further entries as needed,
# e.g. "HOH": "WAT" to change HOH to WAT.
residue_mapping = {"DMS": "DMF"}  # Change DMS to DMF

# Source file, and destination file (a relative path, so it is resolved
# against the current working directory).
input_pdb = "/Users/keithwhite/repos/MDScatter/data/example/frame_0001.pdb"
output_pdb = "output_updated.pdb"

# Apply the mapping and write the result to output_pdb.
PDBEditor.update_or_add_residue_names(
    input_pdb,
    output_pdb,
    residue_mapping,
)

print(f"Residue names updated and saved to {output_pdb}.")

In [None]:
# File written by the residue-renaming example.
# NOTE(review): that example writes "output_updated.pdb" relative to the
# working directory; this absolute path assumes the notebook was run from
# the notebooks/ folder -- confirm.
check_names = "/Users/keithwhite/repos/MDScatter/notebooks/output_updated.pdb"

# Re-read the unique residue names to confirm the rename took effect.
residue_names = PDBEditor.read_residue_names(check_names)
print(
    "Unique residue names in the PDB file:",
    residue_names,
)

#### Remove all atoms associated with a particular residue name.
You might want to do this to visualize a structure without solvent, for example.

In [None]:
# Example 3: strip every atom that belongs to one residue name

# Residue whose atoms are removed (DMS here)
residue_to_remove = "DMS"

# Source file, and destination file (a relative path, so it is resolved
# against the current working directory).
input_pdb = "/Users/keithwhite/repos/MDScatter/data/example/frame_0001.pdb"
output_pdb = "output_no_DMS.pdb"

# Write a copy of input_pdb to output_pdb without the chosen residue.
PDBEditor.remove_residue(
    input_pdb,
    output_pdb,
    residue_to_remove,
)

print(f"Removed all atoms associated with {residue_to_remove} and saved to {output_pdb}.")

In [None]:
# File written by the residue-removal example (Example 3), which saves its
# result as "output_no_DMS.pdb". The name here must match that output; the
# previous "output_no_DMF.pdb" pointed at a file the example never creates.
# NOTE(review): the absolute path assumes the notebook was run from the
# notebooks/ folder, since Example 3 writes relative to the working
# directory -- confirm.
check_names = "/Users/keithwhite/repos/MDScatter/notebooks/output_no_DMS.pdb"

# Use the PDBEditor to read and print unique residue names; the removed
# residue should no longer be listed.
residue_names = PDBEditor.read_residue_names(check_names)
print("Unique residue names in the PDB file:", residue_names)

#### Map unique hydrogen names to respective carbon atoms.
You may want to do this to be able to easily differentiate the atoms when setting up an RMC refinement.

In [None]:
# Example 4: pair hydrogen atoms with carbon atoms inside the "DMS" residue

# Residue to operate on and the two element labels to pair up
residue_name = "DMS"
carbon = "C"
hydrogen = "H"

# Source file, and destination file (a relative path, so it is resolved
# against the current working directory).
input_pdb = "/Users/keithwhite/repos/MDScatter/data/example/frame_0001.pdb"
output_pdb = "output_mapped.pdb"

# Rename the hydrogens in the residue according to their closest carbon
# atoms, using "CH" as the pair label.
PDBEditor.map_atom_pairs(
    input_pdb,
    output_pdb,
    residue_name,
    carbon,
    hydrogen,
    pair_label="CH",
)

print(f"Mapped and renamed hydrogen atoms to carbon atoms in {residue_name} and saved to {output_pdb}.")