## reformat

In [5]:
def _pdb_atom_name_field(raw_name, element):
    """Return the 4-character atom-name field (PDB columns 13-16).

    The PDB convention right-justifies the element symbol in columns 13-14,
    so a name built on a one-letter element starts in column 14 (" CA " is an
    alpha carbon) while a two-letter element starts in column 13 ("CA  " is
    calcium). Four-character names always fill the whole field.
    """
    name = raw_name.strip()
    if len(name) >= 4:
        return f"{name:<4}"
    if element:
        # Trust the element column when it is present.
        starts_in_col_14 = len(element) == 1 and name.upper().startswith(element.upper())
    else:
        # No element given: keep whatever alignment the input record used.
        starts_in_col_14 = raw_name.startswith(" ")
    return f" {name:<3}" if starts_in_col_14 else f"{name:<4}"


def reformat_pdb(input_file, output_file):
    """
    Reformats a PDB file to ensure proper alignment and formatting of fields.

    ATOM and HETATM records are read by fixed column position and written
    back with canonical field widths. Every other line is copied unchanged.
    Blank occupancy / temperature-factor fields are written as 0.00.

    Any error (missing file, non-numeric serial/residue/coordinate field, ...)
    is reported on stdout instead of being raised; in that case the output
    file may be incomplete.

    Args:
        input_file (str): Path to the input PDB file.
        output_file (str): Path to the output reformatted PDB file.
    """
    try:
        with open(input_file, 'r') as infile, open(output_file, 'w') as outfile:
            for line in infile:
                if not line.startswith(("ATOM", "HETATM")):
                    # Copy other lines as is
                    outfile.write(line)
                    continue

                # Extract fields based on PDB file format (0-based slices)
                record = line[0:6].strip()
                atom_serial = int(line[6:11].strip())
                raw_atom_name = line[12:16]
                alt_loc = line[16:17].strip()
                res_name = line[17:20].strip()
                chain_id = line[21:22].strip()
                res_seq = int(line[22:26].strip())
                i_code = line[26:27].strip()
                x = float(line[30:38].strip())
                y = float(line[38:46].strip())
                z = float(line[46:54].strip())
                occupancy_text = line[54:60].strip()
                temp_factor_text = line[60:66].strip()
                occupancy = float(occupancy_text) if occupancy_text else 0.0
                temp_factor = float(temp_factor_text) if temp_factor_text else 0.0
                element = line[76:78].strip()
                charge = line[78:80].strip()

                # Left-justifying every name would turn " CA " into "CA  ",
                # so the name column is chosen from the element instead.
                name_field = _pdb_atom_name_field(raw_atom_name, element)

                # Reformat and write the line
                formatted_line = (
                    f"{record:<6}{atom_serial:>5} {name_field}{alt_loc:1}"
                    f"{res_name:>3} {chain_id:1}{res_seq:>4}{i_code:1}   "
                    f"{x:>8.3f}{y:>8.3f}{z:>8.3f}{occupancy:>6.2f}{temp_factor:>6.2f}          "
                    f"{element:>2}{charge:>2}\n"
                )
                outfile.write(formatted_line)
        print(f"Reformatted PDB file saved to {output_file}")
    except Exception as e:
        # Notebook-level boundary: report the problem rather than raising.
        print(f"An error occurred: {e}")

# Example usage
# Both paths are relative to the notebook's working directory.
input_pdb = "molecules/DTAB.pdb"  # Replace with your input PDB file path
output_pdb = "molecules/DTAB_reformatted.pdb"  # Replace with your desired output PDB file path
# reformat_pdb prints either a confirmation or an error message; it returns nothing.
reformat_pdb(input_pdb, output_pdb)


Reformatted PDB file saved to molecules/DTAB_reformatted.pdb


## extract coordinates

In [10]:
def extract_coordinates_pdb(pdb_file_path, output_file_path=None):
    """
    Extract atomic coordinates (X, Y, Z) from a PDB file.

    Coordinates are read from the fixed-width PDB columns 31-54 of every
    ATOM/HETATM record. This stays correct when the chain ID is present or
    blank and when neighbouring fields touch (e.g. "-100.123-200.456").
    Records that are too short or not column-aligned fall back to
    whitespace splitting (fields 6-8, counting from 1). Records that cannot
    be parsed either way are skipped.

    Parameters:
        pdb_file_path (str): Path to the PDB file.
        output_file_path (str, optional): Path to save the extracted coordinates (optional).
            One "x y z" line per atom, three decimals each.

    Returns:
        list: List of tuples with atomic coordinates [(x1, y1, z1), (x2, y2, z2), ...].
            An empty list (and a printed message) if the PDB file does not exist.
    """
    coordinates = []
    try:
        with open(pdb_file_path, 'r') as file:
            for line in file:
                if not line.startswith(("ATOM", "HETATM")):
                    continue

                xyz = None
                # Preferred: fixed-width columns as defined by the PDB format.
                if len(line.rstrip("\r\n")) >= 54:
                    try:
                        xyz = (float(line[30:38]), float(line[38:46]), float(line[46:54]))
                    except ValueError:
                        xyz = None

                # Fallback for free-format records: whitespace-separated fields.
                if xyz is None:
                    try:
                        parts = line.split()
                        xyz = (float(parts[5]), float(parts[6]), float(parts[7]))
                    except (IndexError, ValueError):
                        # Skip invalid lines
                        continue

                coordinates.append(xyz)

        # Save to output file if specified
        if output_file_path:
            with open(output_file_path, 'w') as outfile:
                for x, y, z in coordinates:
                    outfile.write(f"{x:.3f} {y:.3f} {z:.3f}\n")

        return coordinates
    except FileNotFoundError:
        print(f"Error: File {pdb_file_path} not found.")
        return []

# Example usage: pull the coordinates out of a PDB file and report how many were found
pdb_file = "molecules/DTAB_out.pdb"  # PDB file to read; change to your own path
output_file = "molecules/dtab_coordinates.txt"  # where the "x y z" lines are saved (optional)
coordinates = extract_coordinates_pdb(pdb_file, output_file_path=output_file)

# An empty list means the file was missing or no atom record could be parsed
if not coordinates:
    print("No coordinates found or file format issue.")
else:
    print(f"Extracted {len(coordinates)} coordinates.")


Extracted 50 coordinates.


## PDB to mol2

In [33]:
from openbabel import pybel

def convert_pdb_to_mol2(input_pdb, output_mol2):
    """
    Write the first molecule read from a PDB file out as a MOL2 file (Open Babel).

    An existing output file is overwritten. Nothing is returned and nothing
    is raised: success or failure is reported on stdout.

    Args:
        input_pdb (str): Path to the input PDB file.
        output_mol2 (str): Path to the output MOL2 file.
    """
    try:
        # Only the first item produced by the reader is converted
        reader = pybel.readfile("pdb", input_pdb)
        first_molecule = next(reader)
        first_molecule.write("mol2", output_mol2, overwrite=True)
        print(f"Converted {input_pdb} to {output_mol2} successfully.")
    except Exception as error:
        print(f"An error occurred: {error}")

# Example usage
# Both paths are relative to the notebook's working directory.
input_pdb_file = "molecules/DTAB.pdb"  # Replace with your input PDB file path
output_mol2_file = "molecules/DTAB.mol2"  # Replace with your desired output MOL2 file path
# convert_pdb_to_mol2 prints the outcome (success or error message) and returns nothing.
convert_pdb_to_mol2(input_pdb_file, output_mol2_file)

Converted molecules/DTAB.pdb to molecules/DTAB.mol2 successfully.


## extract from PDB and then replace in mol2

In [14]:
import re

def extract_coordinates_pdb(pdb_file_path, output_file_path=None):
    """
    Extract atomic coordinates (X, Y, Z) from a PDB file.

    Coordinates are read from the fixed-width PDB columns 31-54 of every
    ATOM/HETATM record. This stays correct when the chain ID is present or
    blank and when neighbouring fields touch (e.g. "-100.123-200.456").
    Records that are too short or not column-aligned fall back to
    whitespace splitting (fields 7-9, counting from 1). Records that cannot
    be parsed either way are skipped.

    Parameters:
        pdb_file_path (str): Path to the PDB file.
        output_file_path (str, optional): Path to save the extracted coordinates (optional).
            One "x y z" line per atom, three decimals each.

    Returns:
        list: List of tuples with atomic coordinates [(x1, y1, z1), (x2, y2, z2), ...].
            An empty list (and a printed message) if the PDB file does not exist.
    """
    coordinates = []
    try:
        with open(pdb_file_path, 'r') as file:
            for line in file:
                if not line.startswith(("ATOM", "HETATM")):
                    continue

                xyz = None
                # Preferred: fixed-width columns as defined by the PDB format.
                if len(line.rstrip("\r\n")) >= 54:
                    try:
                        xyz = (float(line[30:38]), float(line[38:46]), float(line[46:54]))
                    except ValueError:
                        xyz = None

                # Fallback for free-format records: whitespace-separated fields.
                if xyz is None:
                    try:
                        parts = line.split()
                        xyz = (float(parts[6]), float(parts[7]), float(parts[8]))
                    except (IndexError, ValueError):
                        # Skip invalid lines
                        continue

                coordinates.append(xyz)

        # Save to output file if specified
        if output_file_path:
            with open(output_file_path, 'w') as outfile:
                for x, y, z in coordinates:
                    outfile.write(f"{x:.3f} {y:.3f} {z:.3f}\n")

        return coordinates
    except FileNotFoundError:
        print(f"Error: File {pdb_file_path} not found.")
        return []

def replace_coordinates_in_mol2(mol2_file_path, new_coordinates, output_file_path):
    """
    Replace atomic coordinates in a MOL2 file with new coordinates.

    Only records inside the @<TRIPOS>ATOM section are touched. Any other
    "@<TRIPOS>..." header (BOND, SUBSTRUCTURE, ...) ends that section, so
    records of later sections are never overwritten. Coordinates are applied
    in file order, one per atom record. Rewritten atom records are emitted
    with single-space separators and 4-decimal coordinates; all other lines
    are copied unchanged.

    If the number of atom records differs from the number of coordinates, a
    warning is printed: surplus atoms keep their original coordinates and
    surplus coordinates are ignored.

    Parameters:
        mol2_file_path (str): Path to the original MOL2 file.
        new_coordinates (list): List of tuples containing new coordinates [(x, y, z), ...].
        output_file_path (str): Path to save the modified MOL2 file.
    """
    atom_records = 0
    replaced = 0

    with open(mol2_file_path, 'r') as infile, open(output_file_path, 'w') as outfile:
        in_atom_section = False

        for line in infile:
            stripped = line.strip()

            # Every record-type indicator switches section; only ATOM turns replacement on.
            if stripped.startswith("@<TRIPOS>"):
                in_atom_section = stripped == "@<TRIPOS>ATOM"
                outfile.write(line)
                continue

            parts = stripped.split() if in_atom_section else []
            # An atom record needs at least id, name, x, y, z.
            if len(parts) > 4:
                atom_records += 1
                if replaced < len(new_coordinates):
                    x, y, z = new_coordinates[replaced]
                    parts[2:5] = [f"{x:.4f}", f"{y:.4f}", f"{z:.4f}"]
                    replaced += 1
                    outfile.write(" ".join(parts) + "\n")
                    continue

            # Non-atom lines, and atoms beyond the supplied coordinates, are kept as is
            outfile.write(line)

    if atom_records != len(new_coordinates):
        print(
            f"Warning: MOL2 file has {atom_records} atom records but "
            f"{len(new_coordinates)} coordinates were supplied; {replaced} were replaced."
        )
    print(f"Coordinates replaced and saved to {output_file_path}")

# Combined Workflow: copy the coordinates of a PDB file into an existing MOL2 file
pdb_file = "molecules/DTAB.pdb"  # PDB file that supplies the coordinates
output_coordinates_file = "molecules/dtab_coordinates.txt"  # text dump of the extracted coordinates
mol2_file = "molecules/DTAB.mol2"  # MOL2 file whose coordinates get replaced
updated_mol2_file = "molecules/dtab_updated.mol2"  # where the patched MOL2 file is written

# Step 1: read the coordinates from the PDB file (they are also saved to the text file)
coordinates = extract_coordinates_pdb(pdb_file, output_file_path=output_coordinates_file)

if not coordinates:
    # Nothing to transfer: the PDB file was missing or held no parsable atom records
    print("No coordinates found in the PDB file or file format issue.")
else:
    print(f"Extracted {len(coordinates)} coordinates from the PDB file.")

    # Step 2: write a copy of the MOL2 file carrying the extracted coordinates
    replace_coordinates_in_mol2(mol2_file, coordinates, updated_mol2_file)


Extracted 50 coordinates from the PDB file.
Coordinates replaced and saved to molecules/dtab_updated.mol2


# mol2

## reformat

In [15]:
import re

def reformat_mol2(input_file_path, output_file_path):
    """
    Reformat a MOL2 file to ensure proper structure and consistent spacing.

    Records in the @<TRIPOS>ATOM section that carry at least nine fields
    (id, name, x, y, z, type, residue id, residue name, charge) are rewritten
    in fixed-width columns. Any fields after the charge (e.g. status bits)
    are kept, appended with single spaces. Shorter atom records and every
    line outside the ATOM section are copied unchanged. Any other
    "@<TRIPOS>..." header ends the ATOM section, so records of later
    sections are never parsed as atoms.

    Errors are reported on stdout instead of being raised; after an error the
    output file may be incomplete.

    Parameters:
        input_file_path (str): Path to the input MOL2 file.
        output_file_path (str): Path to save the reformatted MOL2 file.
    """
    try:
        with open(input_file_path, 'r') as infile, open(output_file_path, 'w') as outfile:
            in_atom_section = False
            for line in infile:
                stripped = line.strip()

                # Every record-type indicator switches section; only ATOM enables reformatting.
                if stripped.startswith("@<TRIPOS>"):
                    in_atom_section = stripped == "@<TRIPOS>ATOM"
                    outfile.write(line)
                    continue

                parts = stripped.split() if in_atom_section else []
                if len(parts) < 9:
                    # Non-ATOM lines and incomplete atom records are written as is
                    outfile.write(line)
                    continue

                atom_id = int(parts[0])         # Atom ID
                atom_name = parts[1]            # Atom name
                x = float(parts[2])             # X coordinate
                y = float(parts[3])             # Y coordinate
                z = float(parts[4])             # Z coordinate
                atom_type = parts[5]            # Atom type
                res_id = int(parts[6])          # Residue ID
                res_name = parts[7]             # Residue name
                charge = float(parts[8])        # Partial charge
                trailing = parts[9:]            # Optional fields after the charge

                # Write reformatted line with proper spacing
                record = (
                    f"{atom_id:<7}{atom_name:<8}{x:>10.4f}{y:>10.4f}{z:>10.4f}  "
                    f"{atom_type:<8}{res_id:>4}  {res_name:<6}{charge:>10.4f}"
                )
                if trailing:
                    # Keep optional trailing fields instead of dropping them.
                    record += " " + " ".join(trailing)
                outfile.write(record + "\n")

        print(f"Reformatted MOL2 file saved to {output_file_path}")

    except FileNotFoundError:
        print(f"Error: File {input_file_path} not found.")
    except Exception as e:
        # Notebook-level boundary: report the problem rather than raising.
        print(f"An error occurred: {e}")

# Example usage
# The input here is the file name used as the output of the coordinate-replacement step.
input_mol2_file = "molecules/dtab_updated.mol2"  # Path to your original MOL2 file
output_mol2_file = "molecules/dtab_reformatted.mol2"  # Path for the reformatted MOL2 file

# reformat_mol2 prints either a confirmation or an error message; it returns nothing.
reformat_mol2(input_mol2_file, output_mol2_file)


Reformatted MOL2 file saved to molecules/dtab_reformatted.mol2


# VISUALIZATION

In [38]:
import py3Dmol
import os

# Structure file to display
input_file = "molecules/DTAB.mol2"  # Change this to your input file (PDB or MOL2)

# Stop early with a clear message when the path is wrong
if not os.path.exists(input_file):
    raise FileNotFoundError(f"The file {input_file} does not exist. Please check the path.")

# The text after the last dot, lower-cased, names the format handed to the viewer
file_extension = input_file.rsplit('.', 1)[-1].lower()
if file_extension not in ("pdb", "mol2"):
    raise ValueError(f"Unsupported file format: {file_extension}. Please use a PDB or MOL2 file.")
file_format = file_extension

# Read the whole file as text
with open(input_file, "r") as file:
    structure_data = file.read()

# Build the viewer and draw the model as ball-and-stick
viewer = py3Dmol.view(width=800, height=400)
viewer.addModel(structure_data, file_format)
ball_and_stick = {
    "sphere": {"scale": 0.3},  # size of the balls (atoms)
    "stick": {"radius": 0.2},  # thickness of the sticks (bonds)
}
viewer.setStyle(ball_and_stick)
viewer.zoomTo()
viewer.show()
