In [1]:
from pathlib import Path

import IMP
import IMP.atom

import cctbx.crystal
import cctbx.xray
from iotbx import pdb
from iotbx.pdb.hierarchy import root
from scitbx.array_family import flex

In [125]:
pdb_file = Path(Path.home(), "Documents/xray/data/pdbs/7mhf/7mhf_no_het.pdb")
pdb_inp = pdb.input(file_name=str(pdb_file))
hierarchy = pdb_inp.construct_hierarchy()
asc = hierarchy.atom_selection_cache()
sel = asc.selection("not altloc 'B'")
hierarchy = hierarchy.select(sel)

In [126]:
crystal_symmetry = pdb_inp.crystal_symmetry()
crystal_symmetry

crystal.symmetry(
    unit_cell=(113.706, 53.32, 44.567, 90, 102.96, 90),
    space_group_symbol="C 1 2 1"
  )

In [127]:
xray_structure = hierarchy.extract_xray_structure(crystal_symmetry=crystal_symmetry)


In [128]:
xray_structure.crystal_symmetry().show_summary()

Unit cell: (113.706, 53.32, 44.567, 90, 102.96, 90)
Space group: C 1 2 1 (No. 5)


In [129]:
xray_structure.scatterers().size()

4682

In [130]:
translated_sites_frac.size()

4682

In [131]:

# Access the unit cell
unit_cell = xray_structure.unit_cell()

# Get atomic sites in fractional coordinates
sites_frac = xray_structure.sites_frac()

# Define the translation vector (2 unit cells along each axis)
translation_vector = (0, 0, 0)

# Convert translation vector to flex array matching the size of sites_frac
translation_vector_array = flex.vec3_double(sites_frac.size(), translation_vector)

# Translate the atomic positions
translated_sites_frac = sites_frac + translation_vector_array

# Update the xray_structure with the translated coordinates
xray_structure.set_sites_frac(translated_sites_frac)
hierarchy.adopt_xray_structure(xray_structure)

In [132]:
models = hierarchy.models()
for model in models:
    for chain in model.chains():
        # if chain.id == 'A':
        #     chain.id = 'B'
        for atom in chain.atoms():
            atom.occ = 0.5

            print(dir(atom))
            break

model = hierarchy.only_model()
model.id = str(1)



['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'angle', 'b', 'chain', 'charge', 'charge_as_int', 'charge_tidy', 'data_offsets', 'detached_copy', 'determine_chemical_element_simple', 'distance', 'element', 'element_is_hydrogen', 'element_is_ion', 'element_is_negative_ion', 'element_is_positive_ion', 'fdp', 'fetch_labels', 'format_anisou_record', 'format_atom_record', 'format_atom_record_group', 'format_sigatm_record', 'format_siguij_record', 'fp', 'has_siguij', 'hetero', 'i_seq', 'id_str', 'is_in_same_conformer_as', 'memory_id', 'name', 'occ', 'parent', 'pdb_element_charge_columns', 'pdb_label_columns', 'quote', 'segid', 'serial', 'serial_as_int', 'set_b', 'set_charge', 'set_che

In [133]:
pdb_inp = pdb.input(file_name=str(pdb_file))
hierarchy_2 = pdb_inp.construct_hierarchy()
asc = hierarchy_2.atom_selection_cache()
sel = asc.selection("not altloc 'B'")
hierarchy_2 = hierarchy_2.select(sel)

model = hierarchy_2.only_model()
model.id = str(2)

for chain in model.chains():
    for atom in chain.atoms():
        atom.occ = 0.5

hierarchy.append_model(model.detached_copy())

In [134]:
with open("../data/test.pdb", "w") as f:
    f.write(hierarchy.as_pdb_string(crystal_symmetry=xray_structure.crystal_symmetry()))

In [4]:
import IMP
import IMP.atom

m = IMP.Model()
hs = IMP.atom.read_multimodel_pdb("../data/411.pdb", m, IMP.atom.ATOMPDBSelector())

occs = [0.3504255148456590, 0.6495744851543420]
for i in range(len(hs)):
    h = hs[i]
    for pid in IMP.atom.Selection(h).get_selected_particle_indexes():
        at = IMP.atom.Atom(m, pid)
        at.set_occupancy(occs[i])

IMP.atom.write_multimodel_pdb(hs, "../data/411.pdb")


In [9]:
with open("../data/411.pdb", 'r') as file:
    lines = file.readlines()

cnt = 0
modified_lines = []
in_section = False  # Flag to indicate if we are between 'MODEL' and 'TER'
for line in lines:
    print(line)
    if len(line) > 21:
        print(line[21])
    # Check if the line starts with 'MODEL'
    if line.startswith('MODEL'):
        in_section = True
        modified_lines.append(line)  # Keep the 'MODEL' line as is
        continue  # Proceed to the next line

    # Check if the line starts with 'TER'
    elif line.startswith('ENDMDL'):
        in_section = False
        cnt += 1
        modified_lines.append(line)  # Keep the 'TER' line as is
        continue  # Proceed to the next line

    # Modify lines only if we are within the 'MODEL' and 'TER' section
    if in_section and len(line) >= 17:
        # Replace the 17th character (index 16) with the replacement character
        line = line[:16] + chr(ord("A")+cnt) + line[17:]

    # Append the modified or unmodified line to the list
    modified_lines.append(line)

# Write the modified lines back to the file
with open("../data/411_mod.pdb", 'w') as file:
    file.writelines(modified_lines)


MODEL        1

ATOM      1  N   SER A   1      -3.028 -12.972 -16.899  0.35 28.23           N

 
ATOM      2  CA  SER A   1      -2.522 -11.748 -16.363  0.35 32.49           C

 
ATOM      3  C   SER A   1      -2.701 -11.609 -14.870  0.35 25.78           C

 
ATOM      4  O   SER A   1      -3.465 -12.352 -14.280  0.35 23.90           O

 
ATOM      5  CB  SER A   1      -3.215 -10.423 -16.958  0.35 35.98           C

 
ATOM      6  OG  SER A   1      -4.143 -10.651 -17.997  0.35 54.30           O

 
ATOM      7  N   GLY A   2      -2.109 -10.525 -14.297  0.35 31.81           N

 
ATOM      8  CA  GLY A   2      -1.972 -10.149 -12.916  0.35 26.67           C

 
ATOM      9  C   GLY A   2      -0.886 -10.970 -12.260  0.35 26.23           C

 
ATOM     10  O   GLY A   2      -0.253 -11.896 -12.851  0.35 28.62           O

 
ATOM     11  N   PHE A   3      -0.751 -10.623 -10.947  0.35 24.45           N

 
ATOM     12  CA  PHE A   3       0.405 -10.694 -10.143  0.35 26.27           C

 


In [18]:
with open("../data/3k0m_8_state_mod.pdb", 'r') as f:
    lines = f.readlines()

# Find all indices of lines containing "MODEL" and "ENDMDL"
model_indices = [i for i, line in enumerate(lines) if 'MODEL' in line]
endmdl_indices = [i for i, line in enumerate(lines) if 'ENDMDL' in line]

# Determine which indices to keep
keep_indices = set()
if model_indices:
    keep_indices.add(model_indices[0])  # First "MODEL"
    # if len(model_indices) > 1:
    #     keep_indices.add(model_indices[-1])  # Last "MODEL"
if endmdl_indices:
    # keep_indices.add(endmdl_indices[0])  # First "ENDMDL"
    if len(endmdl_indices) > 1:
        keep_indices.add(endmdl_indices[-1])  # Last "ENDMDL"

# Write the processed lines to the output file
with open("../data/3k0m_8_state_mod.pdb", 'w') as f:
    for i, line in enumerate(lines):
        if ('MODEL' in line or 'ENDMDL' in line):
            if i in keep_indices:
                f.write(line)
            else:
                continue  # Skip the line
        else:
            f.write(line)

In [8]:
orig_pdb_file = "../data/tmp/3k0m_4_state_mod_refine_001.pdb"
out_pdb_file = "../data/tmp/3k0m_4_state_mod_refine_001_mod.pdb"

with open(orig_pdb_file, 'r') as file:
    lines = file.readlines()

# Split the input into lines
# lines = pdb_data.strip().split("\n")

# Create dictionaries to store entries for each conformation
conformation_dict = {'A': [], 'B': [], 'C': [], 'D': []}

# Iterate over each line and sort based on the conformation identifier (character at column 17)
for line in lines:
    if len(line) > 16:
        conformation_id = line[16]  # Character at column 17 (0-indexed position 16)
        if conformation_id in conformation_dict:
            conformation_dict[conformation_id].append(line)

# Combine the sorted lines based on conformation order (A, B, C, D)
sorted_entries = []
for i in range(1,4):
    key = chr(ord('A') + i-1)
    sorted_entries.append(f"MODEL    {i}")
    sorted_entries.extend(conformation_dict[key])
    sorted_entries.append("ENDMDL")

# Join the sorted lines and return as a single string
new_str = "\n".join(sorted_entries)

In [9]:
with open(out_pdb_file, 'w') as file:
    file.writelines(new_str)

In [10]:
print(new_str)


MODEL    1
REMARK   3   R VALUE     (WORKING + TEST SET) : 0.2212

REMARK   3   R VALUE            (WORKING SET) : 0.2196

REMARK   3  DEVIATIONS FROM IDEAL VALUES - RMSD. RMSZ FOR BONDS AND ANGLES.

REMARK   3    PLANARITY :  0.013   0.107    904

REMARK   3      ALLOWED  :  9.32 %

ATOM      1  N  AVAL A   2      20.164  -3.659  17.810  0.25 63.50           N

ATOM      2  CA AVAL A   2      19.038  -2.945  18.374  0.25 61.53           C

ATOM      3  C  AVAL A   2      18.659  -1.726  17.549  0.25 49.17           C

ATOM      4  O  AVAL A   2      19.491  -0.900  17.205  0.25 59.49           O

ATOM      5  CB AVAL A   2      19.313  -2.492  19.833  0.25 79.12           C

ATOM      6  CG1AVAL A   2      18.107  -1.775  20.416  0.25 87.97           C

ATOM      7  CG2AVAL A   2      19.732  -3.663  20.679  0.25 89.15           C

ATOM      8  HA AVAL A   2      18.294  -3.566  18.364  0.25 73.72           H

ATOM      9  HB AVAL A   2      20.047  -1.857  19.833  0.25 94.82         