# Development of miscellaneous new features for polymerist

## Testing polymerist importability within environment

In [None]:
import numpy as np
from openff.toolkit import Molecule, Topology, ForceField

In [None]:
import polymerist as ps
from polymerist.genutils import pyimports, importutils

import pandas as pd
# Compute the module hierarchy of the imported polymerist package, then print it
ps_hierarchy = importutils.module_hierarchy(ps)
print(ps_hierarchy)

In [None]:
import nglview

# Report which nglview version is importable in this environment
print(nglview.__version__)
# Kept as the cell's final bare expression; presumably so the notebook renders the demo widget -- TODO confirm
nglview.demo()

In [None]:
from polymerist.polymers.monomers import specification
from rdkit import Chem

# Parse a sample SMILES string with RDKit and display the resulting molecule
smi = 'CCO-c1ccccc1-N=C=C'
mol1 = Chem.MolFromSmiles(smi)
display(mol1)

# Expand the SMILES with polymerist (atom map numbers requested), then pass the result through compliant_mol_SMARTS
sma = specification.expanded_SMILES(smi, assign_map_nums=True)
exp_sma = specification.compliant_mol_SMARTS(sma)
# NOTE(review): `exp_sma` is never used below; mol2 is parsed from `sma` -- confirm whether `exp_sma` was intended here
mol2 = Chem.MolFromSmarts(sma)
display(mol2)


In [None]:
from openff.toolkit import Molecule

# Build an OpenFF Molecule from `smi` (the SMILES string assigned in an earlier cell)
offmol = Molecule.from_smiles(smi)
# Request a single conformer (presumably required for the 3D view below -- TODO confirm)
offmol.generate_conformers(n_conformers=1)
# Kept as the cell's final bare expression so the notebook shows the returned view; uses the nglview backend
offmol.visualize(backend='nglview')

# Parsing lines from PDB file

In [None]:
# Sample fixed-width PDB coordinate records (two ATOM lines and one HETATM line) used to exercise the tokenizer
l1, l2, l3 = (
    'ATOM    189  C99 OCT     5      39.590  30.100  38.320  1.00  0.00',
    'ATOM    190 C100 OCT     5      38.850  31.110  37.700  1.00  0.00',
    'HETATM   47  H21 UNL     1       0.000   0.000   0.000  1.00  0.00           H ',
)

In [None]:
from typing import Any

# Maps each field of a PDB ATOM/HETATM record to its (first, last) column;
# columns are 1-indexed and inclusive, as in the PDB format description
PDB_ATOM_TOKEN_COLUMNS : dict[str, tuple[int, int]] = {
    'Is Heteratom' : (1, 6), # raw record name field (e.g. 'ATOM  ' or 'HETATM'), not a boolean; key kept verbatim for existing lookups
    'Atom serial number' : (7, 11),
    'Atom name' : (13, 16),
    'Alternate location indicator' : (17, 17),
    'Residue name' : (18, 20),
    'Chain identifier' : (22, 22),
    'Residue sequence number' : (23, 26),
    'Code for insertions of residues' : (27, 27),
    'X (angstrom)' : (31, 38),
    'Y (angstrom)' : (39, 46),
    'Z (angstrom)' : (47, 54),
    'Occupancy' : (55, 60),
    'Temperature factor' : (61, 66),
    'Segment identifier' : (73, 76),
    'Element symbol' : (77, 78),
    'Charge' : (79, 80),
} # taken from PDB spec doc (https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html)

def tokenize_pdb_atom_line(line : str) -> dict[str, str]:
    '''
    Split a single fixed-width PDB ATOM/HETATM record into its named fields

    Parameters
    ----------
    line : str
        One coordinate record from a PDB file; may be shorter than 80
        characters and may still carry its trailing line terminator

    Returns
    -------
    tokens : dict[str, str]
        The raw (unstripped) substring of each field, keyed by the field
        names of PDB_ATOM_TOKEN_COLUMNS; fields lying beyond the end of
        a short line come back as spaces
    '''
    # drop any trailing newline / carriage return first, so a line read straight
    # from a file cannot leak '\n' or '\r' into the last fields once padded
    line = line.rstrip('\r\n').ljust(80, ' ') # pad line to 80 characters with spaces
    return {
        prop_name : line[i_start-1:i_end] # shift 1-indexed inclusive columns to a 0-indexed half-open slice
            for prop_name, (i_start, i_end) in PDB_ATOM_TOKEN_COLUMNS.items()
    }


# Tokenize each of the sample PDB records and print the resulting field dict
sample_lines = (l1, l2, l3)
for line in sample_lines:
    tokens = tokenize_pdb_atom_line(line)
    print(tokens)

# Another thing