# TODOS:
- Mechanism for selecting apo/pred; right now we are selecting at most one apo/pred by `sort_score`
- Relax the requirement that both apo and holo structures be present
- Agree on what our feature input and output should look like

In [None]:
from plinder.core.structure.structure import Structure
from plinder.core import PlinderSystem
from pathlib import Path

# Load structure

#### load holo structure

In [None]:
# Load the holo structure for system 1avd__1__1.A__1.C: receptor coordinates,
# sequence FASTA, and one ligand given as an (SDF path, resolved SMILES) pair.
# The absolute paths point at one user's local data directory and must be
# adapted on other machines.
holo_struc = Structure.load_structure(
    id="1avd__1__1.A__1.C",
    protein_path=Path(
        "/Users/yusuf/.local/share/plinder/2024-06/v2/systems/1avd__1__1.A__1.C/receptor.cif"
    ),
    protein_sequence=Path(
        "/Users/yusuf/.local/share/plinder/2024-06/v2/systems/1avd__1__1.A__1.C/sequences.fasta"
    ),
    list_ligand_sdf_and_resolved_smiles=[
        (
            Path(
                "/Users/yusuf/.local/share/plinder/2024-06/v2/systems/1avd__1__1.A__1.C/ligand_files/1.C.sdf"
            ),
            "CC(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O",
        ),
    ],
)

#### load apo structure

In [None]:
# Load the apo structure linked to system 1avd__1__1.A__1.C. No ligand is
# passed; the sequence FASTA is the one from the holo system directory.
# NOTE(review): the id says "1avd_A" while the superposed file lives under
# .../apo/1avd__1__1.A__1.C/1nqn_A/ — confirm which identifier is intended.
apo_struc = Structure.load_structure(
    id="1avd_A",
    protein_path=Path(
        "/Users/yusuf/.local/share/plinder/2024-06/v2/linked_structures/apo/1avd__1__1.A__1.C/1nqn_A/superposed.cif"
    ),
    protein_sequence=Path(
        "/Users/yusuf/.local/share/plinder/2024-06/v2/systems/1avd__1__1.A__1.C/sequences.fasta"
    ),
    structure_type="apo",
)

#### List structure fields

In [None]:
holo_struc.model_fields

#### get structure properties

In [None]:
holo_struc.get_properties()

#### Inspect holo structure

In [None]:
holo_struc

#### Inspect holo ligand
Returns a chain-mapped dictionary of `original_unresolved_mol`, `resolved_ligand_mol`, `resolved_ligand_mol_conformer`, and `matches`


In [None]:
holo_struc.ligand_mols

#### Inspect holo sequences
Returns a chain-mapped dictionary of sequences

In [None]:
holo_struc.resolved_sequences

#### Inspect holo atom array
This is the input sequence-renumbered array


In [None]:
holo_struc.protein_atom_array

#### Inspect unresolved input structure sequence


In [None]:
holo_struc.aligned_unresolved_seqs

#### Inspect unresolved input structure indices
Unresolved structure original indices with indices matching the residue number of resolved sequence

In [None]:
holo_struc.unresolved_aligned_indices

#### Inspect unresolved input structure sequence
Unresolved structure original indices with indices matching the residue number of resolved sequence

#### Inspect unresolved original ligand loaded from sdf

In [None]:
holo_struc.original_unresolved_mols

#### Inspect resolved ligand loaded from smiles

In [None]:
holo_struc.resolved_ligand_mols

#### Inspect random conformer of resolved ligand loaded from smiles

In [None]:
holo_struc.resolved_ligand_conformers

#### Inspect coordinates of random conformer of resolved ligand loaded from smiles

In [None]:
holo_struc.resolved_ligand_conformers_coords

#### Inspect coordinates of resolved ligand loaded from smiles and aligned with original ligand

In [None]:
holo_struc.resolved_ligand_mols_coords

#### Inspect coordinates of original unresolved ligand

In [None]:
holo_struc.original_unresolved_mols_coords

#### Inspect protein structure dataframe with indices renumbered to match sequence

In [None]:
holo_struc.protein_dataframe

#### Inspect protein backbone mask

In [None]:
holo_struc.protein_backbone_mask

#### Inspect protein calpha mask

In [None]:
holo_struc.protein_calpha_mask

#### Inspect number of protein atoms

In [None]:
holo_struc.protein_n_atoms

#### Inspect protein chain ids

In [None]:
holo_struc.protein_chains

#### Inspect unresolved structure fasta

In [None]:
holo_struc.unresolved_protein_fasta

### Test sequence alignment

In [None]:
holo_struc

In [None]:
apo_struc

In [None]:
# NOTE: for structure alignment to work, the apo and holo structures need to
# have the same chain id, so the apo chain is renamed to "1.A" first.
apo_struc.set_chain("1.A")

In [None]:
apo_struc.protein_atom_array

In [None]:
seq_align = holo_struc.get_per_chain_seq_alignments(apo_struc)

In [None]:
seq_align

In [None]:
holo_struc.protein_atom_array[0]

In [None]:
apo_struc.protein_atom_array[0]

### Alignment and Cropping

In [None]:
align_common_seq = holo_struc.align_common_sequence(
        apo_struc,
    )

In [None]:
holo_struc

In [None]:
apo_struc

In [None]:
align_common_seq[0]

In [None]:
align_common_seq[1]

In [None]:
superimposed_apo = apo_struc.superimpose(holo_struc)
superimposed_apo

In [None]:
holo_struc.protein_coords

In [None]:
apo_struc.protein_dataframe

In [None]:
holo_struc.protein_b_factor

In [None]:
test_sys = PlinderSystem(system_id="1avd__1__1.A__1.C", resolved_smiles_dict={"1.C": "CC(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O"})

In [None]:
test_sys.holo_structure

In [None]:
test_sys.alt_structures

In [None]:
test_sys.best_linked_structures_paths

In [None]:
cropped = test_sys.create_masked_bound_unbound_complexes()

In [None]:
cropped[0]

In [None]:
cropped[1]

In [None]:
# Element-wise comparison of the holo atom names against "CA".
# NOTE(review): `mask` is never passed to `filter` below, which receives the
# plain string "CA" instead — confirm which form `Structure.filter` expects
# and whether this assignment is still needed.
mask = holo_struc.protein_atom_array.atom_name == "CA"
holo_struc.filter(
        property="atom_name",
        mask="CA",

    )

In [None]:
holo_struc

In [None]:
holo_struc + apo_struc

In [None]:
holo_struc.protein_atom_array[holo_struc.protein_atom_array.chain_id == "1.A"]

In [None]:
holo_struc

## Loader

In [None]:
from plinder.core.loader import PlinderDataset
from plinder.core.loader.loader import get_torch_loader
from plinder.core import get_split
from plinder.core.scores import query_links

In [None]:
splits_df = get_split()

In [None]:
links = query_links()

In [None]:
links[links.reference_system_id == "6pl9__1__1.A__1.C"]

In [None]:
train_dataset = PlinderDataset(df=splits_df[splits_df.system_id =="6pl9__1__1.A__1.C"])

In [None]:
test_data = train_dataset[0]

In [None]:
test_data

In [None]:
train_loader = get_torch_loader(
    train_dataset
)

In [None]:
for data in train_loader:
    print(data)
    break

In [None]:
from rdkit import Chem
Chem.MolFromSmiles('s1cncc1')

In [None]:
Chem.MolFromSmiles('C(=O)(O)c1cncs1')

In [None]:
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem

# First molecule: the bare ring that serves as the reference core.
# Embed a conformer for it (the return value of EmbedMolecule is printed),
# then run a UFF optimisation on it.
core = Chem.MolFromSmiles("c1cncs1")
print(AllChem.EmbedMolecule(core))
AllChem.UFFOptimizeMolecule(core)

# `core` has coordinates at this point; the mol block is generated here but
# its value is neither stored nor displayed.
Chem.MolToMolBlock(core)
print("****************")

# Second molecule: the same ring carrying a carboxylic acid group. Embed it
# against `core` with a fixed random seed and show the resulting mol block.
mol = Chem.MolFromSmiles("C(=O)(O)c1cncs1")
AllChem.ConstrainedEmbed(mol, core, randomseed=123)
Chem.MolToMolBlock(mol)

In [None]:
from plinder.core.structure.atoms import match_ligands

# Run the library's `match_ligands` on the resolved SMILES and the ligand SDF
# of system 1avd__1__1.A__1.C, without adding hydrogens.
match_ligands(
    "CC(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O",
    Path(
        "/Users/yusuf/.local/share/plinder/2024-06/v2/systems/1avd__1__1.A__1.C/ligand_files/1.C.sdf"
    ),
    add_hydrogen=False,
)

In [None]:
a = Chem.MolFromSmiles("CC(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O")
a

In [None]:
b = next(Chem.SDMolSupplier(Path("/Users/yusuf/.local/share/plinder/2024-06/v2/systems/1avd__1__1.A__1.C/ligand_files/1.C.sdf")))
b

In [None]:
AllChem.ConstrainedEmbed(a, b)

In [None]:
b

In [None]:
a.GetSubstructMatches(b)

In [None]:
holo_struc.ligand_mols

In [None]:
def match_ligands(
    resolved_smiles: str, unresolved_sdf: Path, add_hydrogen: bool
) -> tuple[tuple[tuple[int, ...], ...], Chem.rdchem.Mol, Chem.rdchem.Mol]:
    """Embed a SMILES-derived ligand onto an SDF ligand and match their atoms.

    The first molecule in ``unresolved_sdf`` is used as the reference. The
    molecule built from ``resolved_smiles`` is passed to
    ``AllChem.ConstrainedEmbed`` with that reference as the core. If any step
    of this fails, both returned molecules fall back to the molecule read
    from the SDF, so the function still returns a usable result.

    Parameters
    ----------
    resolved_smiles:
        SMILES string of the resolved ligand.
    unresolved_sdf:
        Path to the SDF file holding the (unresolved) ligand.
    add_hydrogen:
        When true, hydrogens are added (with coordinates) to both molecules
        before embedding.

    Returns
    -------
    tuple
        ``(matches, resolved_mol, unresolved_mol)`` where ``matches`` is the
        result of ``resolved_mol.GetSubstructMatches(unresolved_mol)``.

    Raises
    ------
    ValueError
        If no molecule can be read from ``unresolved_sdf``. Without a
        reference molecule there is nothing to fall back to.
    """
    import logging

    # Read the reference ligand exactly once. `str()` keeps this working on
    # RDKit builds whose supplier only accepts a string file name.
    supplier = Chem.SDMolSupplier(str(unresolved_sdf))
    sdf_mol = next(supplier, None)
    if sdf_mol is None:
        # Covers both an empty file and an entry RDKit could not parse;
        # previously this surfaced later as an AttributeError on None.
        raise ValueError(f"Could not read a ligand from {unresolved_sdf}")

    try:
        resolved_mol = Chem.MolFromSmiles(resolved_smiles)
        if resolved_mol is None:
            # MolFromSmiles signals a parse failure by returning None.
            raise ValueError(f"Could not parse SMILES {resolved_smiles!r}")
        unresolved_mol = sdf_mol
        if add_hydrogen:
            # AddHs returns new molecules, so `sdf_mol` stays untouched and
            # remains available for the fallback below.
            resolved_mol = Chem.AddHs(resolved_mol, addCoords=True)
            unresolved_mol = Chem.AddHs(unresolved_mol, addCoords=True)
        AllChem.ConstrainedEmbed(resolved_mol, unresolved_mol)
    except Exception:
        # TODO: Need to figure out how to handle failure, for now,
        # set it to unresolved. The broad catch is deliberate (best effort),
        # but the failure is logged instead of being swallowed silently.
        logging.getLogger(__name__).warning(
            "match_ligands: could not embed resolved ligand onto %s; "
            "falling back to the unresolved ligand",
            unresolved_sdf,
            exc_info=True,
        )
        resolved_mol = unresolved_mol = sdf_mol
    # Returns all possible set of matches in case of symmetric molecules
    matches = resolved_mol.GetSubstructMatches(unresolved_mol)
    return matches, resolved_mol, unresolved_mol

In [None]:
# Call `match_ligands` on the same SMILES / SDF pair for system
# 1avd__1__1.A__1.C, without adding hydrogens.
match_ligands(
    "CC(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O",
    Path(
        "/Users/yusuf/.local/share/plinder/2024-06/v2/systems/1avd__1__1.A__1.C/ligand_files/1.C.sdf"
    ),
    add_hydrogen=False,
)