# TODOS:
- Mechanism for selecting apo/pred; right now we select at most one apo/pred structure by `sort_score`
- Relax the requirement that both apo and holo structures be present
- Agree on what our feature input and output should look like

In [None]:
from plinder.core.structure.structure import Structure
from plinder.core import PlinderSystem
from pathlib import Path

# Load structure

#### load holo structure

In [None]:
# Root of the local data directory. Derived from the current user's home
# rather than a hard-coded, user-specific absolute path so the notebook can
# run on other machines.
# NOTE(review): plinder may expose its own data-mount configuration — confirm
# and prefer it over this default if so.
ROOT_DIR = Path.home() / ".local" / "share"
# Directory holding all input files of the example holo system.
holo_system_dir = ROOT_DIR / "plinder/2024-06/v2/systems/1avd__1__1.A__1.C"
holo_struc = Structure.load_structure(
    id="1avd__1__1.A__1.C",
    protein_path=holo_system_dir / "receptor.cif",
    protein_sequence=holo_system_dir / "sequences.fasta",
    # Each entry pairs a ligand SDF file with its input SMILES string.
    list_ligand_sdf_and_input_smiles=[
        (
            holo_system_dir / "ligand_files/1.C.sdf",
            "CC(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O",
        )
    ],
)

#### load apo structure

In [None]:
# Load the apo counterpart from the superposed CIF under linked_structures/apo,
# passing the sequences.fasta of system 1avd__1__1.A__1.C as the sequence input.
# NOTE(review): the id is "1avd_A" while the structure file lives under
# ".../1nqn_A/" — confirm which identifier is intended.
apo_struc = Structure.load_structure(
    id="1avd_A",
    protein_path=ROOT_DIR/ "plinder/2024-06/v2/linked_structures/apo/1avd__1__1.A__1.C/1nqn_A/superposed.cif",
    protein_sequence=ROOT_DIR/"plinder/2024-06/v2/systems/1avd__1__1.A__1.C/sequences.fasta",
    structure_type="apo"
    )

#### list structure  fields

In [None]:
holo_struc.model_fields

#### get structure properties

In [None]:
holo_struc.get_properties()

#### Inspect holo structure

In [None]:
holo_struc

#### Inspect holo ligand
Returns a chain-mapped dictionary of original_unresolved_mol, resolved_ligand_mol, resolved_ligand_mol_conformer, and matches


In [None]:
holo_struc.ligand_mols

#### Inspect holo sequences
Returns a chain-mapped dictionary of sequences

In [None]:
holo_struc.input_sequences

#### Inspect holo atom array
This is the input sequence-renumbered array


In [None]:
holo_struc.protein_atom_array

#### Inspect unresolved input structure sequence


In [None]:
# holo_struc.aligned_unresolved_seqs

#### Inspect unresolved input structure indices
Unresolved structure original indices with indices matching the residue number of resolved sequence

In [None]:
# holo_struc.unresolved_aligned_indices

#### Inspect unresolved input structure sequence
Unresolved structure original indices with indices matching the residue number of resolved sequence

#### Inspect original holo PDB ligand loaded from SDF

In [None]:
holo_struc.resolved_ligand_mols #resolved_ligand_mols

#### Inspect input  ligand loaded from smiles

In [None]:
# Both attributes are evaluated; because they are bare expressions, only the
# last one is shown as the cell's output in a notebook.
holo_struc.input_ligand_templates #resolved_ligand_mols
holo_struc.input_ligand_conformers  #resolved_ligand_conformers

#### Inspect random conformer of resolved ligand loaded from smiles

In [None]:
holo_struc.input_ligand_conformers  #resolved_ligand_conformers

#### Inspect coordinates of random conformer of resolved ligand loaded from smiles

In [None]:
holo_struc.input_ligand_conformer_coords #resolved_ligand_conformers_coords

#### Inspect coordinates of resolved ligand loaded from smiles and aligned with original ligand

#### Inspect coordinates of original unresolved ligand

In [None]:
holo_struc.resolved_ligand_mols_coords

#### Inspect protein structure dataframe with indices renumbered to match sequence

In [None]:
#holo_struc.protein_dataframe

#### Inspect protein backbone mask

In [None]:
holo_struc.protein_backbone_mask

#### Inspect protein calpha mask

In [None]:
holo_struc.protein_calpha_mask

#### Inspect number of protein atoms

In [None]:
holo_struc.protein_n_atoms

#### Inspect protein chain ids

In [None]:
holo_struc.protein_chains

#### Inspect unresolved structure fasta

In [None]:
holo_struc.protein_structure_sequence_fasta

### Test sequence alignment

In [None]:
holo_struc

In [None]:
apo_struc

In [None]:
# NOTE: for structure alignment to work, the apo and holo structures need to
# share the same chain id, so the apo chain is set to "1.A" here.
apo_struc.set_chain("1.A")

In [None]:
apo_struc.protein_atom_array

In [None]:
seq_align = holo_struc.get_per_chain_seq_alignments(apo_struc)

In [None]:
seq_align

In [None]:
holo_struc.protein_atom_array[0]

In [None]:
apo_struc.protein_atom_array[0]

### Alignment and Cropping

In [None]:
# Align the holo structure against the apo structure; the result is indexed
# with [0] and [1] in the cells that follow.
align_common_seq = holo_struc.align_common_sequence(apo_struc)

In [None]:
holo_struc

In [None]:
apo_struc

In [None]:
align_common_seq[0]

In [None]:
align_common_seq[1]

In [None]:
superimposed_apo = apo_struc.superimpose(holo_struc)
superimposed_apo

In [None]:
holo_struc.protein_coords

In [None]:
#apo_struc.protein_dataframe

In [None]:
holo_struc.protein_structure_b_factor

In [None]:
# Build the example system from its id, supplying one input SMILES string
# keyed by "1.C".
test_sys = PlinderSystem(
    system_id="1avd__1__1.A__1.C",
    input_smiles_dict={
        "1.C": "CC(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O",
    },
)

In [None]:
test_sys.holo_structure

In [None]:
test_sys.alt_structures

In [None]:
test_sys.best_linked_structures_paths

In [None]:
#cropped = test_sys.create_masked_bound_unbound_complexes()

In [None]:
# Compare the protein atom array's `atom_name` values with "CA".
# NOTE(review): `mask` is not referenced again in the visible cells — confirm
# whether it was meant to be passed to `filter` below.
mask = holo_struc.protein_atom_array.atom_name == "CA"
# NOTE(review): `mask="CA"` passes a plain string; if `filter` expects an
# iterable of allowed values, the string would be treated as the characters
# "C" and "A" — confirm against the Structure.filter signature.
holo_struc.filter(
        property="atom_name",
        mask="CA",

    )

In [None]:
holo_struc

In [None]:
holo_struc + apo_struc

In [None]:
holo_struc.protein_atom_array[holo_struc.protein_atom_array.chain_id == "1.A"]

In [None]:
holo_struc

## Loader

In [None]:
from plinder.core.loader import PlinderDataset
from plinder.core.loader.dataset import get_torch_loader
from plinder.core import get_split
from plinder.core.scores import query_links

#### Make plinder dataset

In [None]:
train_dataset = PlinderDataset()
#train_dataset = PlinderDataset(df=splits_df[splits_df.system_id =="6pl9__1__1.A__1.C"])

In [None]:
test_data = train_dataset[1]

In [None]:
holo_struc.input_ligand_conformer_coords

In [None]:
train_dataset[2]

test_data[110]

In [None]:
from plinder.core.utils.unpack import get_zips_to_unpack
zips = get_zips_to_unpack(kind="systems", system_ids=["11as__1__1.B__1.D"])

In [None]:
zips

#### Make torch loader

In [None]:
# Wrap the dataset in a torch loader, leaving every other option at its default.
train_loader = get_torch_loader(train_dataset)

In [None]:
# Fetch only the first batch from the loader for inspection. `next()` on a
# fresh iterator does this directly, and raises StopIteration on an empty
# loader instead of silently leaving `test_torch` unassigned.
data = next(iter(train_loader))
# Keep both names bound, as the original loop did.
test_torch = data

In [None]:
test_torch.keys()

In [None]:
test_torch['id']

In [None]:
# Print every key under 'features_and_coords' together with the `.shape`
# of its value.
for k, v in test_torch['features_and_coords'].items():
    print(k, v.shape)

In [None]:
holo_struc.input_ligand_conformer_atom_index_maps

In [None]:
holo_struc.resolved_ligand_structure_atom_index_maps

In [None]:
# holo_struc.conformer2resolved_mapping