**Analysing repeating scaffolds in the JAK2 and DRD2 datasets (of known inhibitors)**

In [1]:
import pandas as pd
import re
from collections import Counter

def find_scaffolds(protein="JAK2", first_n=8, top_k=5):
    """Return the most frequent SMILES prefixes among the positively labelled ligands.

    Reads ``data/{protein}_with_properties_{protein}binding.txt`` as CSV, keeps
    the rows whose ``Label`` column equals 1, tokenises every SMILES string and
    counts how often each prefix made of the first ``first_n`` tokens occurs.

    Note that a "scaffold" here is a purely textual prefix of the SMILES
    string, so it may be an incomplete fragment (unclosed ring or parenthesis).

    Args:
        protein: Target name used to build the input file path.
        first_n: Number of leading SMILES tokens that form a prefix (default 8).
        top_k: Number of most frequent prefixes to return (default 5).

    Returns:
        A list of at most ``top_k`` prefix strings, most frequent first. Ties
        keep the order in which the prefixes first appear in the file.

    Raises:
        ValueError: If ``first_n`` or ``top_k`` is smaller than 1, or if the
            file has no column named ``SMILES`` (matched case-insensitively).
    """
    if first_n < 1 or top_k < 1:
        raise ValueError("first_n and top_k must both be >= 1")

    df = pd.read_csv(f"data/{protein}_with_properties_{protein}binding.txt")
    print(len(df))
    # Keep the unfiltered frame under its own name; only Label == 1 rows are analysed.
    actives = df[df["Label"] == 1]
    print(len(actives))

    # Prefer the exact "SMILES" header, otherwise fall back to the first
    # case-insensitive match (str() guards against non-string column labels).
    if "SMILES" in actives.columns:
        smiles_col = "SMILES"
    else:
        smiles_col = next(
            (c for c in actives.columns if str(c).lower() == "smiles"), None
        )
        if smiles_col is None:
            raise ValueError("SMILES column not found")
    smiles_list = actives[smiles_col].dropna().astype(str).tolist()

    # One token = two-digit ring closure (%NN), Br, Cl, a bracket atom, or any
    # single remaining character.
    pattern = re.compile(r"%\d{2}|Br|Cl|\[[^\]]*\]|.")

    counter = Counter(
        "".join(pattern.findall(smiles)[:first_n]) for smiles in smiles_list
    )
    top_hits = counter.most_common(top_k)
    print(f"{protein}: {top_hits}")

    return [prefix for prefix, _ in top_hits]

In [2]:
# Most frequent SMILES prefixes per target, keyed by protein name; these are
# used later as generation prompts.
prompt_dict = {target: find_scaffolds(target) for target in ("JAK2", "DRD2", "DBH")}

print(prompt_dict)

4068
3650
JAK2: [('N#CCC1(n', 300), ('CN(c1ncn', 149), ('CS(=O)(=', 138), ('CC(C)(C)', 113), ('CN1CCN(c', 104)]
4221
3350
DRD2: [('COc1cccc', 392), ('O=S(=O)(', 97), ('COc1ccc(', 89), ('CN1CCc2c', 88), ('O=C(NCCC', 73)]
10
5
DBH: [('O=c1cccc', 1), ('NCCc1cnc', 1), ('CCN(CC)C', 1), ('NCc1c[nH]c(', 1), ('Fc1cc(F)', 1)]
{'JAK2': ['N#CCC1(n', 'CN(c1ncn', 'CS(=O)(=', 'CC(C)(C)', 'CN1CCN(c'], 'DRD2': ['COc1cccc', 'O=S(=O)(', 'COc1ccc(', 'CN1CCc2c', 'O=C(NCCC'], 'DBH': ['O=c1cccc', 'NCCc1cnc', 'CCN(CC)C', 'NCc1c[nH]c(', 'Fc1cc(F)']}


In [21]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from get_mol_prop import compute_properties_with_affinity
from datetime import datetime
import os


def gen_with_GPT2(protein, prompts=None, num_return_sequences=50, seed=None):
    """Sample SMILES from ``entropy/gpt2_zinc_87m``, score them and save the result.

    For every prompt, ``num_return_sequences`` continuations are sampled
    (nucleus sampling, ``top_p=0.9``, ``temperature=0.8``, ``max_length=120``).
    All decoded strings are passed to ``compute_properties_with_affinity``;
    rows without a ``CNNaffinity`` value are dropped and the rest is written to
    ``results/SMILES_GPT2/{protein}_{ddmmyy_HHMM}/all.csv``.

    Args:
        protein: Target name; selects the docking config
            ``docking/{protein}/{protein}_config.txt`` and the output folder.
        prompts: Optional list of SMILES prefixes to seed generation with.
            Defaults to ``prompt_dict[protein]`` (the notebook-level dict).
        num_return_sequences: Number of samples drawn per prompt (default 50).
        seed: Optional integer passed to ``transformers.set_seed`` so sampling
            is reproducible. ``None`` (default) leaves the RNG state untouched.

    Raises:
        KeyError: If ``prompts`` is not given and ``protein`` is missing from
            ``prompt_dict``.
    """
    print(f"Protein: {protein}")

    # Resolve the prompts before loading the model so a missing key fails fast.
    if prompts is None:
        prompts = prompt_dict[protein]

    if seed is not None:
        # Imported locally so the notebook's import cell does not need to change.
        from transformers import set_seed
        set_seed(seed)

    tokenizer = AutoTokenizer.from_pretrained("entropy/gpt2_zinc_87m")
    tokenizer.pad_token = tokenizer.eos_token  # define a pad token explicitly to be safe
    tokenizer.padding_side = 'left'  # left-pad, since this is a decoder-only model
    tokenizer.model_max_length = 1024

    model = AutoModelForCausalLM.from_pretrained("entropy/gpt2_zinc_87m")
    model.config.pad_token_id = tokenizer.pad_token_id

    print(f"  Prompts: {prompts}")

    generated = []
    for prompt in prompts:
        # Announce the prompt before the (slow) sampling step, not after it.
        print(f"\tGenerating using seed [{prompt}] ...")

        enc = tokenizer(
            prompt,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=tokenizer.model_max_length,
        )

        outputs = model.generate(
            enc.input_ids,
            attention_mask=enc.attention_mask,
            max_length=120,
            do_sample=True,
            top_p=0.9,
            temperature=0.8,
            num_return_sequences=num_return_sequences,
            pad_token_id=tokenizer.pad_token_id,
        )

        generated.extend(
            tokenizer.decode(sequence, skip_special_tokens=True) for sequence in outputs
        )

    # NOTE(review): the helper is defined in get_mol_prop; judging by the
    # notebook's logged output it docks each molecule with gnina — confirm there.
    scored_df = compute_properties_with_affinity(
        input_data=generated,
        gnina_path="docking",
        config_path=f"docking/{protein}/{protein}_config.txt",
        temp_dir="/tmp/",
    )

    # Save the molecules with their properties and CNNaffinity; rows whose
    # CNNaffinity is missing are dropped first.
    docked_df = scored_df.dropna(subset=["CNNaffinity"])
    date_time = datetime.now().strftime("%d%m%y_%H%M")
    dir_name = f"results/SMILES_GPT2/{protein}_{date_time}"
    os.makedirs(dir_name, exist_ok=True)
    docked_df.to_csv(f"{dir_name}/all.csv", index=False)

In [22]:
# Generate, score and save molecules for each target, in this order.
for target in ("JAK2", "DRD2"):
    gen_with_GPT2(protein=target)

Protein: JAK2
  Prompts: ['N#CCC1(n', 'CN(c1ncn', 'CS(=O)(=', 'CC(C)(C)', 'CN1CCN(c']
	Generating using seed N#CCC1(n] ...
	Generating using seed CN(c1ncn] ...
	Generating using seed CS(=O)(=] ...
	Generating using seed CC(C)(C)] ...
	Generating using seed CN1CCN(c] ...

PROCESSING MOLECULE: N#CCC1(n2ccc(=O)c(Br)c2)c(Cl)c1
Running Gnina command: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|

[06:52:42] SMILES Parse Error: extra open parentheses for input: 'N#CCC1(n2c(N)c(C#N)c2ccccc21'
[06:52:42] SMILES Parse Error: extra open parentheses for input: 'N#CCC1(n2c(N)c(C#N)c2ccccc21'


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -6.12        6.16       0.5483      6.313

gnina output result line captured: 1       -6.12        6.16       0.5483      6.313



[06:54:21] SMILES Parse Error: extra open parentheses for input: 'N#CCC1(n2cnc2ccccc21'
[06:54:21] SMILES Parse Error: extra open parentheses for input: 'N#CCC1(n2cnc2ccccc21'


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -5.26        0.28       0.7081      5.753

gnina output result line captured: 1       -5.26        0.28       0.7081      5.753



[06:54:50] SMILES Parse Error: extra open parentheses for input: 'N#CCC1(n2nc(C#N)c(N2CCCCC2)c1Cl'
[06:54:50] SMILES Parse Error: extra open parentheses for input: 'N#CCC1(n2nc(C#N)c(N2CCCCC2)c1Cl'


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -6.77       -0.07       0.2036      5.758

gnina output result line captured: 1       -6.77       -0.07       0.2036      5.758



[06:56:09] SMILES Parse Error: extra open parentheses for input: 'N#CCC1(n2ncc(Br)c1C(=O)c1ccccc1F'
[06:56:09] SMILES Parse Error: extra open parentheses for input: 'N#CCC1(n2ncc(Br)c1C(=O)c1ccccc1F'


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -5.89       -0.29       0.6391      5.924

gnina output result line captured: 1       -5.89       -0.29       0.6391      5.924



[06:56:15] SMILES Parse Error: extra open parentheses for input: 'N#CCC1(n2ccc(=O)[nH]c1=O'
[06:56:15] SMILES Parse Error: extra open parentheses for input: 'N#CCC1(n2ccc(=O)[nH]c1=O'
[06:56:15] Can't kekulize mol.  Unkekulized atoms: 5 9 17
[06:56:15] Can't kekulize mol.  Unkekulized atoms: 5 9 17
[06:56:15] SMILES Parse Error: extra open parentheses for input: 'N#CCC1(n2nnc2ccc(CN)cc21'
[06:56:15] SMILES Parse Error: extra open parentheses for input: 'N#CCC1(n2nnc2ccc(CN)cc21'


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -6.37        0.04       0.2939      6.252

gnina output result line captured: 1       -6.37        0.04       0.2939      6.252



[06:56:32] Can't kekulize mol.  Unkekulized atoms: 17 18 19 20 21 22 23
[06:56:32] Can't kekulize mol.  Unkekulized atoms: 17 18 19 20 21 22 23
[06:56:32] SMILES Parse Error: unclosed ring for input: 'N#CCC1(n2nnc(CN)c2-c2nn3c(C#N)c3c2CCCC3)c2ccccc21'
[06:56:32] SMILES Parse Error: unclosed ring for input: 'N#CCC1(n2nnc(CN)c2-c2nn3c(C#N)c3c2CCCC3)c2ccccc21'


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -7.42        0.34       0.8557      6.874

gnina output result line captured: 1       -7.42        0.34       0.8557      6.874



[06:57:04] SMILES Parse Error: extra open parentheses for input: 'N#CCC1(n2ccc(=O)cc1O'
[06:57:04] SMILES Parse Error: extra open parentheses for input: 'N#CCC1(n2ccc(=O)cc1O'


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -7.31        0.20       0.8234      6.562

gnina output result line captured: 1       -7.31        0.20       0.8234      6.562



[06:57:41] SMILES Parse Error: extra open parentheses for input: 'N#CCC1(n2nc(C(N)=O)nc2cc(F)ccc21'
[06:57:41] SMILES Parse Error: extra open parentheses for input: 'N#CCC1(n2nc(C(N)=O)nc2cc(F)ccc21'
[06:57:41] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 11 12
[06:57:41] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 11 12
[06:57:41] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 18
[06:57:41] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 18
[06:57:41] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 11
[06:57:41] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 11


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -7.70       -0.05       0.2423      5.539

gnina output result line captured: 1       -7.70       -0.05       0.2423      5.539



[06:57:56] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9
[06:57:56] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -6.96        0.98       0.9775      6.971

gnina output result line captured: 1       -6.96        0.98       0.9775      6.971



[06:58:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 15 16
[06:58:09] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 15 16


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -6.56        0.21       0.3529      6.037

gnina output result line captured: 1       -6.56        0.21       0.3529      6.037



[06:58:16] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 19
[06:58:16] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 19


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -5.99        0.60       0.8946      5.420

gnina output result line captured: 1       -5.99        0.60       0.8946      5.420



[06:58:22] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 15 16
[06:58:22] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 15 16


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -6.88       -0.24       0.7943      6.240

gnina output result line captured: 1       -6.88       -0.24       0.7943      6.240



[06:58:35] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 12
[06:58:35] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 12
[06:58:35] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 14 15
[06:58:35] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 14 15


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -7.34        0.06       0.7226      6.398

gnina output result line captured: 1       -7.34        0.06       0.7226      6.398



[06:58:42] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 9 10 11 12
[06:58:42] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 9 10 11 12


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -7.35        1.39       0.9678      6.509

gnina output result line captured: 1       -7.35        1.39       0.9678      6.509



[06:58:54] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 12 13 18 19
[06:58:54] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 12 13 18 19
[06:58:54] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 15 16
[06:58:54] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 15 16
[06:58:54] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9
[06:58:54] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9
[06:58:54] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 17
[06:58:54] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 17


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -5.67        0.72       0.9275      5.468

gnina output result line captured: 1       -5.67        0.72       0.9275      5.468



[06:59:00] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 16 17
[06:59:00] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 16 17


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -7.31        2.56       0.8212      7.051

gnina output result line captured: 1       -7.31        2.56       0.8212      7.051



[06:59:06] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 13
[06:59:06] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 13


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -5.86        0.82       0.9370      5.173

gnina output result line captured: 1       -5.86        0.82       0.9370      5.173



[06:59:24] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 15 16
[06:59:24] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 15 16
[06:59:24] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 11 12
[06:59:24] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 11 12
[06:59:24] Explicit valence for atom # 10 C, 6, is greater than permitted
[06:59:24] Explicit valence for atom # 10 C, 6, is greater than permitted
[06:59:24] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 16
[06:59:24] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 16
[06:59:24] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 12 13 20
[06:59:24] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 12 13 20
[06:59:24] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 17 18
[06:59:24] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 17 18
[06:59:24] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 11 12
[06:59:24] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 11 12


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -7.56        1.59       0.9638      6.581

gnina output result line captured: 1       -7.56        1.59       0.9638      6.581



[06:59:36] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 11 12
[06:59:36] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 11 12


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -5.95        0.65       0.8511      5.543

gnina output result line captured: 1       -5.95        0.65       0.8511      5.543



[06:59:42] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 12 13 20
[06:59:42] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 12 13 20


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -7.67       -0.45       0.4087      6.234

gnina output result line captured: 1       -7.67       -0.45       0.4087      6.234



[06:59:50] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 16
[06:59:50] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 16
[06:59:50] Explicit valence for atom # 10 C, 6, is greater than permitted
[06:59:50] Explicit valence for atom # 10 C, 6, is greater than permitted
[06:59:50] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 10 11 12 13 14 15
[06:59:50] Can't kekulize mol.  Unkekulized atoms: 2 3 4 6 7 8 9 10 11 12 13 14 15


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -7.50        1.93       0.7795      6.913

gnina output result line captured: 1       -7.50        1.93       0.7795      6.913



[06:59:57] Can't kekulize mol.  Unkekulized atoms: 2 3 4 10 11 12 13 14 15
[06:59:57] Can't kekulize mol.  Unkekulized atoms: 2 3 4 10 11 12 13 14 15


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/JAK2/JAK2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -7.91       -0.57       0.8546      6.996

gnina output result line captured: 1       -7.91       -0.57       0.8546      6.996



[07:30:51] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 24 25 26 27 28
[07:30:51] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 24 25 26 27 28
[07:30:51] Can't kekulize mol.  Unkekulized atoms: 12 13 14 15 17 18 19 27 31
[07:30:51] Can't kekulize mol.  Unkekulized atoms: 12 13 14 15 17 18 19 27 31


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/DRD2/DRD2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box
 | pose 0 | ligand outside box
 | pose 0 | ligand outside box
 | pose 0 | ligand outside box
 | pose 0 | ligand outside box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -9.05

[07:39:05] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 10 12 13 14 15
[07:39:05] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 10 12 13 14 15


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/DRD2/DRD2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -8.44       -0.35       0.8731      6.700

gnina output result line captured: 1       -8.44       -0.35       0.8731      6.700



[07:40:40] Explicit valence for atom # 18 O, 3, is greater than permitted
[07:40:40] Explicit valence for atom # 18 O, 3, is greater than permitted


Gnina output:
              _             
             (_)            
   __ _ _ __  _ _ __   __ _ 
  / _` | '_ \| | '_ \ / _` |
 | (_| | | | | | | | | (_| |
  \__, |_| |_|_|_| |_|\__,_|
   __/ |                    
  |___/                     

gnina v1.3 master:97fa6bc+   Built Oct  3 2024.
gnina is based on smina and AutoDock Vina.
Please cite appropriately.

Commandline: docking/gnina --config docking/DRD2/DRD2_config.txt --ligand /tmp/ligand.pdb --seed 0 --cpu 16
Using random seed: 0

0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************
 | pose 0 | initial pose not within box

mode |  affinity  |  intramol  |    CNN     |   CNN
     | (kcal/mol) | (kcal/mol) | pose score | affinity
-----+------------+------------+------------+----------
    1       -9.26        0.31       0.8090      7.379

gnina output result line captured: 1       -9.26        0.31       0.8090      7.379



In [25]:
# Print summary statistics (count, mean, std, quartiles, ...) of the
# "CNNaffinity" column for each results file, JAK2 first and then DRD2.
for results_csv in (
    "results/SMILES_GPT2/JAK2_040725_0725/all.csv",
    "results/SMILES_GPT2/DRD2_040725_0757/all.csv",
):
    affinity_summary = pd.read_csv(results_csv)["CNNaffinity"].describe()
    print(affinity_summary)

count    205.000000
mean       6.366605
std        0.656698
min        4.077000
25%        5.954000
50%        6.391000
75%        6.870000
max        7.860000
Name: CNNaffinity, dtype: float64
count    242.000000
mean       6.968207
std        0.588735
min        5.021000
25%        6.652250
50%        7.008000
75%        7.384750
max        8.351000
Name: CNNaffinity, dtype: float64
