## PROTEIN GENERATOR: a notebook for test runs and experimentation

In [1]:
%load_ext autoreload
%autoreload 2

import os, sys, json
# Make the project's helper packages importable. The base directory is whatever
# Python placed first on sys.path for this kernel.
# NOTE(review): this is assumed to be the repository root -- confirm for your Jupyter setup.
_base_dir = sys.path[0]
for _subdir in ('utils', 'model'):
    # os.path.join uses the platform separator and, unlike plain string
    # concatenation, does not produce the filesystem-root path '/utils' when
    # sys.path[0] happens to be the empty string.
    sys.path.append(os.path.join(_base_dir, _subdir))

# Load the default inference arguments; the cells below override individual keys.
# The encoding is given explicitly so the result does not depend on the locale default.
with open('./examples/args.json', 'r', encoding='utf-8') as f:
    args = json.load(f)
    
# import sampler to run inference    
from sampler import SEQDIFF_sampler
from MonoCTRL import T5_tools, MonoXLNet, MonobodyData, evaluate_seqs, inference



### Specify parameters here

In [2]:
# ARGUMENTS
# Fixed scaffold segments of the monobody, in order from the start of the
# sequence to its end. Consecutive segments are separated by a masked loop.
start_seq = 'VSSVPTKLEVVAATPTSLLISWDA'
before_de_loop = 'YYRITYGETGGNSPVQEFTVPG'
before_fg_loop = 'TATISGLKPGVDYTITVYA'
end_seq = 'PISINYRT'

# 'X' is the mask token used in the starting sequence.
mask_token = 'X'

# Number of masked residues in each loop; edit these to resize a loop.
# NOTE(review): the first loop is presumably the BC loop -- confirm.
first_loop_len = 6
de_loop_len = 3
fg_loop_len = 10

# Interleave the fixed segments with the masked loops.
monobody_seq = ''.join([
    start_seq, mask_token * first_loop_len,
    before_de_loop, mask_token * de_loop_len,
    before_fg_loop, mask_token * fg_loop_len,
    end_seq,
])
# Core run settings, applied to the argument dictionary in a single call.
args.update({
    # Starting sequence; 'X' is the mask token, e.g. XXXXXXXXXXXPEPSEQXXXXXXXXXXXX.
    'sequence': monobody_seq,
    # Path to a PDB file to diffuse (None here: no PDB is supplied).
    'pdb': None,
    # Contigs for the design. To sample a length range use e.g. ['70-80'] (aa);
    # to add structural and sequence conditioning from a PDB, use the chain.
    'contigs': ['0'],
    # Output path, including the filename prefix, where results are saved.
    'out': './examples/out/design',
    # If True, save the best-pLDDT example in the trajectory.
    'save_best_plddt': True,
    # Residues for the model to target in binder design problems, given as
    # chain + PDB index, e.g. 'B23,B45,B66'.
    'hotspots': None,
    # Save the arguments as JSON.
    'save_args': True,
    # Save a trb file with metadata from the run.
    'dump_trb': True,
})

# SECONDARY STRUCTURE BIAS -- three alternative ways to specify it (a, b, c)
args.update({
    # (a) per-residue DSSP string, e.g. 'XXXXXHHHHHHHXXXXLLLLXXXXEEEEEEEEEEEEEXXX'
    'secondary_structure': None,
    # (b) path to a PDB whose DSSP features should be copied
    'dssp_pdb': None,
    # (c) fraction of the sequence to randomly bias toward helix, strand and
    #     loop respectively (per the key names); recommended range [0, 0.05]
    'helix_bias': 0.00,
    'strand_bias': 0.00,
    'loop_bias': 0.00,
})

# GUIDING POTENTIALS (can be used in combination)
args.update({
    # aa_bias: one-letter amino acid code followed by the fraction of the
    # sequence to bias, as a comma-separated list, e.g. 'W0.2,Y0.1'
    'aa_composition': 'W0.2',
    # charge: target charge to bias toward, and the pH to design in
    'target_charge': -10,
    'target_pH': 7.4,
    # hydrophobic: GRAVY score to bias toward (use a negative score to bias
    # away from hydrophobic)
    'hydrophobic_score': -10,
    # Required for any potential in use: a comma-separated list of potential
    # names (e.g. 'aa_bias,charge,hydrophobic') and, in the same order, a
    # comma-separated list of weights.
    # NOTE(review): only 'monobody' is listed here, so the aa_bias / charge /
    # hydrophobic settings above are presumably not applied -- confirm.
    'potentials': 'monobody',
    'potential_scale': '1.0',
})

# DIFFUSION PARAMS and PARTIAL DIFFUSION
args.update({
    # Number of diffusion steps. 10 or 15 is enough for a quick test, 25 is
    # generally good; use 50 or 100 for harder problems and better AF2 results.
    'T': 25,
    # 1.0 means full diffusion; any other value enters partial diffusion mode,
    # diffusing the input to the specified temperature.
    'sampling_temp': 1.0,
    # Path to a trb file to partially diffuse with the same contigs; otherwise
    # a pdb or sequence can be given as the input to partially diffuse from.
    'trb': None,
})


# Build the sampler from the assembled args, then initialize its model and diffuser.
# The args are passed to the constructor because calling S.set_args(args)
# produced an error (Paul's edit).
S = SEQDIFF_sampler(args)
S.model_init()
S.diffuser_init()

Loading model checkpoint...
Successfully loaded model checkpoint


### Generate Samples
Once you have generated samples you like, use the saved `args.json` file to launch a production run.

For example:
```
python ./inference.py -input_json ./examples/out/design_000000_args.json
```

In [36]:
# Number of samples to generate in this cell; raise it to draw several per run.
num_samples = 1

# The loop index is unused. It is deliberately not named `_`, which would
# shadow IPython's last-output variable at notebook top level.
for sample_idx in range(num_samples):
    S.generate_sample()

Preparing sequence input
Using potential: monobody
Generating sample 000023 ...
tensor([19, 15, 15, 19, 14, 16, 11, 10,  6, 19, 19,  0,  0, 16, 14, 16, 15, 10,
        10,  9, 15, 17,  3,  0, 11,  7, 16, 15,  7, 11, 18, 18,  1,  9, 16, 18,
         7,  6, 16,  7,  7,  2, 15, 14, 19,  5,  6, 13, 16, 19, 14,  7,  2, 10,
         2, 16,  0, 16,  9, 15,  7, 10, 11, 14,  7, 19,  3, 18, 16,  9, 16, 19,
        18,  0, 16,  0, 14,  7, 16, 16,  5, 15,  0,  2, 14,  9, 15,  9,  2, 18,
         1, 16], device='cuda:0') 
 tensor([18, 18, 18, 18, 18, 18,  7,  7,  7, 11, 19,  7,  7, 11, 19, 19, 19,  7,
         7,  7,  7,  7, 14, 14, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
        19, 10, 10, 15, 10, 10, 10, 10, 10, 10, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 10, 10, 10, 19, 10, 10, 10, 13, 13, 13, 13, 13, 13,
        10, 10, 10, 10,  7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  7,
         7, 10], device='cuda:0')
LOSS: 0.038818359375
VSSVPTKLEVVAATPTSLLISWDAKGTSG