In [15]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
from tf_agents.environments import tf_py_environment
from tf_agents.environments.utils import validate_py_environment

from environments.PPO_environment import FitnessLandscapeEnvironment as FLEnv
from explorers.PPO_explorer import PPO_explorer

from utils.sequence_utils import generate_random_mutant, generate_random_sequences

## 1. Building oracles for landscapes

In [20]:
from models.Ground_truth_oracles.RNA_landscape_models import RNA_landscape_constructor

In [31]:
# YAML file describing the pre-specified RNA landscapes (relative to this notebook).
landscape_config_path = "../data/RNA_landscapes/RNA_landscape_config.yaml"

# Load a subset of the pre-specified landscapes: `landscapes_to_test` lists the
# indices to load (here only index 0).
landscape_constructor = RNA_landscape_constructor()
landscape_constructor.load_landscapes(
    landscape_config_path,
    landscapes_to_test=[0],
)

# Generator over the landscapes loaded above; consumed with next() further down.
landscape_generator = landscape_constructor.generate_from_loaded_landscapes()

B1L14RNA1 loaded


In [32]:
# Display the configuration of every loaded landscape (bare expression, so the
# notebook renders its value as the cell output).
landscape_constructor.loaded_landscapes

[{'B1L14RNA1': {'self_fold_max': False,
   'seq_size': 14,
   'conserved_start': False,
   'conserved_pattern': False,
   'targets': ['GAACGAGGCACAUUCCGGCUCGCCCGGCCCAUGUGAGCAUGGGCCGGACCCCGUCCGCGCGGGGCCCCCGCGCGGACGGGGGCGAGCCGGAAUGUGCCUC'],
   'starts': {'startRNAL14_0': 'AUGGGCCGGACCCC',
    'startRNAL14_1': 'GCCCCGCCGGAAUG',
    'startRNAL14_2': 'UCUUGGGGACUUUU',
    'startRNAL14_3': 'GGAUAACAAUUCAU',
    'startRNAL14_4': 'CCCAUGCGCGAUCA'}}}]

In [33]:
# Pull the next landscape from the generator; the cells below read its
# 'landscape_id', 'starting_seqs' and 'landscape_oracle' entries.
landscape_object = next(landscape_generator)

In [38]:
# Ground-truth oracle for this landscape and the starting sequence used as the
# wild type (wt) throughout the rest of the notebook.
landscape_oracle = landscape_object['landscape_oracle']
wt = landscape_object['starting_seqs']['startRNAL14_3']

print(landscape_object['landscape_id'])
print(landscape_object['starting_seqs'])
# Query the oracle with `wt` itself instead of repeating its sequence as a
# string literal, so the printed fitness always refers to the chosen start.
print(landscape_oracle.get_fitness(wt))

B1L14RNA1
{'startRNAL14_0': 'AUGGGCCGGACCCC', 'startRNAL14_1': 'GCCCCGCCGGAAUG', 'startRNAL14_2': 'UCUUGGGGACUUUU', 'startRNAL14_3': 'GGAUAACAAUUCAU', 'startRNAL14_4': 'CCCAUGCGCGAUCA'}
0.14554841726711917


## 2. Building a model of the landscape

In [40]:
from utils.model_architectues import CNNa
from models.Noisy_models.Neural_network_models import NN_model

RNA_alphabet = "UCGA"

# Generate 500 random mutants of the wild type and keep each distinct sequence
# once. The second argument (0.2) is presumably the mutation rate — confirm
# against generate_random_mutant.
unique_mutants = {
    generate_random_mutant(wt, 0.2, RNA_alphabet) for _ in range(500)
}
training_sequences = list(unique_mutants)

# CNN architecture sized to the wild-type length, wrapped in a model that is
# built on top of the ground-truth oracle.
cnn_architecture = CNNa(len(wt), alphabet=RNA_alphabet)
noisy_nn_model = NN_model(landscape_oracle, cnn_architecture)

# Reset the model, then update it with the deduplicated training sequences.
noisy_nn_model.reset()
noisy_nn_model.update_model(training_sequences)

## 3. Exploring the landscape with a model

In [41]:
from explorers.base_explorer import Random_explorer

mutation_rate = 0.05

# Baseline explorer configured with the mutation rate, batch size, alphabet and
# virtual-screen setting below.
explorer = Random_explorer(
    mutation_rate,
    batch_size=100,
    alphabet=RNA_alphabet,
    virtual_screen=10,
)

# Debug mode: don't save results.
explorer.debug = True

# Attach the model built in section 2 to the explorer.
explorer.set_model(noisy_nn_model)

In [43]:
# Run the random explorer for a single round; with verbose=True the recorded
# output shows one summary line per round (round, cost, evals, top).
num_of_rounds = 1
explorer.run(num_of_rounds, verbose=True)

round: 0, cost: 452, evals: 100, top: 0.5245235330715786


### PPO explorer

In [48]:
# Environment over the RNA alphabet that starts from the wild type and uses the
# trained model as its landscape, with max_num_steps set to 10.
fle = FLEnv(
    alphabet=RNA_alphabet,
    starting_seq=wt,
    landscape=noisy_nn_model,
    max_num_steps=10,
)

# Run the tf_agents environment validation for one episode before wrapping.
validate_py_environment(fle, episodes=1)

# TensorFlow wrapper around the Python environment, as used by the PPO agent.
tf_fle = tf_py_environment.TFPyEnvironment(fle)

In [49]:
# Create a new PPO agent for the TF-wrapped environment.
agent = PPO_explorer.initialize_new_agent(tf_fle)

Instructions for updating:
SeedStream has moved to `tfp.util.SeedStream`.


In [54]:
# Build the PPO explorer from the agent and TF environment created above.
ppo_explorer = PPO_explorer(
    ppo_agent=agent,
    tf_env=tf_fle,
    alphabet=RNA_alphabet)
# Presumably the same "don't save results" switch as on the random explorer — confirm.
ppo_explorer.debug = True
# Attach the same model that the random explorer uses.
ppo_explorer.set_model(noisy_nn_model)

In [53]:
# Train the PPO agent; the recorded output logs episode progress, the number of
# measured sequences and each new top sequence.
# NOTE(review): the recorded execution counts are out of order (this cell is
# In [53], the explorer-construction cell is In [54]), so the saved output may
# come from an earlier explorer instance. Re-run top to bottom on a fresh kernel.
ppo_explorer.train_agent()

Episodes: 0/10000
Number of measured sequences: 599
New top sequence: (0.5245235330715786, 'GGAGCCCUGGUCUA', 552)
Episodes: 164/10000
Number of measured sequences: 660
Episodes: 347/10000
Number of measured sequences: 711
New top sequence: (0.5268175, 'GGGGCUCGUAUCCU', 552)
Episodes: 521/10000
Number of measured sequences: 762
New top sequence: (0.5278955, 'GGGCGACAGCUCCU', 552)
Episodes: 687/10000
Number of measured sequences: 822
New top sequence: (0.5659919, 'GGGCCAUGUCGCCU', 552)
Episodes: 863/10000
Number of measured sequences: 873
Episodes: 1035/10000
Number of measured sequences: 917
Episodes: 1190/10000
Number of measured sequences: 969
New top sequence: (0.59951544, 'GGCGCCCGUGUCCU', 552)
Episodes: 1360/10000
Number of measured sequences: 1015
Episodes: 1517/10000
Number of measured sequences: 1059
Episodes: 1672/10000
Number of measured sequences: 1116
New top sequence: (0.6445927, 'GGGGCGCGGGGGGU', 552)
Episodes: 1843/10000
Number of measured sequences: 1171
Episodes: 2007/1

In [60]:
# Run the PPO explorer for a single round, mirroring the random-explorer run,
# so the two summary lines can be compared.
num_of_rounds = 1
ppo_explorer.run(num_of_rounds, verbose=True)

round: 0, cost: 834, evals: 3378, top: 0.8403361252103332
