In [170]:
from utils.sequence_utils import translate_string_to_one_hot

# Quick sanity check of the one-hot encoder: a 5-character sequence over a
# 4-letter alphabet; the recorded output shape is (4, 5).
# NOTE(review): the alphabet order here ('ATCG') differs from the "ATGC"
# string used when building the models further down — confirm this is intended.
translate_string_to_one_hot('AAAAA', 'ATCG').shape

(4, 5)

In [107]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell executes, so edits to the project's .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [138]:
from explorers.PPO_explorer import PPO_explorer
from explorers.DynaPPO_explorer import DynaPPO_explorer
from explorers.CMAES_explorer import CMAES_explorer

from utils.sequence_utils import generate_random_mutant, generate_random_sequences

## 1. Building oracles for landscapes

In [139]:
from models.Ground_truth_oracles.RNA_landscape_models import RNA_landscape_constructor
# NOTE(review): the wildcard import hides where names come from. In the cells
# visible here only TF_binding_landscape_constructor is presumably taken from
# this module — consider importing it explicitly once that is confirmed.
from models.Ground_truth_oracles.TF_binding_landscape_models import *

In [140]:
# Build the TF-binding landscape constructor and load one landscape by id.
landscape_constructor = TF_binding_landscape_constructor()
landscape_constructor.load_landscapes(landscapes_to_test=['SIX6_REF_R1'])

# The constructor exposes the loaded landscapes through a generator; take the
# first entry it yields.
landscape_generator = landscape_constructor.generate_from_loaded_landscapes()
landscape_object = next(landscape_generator)

# Show which landscape was picked and the starting sequences that come with it
# (one value per line, same as two separate print calls).
print(
    landscape_object['landscape_id'],
    landscape_object['starting_seqs'],
    sep='\n',
)

# Keep the oracle and the 'TF0' starting sequence for the cells below.
landscape_oracle = landscape_object['landscape_oracle']
wt = landscape_object['starting_seqs']['TF0']

1 TF landscapes loaded.
SIX6_REF_R1
{'TF0': 'TTAATTAA', 'TF1': 'GCTCGAGC', 'TF2': 'GCGCGCGC', 'TF3': 'TGCGCGCC', 'TF4': 'ATATAGCC', 'TF5': 'GTTTGGTA', 'TF6': 'ATTATGTT', 'TF7': 'CAGTTTTT', 'TF8': 'AAAAATTT', 'TF9': 'AAAAACGC', 'TF10': 'GTTGTTTT', 'TF11': 'TGCTTTTT', 'TF12': 'AAAGATAG', 'TF13': 'CCTTCTTT', 'TF14': 'AAAGAGAG'}


## 2. Building a model of the landscape

In [141]:
from utils.model_architectures import CNNa
from models.Noisy_models.Neural_network_models import NN_model

alphabet = "ATGC"

# Draw 100 random mutants of the starting sequence and drop duplicates.
# (0.2 is passed straight to generate_random_mutant — presumably a mutation
# rate; confirm against the helper.)
unique_mutants = {generate_random_mutant(wt, 0.2, alphabet) for _ in range(100)}
training_sequences = list(unique_mutants)

# CNN architecture sized to the sequence length, wrapped in an NN_model that
# is tied to the landscape oracle.
cnn_architecture = CNNa(len(wt), alphabet=alphabet)
noisy_nn_model = NN_model(landscape_oracle, cnn_architecture)

# Reset the model, then update it with the de-duplicated training sequences.
noisy_nn_model.reset()
noisy_nn_model.update_model(training_sequences)

## 3. Exploring the landscape with a model

### CMA-ES explorer

In [200]:
# Create the CMA-ES explorer over the same alphabet string used for the model.
cmaes_explorer = CMAES_explorer(alphabet=alphabet)
# Turn on the explorer's debug flag.
cmaes_explorer.debug = True
# Attach the model built earlier, then initialise the explorer's parameters.
# NOTE(review): initialize_params() is called after set_model(); presumably it
# relies on the attached model — keep this order unless confirmed otherwise.
cmaes_explorer.set_model(noisy_nn_model)
cmaes_explorer.initialize_params()

In [204]:
# Ask the explorer for one batch of proposals; as the last expression in the
# cell, the returned list of sequence strings is displayed in full.
# NOTE(review): consider slicing the result (e.g. [:10]) to keep the output short.
cmaes_explorer.propose_samples()

  return int(np.linalg.norm(self.ps)/np.sqrt(1-(1-self.cs)**(2*self.round/self.lam))/self.chiN < 1.4 + 2/(self.N+1))


['TTTAGGAT',
 'TGTGAGAT',
 'GGTGAAAT',
 'GGTATCAT',
 'GGTATCAT',
 'GGTGTAAT',
 'TTTAAAAT',
 'TGTCTGAT',
 'TGTAAAAT',
 'TGTAACAT',
 'TGTAACAT',
 'CGTGAAAT',
 'TGTATCAT',
 'TTTGACAT',
 'CGTAATAT',
 'CGTAATAT',
 'TTTGAAAT',
 'ACTCGGAT',
 'TCTAAAAT',
 'TTTCACAT',
 'GGTGTTCT',
 'CGTAAAAT',
 'TCAGAGTT',
 'TGTGCAAT',
 'TCTGAAAT',
 'CGTCTAAT',
 'TGTGGAAT',
 'GTTAAAAT',
 'GTTAAAAT',
 'TGGAATAT',
 'TCGATGAT',
 'GGTCAGAT',
 'GGTCAAAT',
 'TCTATCAT',
 'TCTCAAAT',
 'CGTCACAT',
 'ACCGGGAT',
 'TGTACATT',
 'TTTGAACT',
 'GTTCAGAT',
 'GGTCCCAT',
 'CGGGGGAT',
 'GGGGTGAT',
 'AGTCAAAT',
 'GTTGAAAT',
 'GATATAGT',
 'TCTGTATT',
 'TTGAAAAT',
 'TTGGAAAT',
 'CGGGAACT',
 'TTCCAAAT',
 'TGGCAAAT',
 'TGAAACAT',
 'GCTCAAAT',
 'TCGAAAAT',
 'CTTGGAAT',
 'CGAATCAT',
 'TATAATAT',
 'TATAATAT',
 'GTTGACAT',
 'TATCGTAT',
 'CATCAGAT',
 'TGACGCAT',
 'GCTGTAGT',
 'CGTCATTT',
 'TGTGCCGT',
 'GCGAAGAT',
 'TCTGCTAT',
 'GATCAAAT',
 'AGGCAAAT',
 'TCTGAAGT',
 'TCTGTTGT',
 'CTTGATAT',
 'CATACGAT',
 'GCAGTAAT',
 'TGCACAAT',
 'TATTACAT',

In [207]:
# Number of rounds handed to run() below.
num_of_rounds = 5
# Run the CMA-ES explorer; verbose=True presumably produces the per-round
# "round / cost / evals / top" lines seen in the recorded output.
cmaes_explorer.run(num_of_rounds, verbose=True)

round: 0, cost: 968, evals: 1393, top: 0.989691996866367


  x = np.random.multivariate_normal(self.mean, (self.sigma**2)*self.cov)


round: 1, cost: 1060, evals: 1485, top: 0.989691996866367


  x = np.random.multivariate_normal(self.mean, (self.sigma**2)*self.cov)


round: 2, cost: 1159, evals: 1584, top: 0.989691996866367


  x = np.random.multivariate_normal(self.mean, (self.sigma**2)*self.cov)


round: 3, cost: 1257, evals: 1682, top: 0.989691996866367


  x = np.random.multivariate_normal(self.mean, (self.sigma**2)*self.cov)


round: 4, cost: 1354, evals: 1779, top: 0.989691996866367


  x = np.random.multivariate_normal(self.mean, (self.sigma**2)*self.cov)


### DyNA-PPO explorer

In [137]:
from models.Noisy_models.Ensemble import Ensemble_models

# Ensemble of three NN_model instances, each constructed from the same oracle
# and the same cnn_architecture object.
# NOTE(review): all members receive the identical cnn_architecture instance —
# confirm NN_model does not end up sharing weights between ensemble members.
ensemble_model = Ensemble_models(
    list_of_models=[
        NN_model(landscape_oracle, cnn_architecture) for _ in range(3)
    ]
)
ensemble_model.reset()
ensemble_model.update_model(training_sequences)

# DyNA-PPO explorer with a single experiment round and a single model round,
# using the ground-truth landscape oracle.
dynappo_explorer = DynaPPO_explorer(
    alphabet=alphabet, oracle=landscape_oracle,
    num_experiment_rounds=1, num_model_rounds=1,
)
dynappo_explorer.debug = True
dynappo_explorer.set_model(ensemble_model)

# NOTE(review): the recorded run of this call ended with
# "ValueError: No candidate models passed threshold." — investigate before
# relying on this cell in a top-to-bottom run.
dynappo_explorer.learn_policy()

ValueError: No candidate models passed threshold.

In [134]:
# Number of rounds handed to run() below.
num_of_rounds = 5
# Run the DyNA-PPO explorer with per-round progress output.
# NOTE(review): this depends on dynappo_explorer from the setup cell, whose
# recorded learn_policy() call raised a ValueError — confirm this cell still
# works on a fresh kernel run from top to bottom.
dynappo_explorer.run(num_of_rounds, verbose=True)

round: 0, cost: 112, evals: 495, top: 0.8759225662804602
round: 1, cost: 212, evals: 1074, top: 0.8813239599224838
round: 2, cost: 312, evals: 1758, top: 0.9884550364903311
round: 3, cost: 412, evals: 2508, top: 0.9884550364903311
round: 4, cost: 512, evals: 3363, top: 0.9884550364903311


### PPO explorer

In [104]:
# Create the PPO explorer over the shared alphabet string.
ppo_explorer = PPO_explorer(
    alphabet=alphabet)
# Turn on the explorer's debug flag.
ppo_explorer.debug = True
# Attach the model object that noisy_nn_model refers to at this point.
ppo_explorer.set_model(noisy_nn_model)

In [105]:
# Pretrain the PPO agent before the exploration rounds; the recorded output
# shows episode progress, the number of measured sequences and the top sequence.
ppo_explorer.pretrain_agent()

Episodes: 0/100
Number of measured sequences: 54
New top sequence: (0.6781429101554447, 'TTAATTAA', 5)


In [106]:
# Build a fresh NN_model, reset it and update it with the training sequences.
# NOTE(review): this only rebinds the name noisy_nn_model. ppo_explorer was
# given the previous model object via set_model(noisy_nn_model) in an earlier
# cell and is not handed the new one here, so the explorer presumably keeps
# using the old model — confirm whether set_model() should be called again
# (and whether doing so would discard the pretrained agent).
noisy_nn_model=NN_model(landscape_oracle, cnn_architecture)
noisy_nn_model.reset()
noisy_nn_model.update_model(training_sequences)

# Number of rounds handed to run() below.
num_of_rounds = 5
# Run the PPO explorer with per-round progress output.
ppo_explorer.run(num_of_rounds, verbose=True)

round: 0, cost: 5, evals: 98, top: 0.6781429101554447
54
round: 1, cost: 105, evals: 246, top: 0.8407825835979055
154
round: 2, cost: 205, evals: 489, top: 0.9295138745722179
254
round: 3, cost: 305, evals: 749, top: 0.9453057353729436
354
round: 4, cost: 405, evals: 1002, top: 0.9453057353729436
454
