In [2]:
import flexs

In [3]:
import editdistance
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pprint
import numpy as np
import json

import flexs
from flexs import baselines
import flexs.utils.sequence_utils as s_utils


In [4]:
exp_name = 'RNA14'

In [5]:


problem = flexs.landscapes.rna.registry()['L14_RNA1']
pprint.pprint(problem)

landscape = flexs.landscapes.RNABinding(**problem['params'])
alphabet = s_utils.RNAA


bsize=100

{'params': {'seq_length': 14,
            'targets': ['GAACGAGGCACAUUCCGGCUCGCCCGGCCCAUGUGAGCAUGGGCCGGACCCCGUCCGCGCGGGGCCCCCGCGCGGACGGGGGCGAGCCGGAAUGUGCCUC']},
 'starts': {1: 'AUGGGCCGGACCCC',
            2: 'GCCCCGCCGGAAUG',
            3: 'UCUUGGGGACUUUU',
            4: 'GGAUAACAAUUCAU',
            5: 'CCCAUGCGCGAUCA'}}


In [6]:
nModelQueries = 2000
nRounds = 15

In [7]:
seq_length = problem['params']['seq_length']

In [8]:
import random

starting_sequence = "".join([random.choice(alphabet) for i in range(seq_length)])



In [9]:
import os

logs_dir = f'./analysis/{exp_name}'

os.listdir(logs_dir)


from datetime import datetime
def get_time():
    return datetime.now().isoformat()

## Random Explorer

In [34]:
for _ in range(3):
    cnn = baselines.models.CNN(len(starting_sequence), alphabet=alphabet,
                             num_filters=32, hidden_size=100, loss='MSE')

    random_explorer = baselines.explorers.Random(
        cnn,
        rounds=nRounds,
        mu=1,
        starting_sequence=starting_sequence,
        sequences_batch_size=100,
        model_queries_per_batch=nModelQueries,
        alphabet=alphabet
    )

    random_sequences, metadata = random_explorer.run(landscape)
    fname = "Random_Explorer_" + get_time() + ".csv"
    random_sequences.to_csv(os.path.join(logs_dir, fname))
    random_sequences

round: 0, top: 0.15378699877032953, time: 0.000262s
round: 1, top: 0.39819848574016875, time: 3.345087s
round: 2, top: 0.46959960400396916, time: 0.516236s
round: 3, top: 0.5053001369460991, time: 0.522794s
round: 4, top: 0.5080463438753877, time: 0.624389s
round: 5, top: 0.5080463438753877, time: 0.591970s
round: 6, top: 0.5657164798722858, time: 0.617488s
round: 7, top: 0.5657164798722858, time: 0.767273s
round: 8, top: 0.5657164798722858, time: 0.766474s
round: 9, top: 0.631625184277509, time: 0.876353s
round: 10, top: 0.631625184277509, time: 0.891544s
round: 11, top: 0.631625184277509, time: 0.942348s
round: 12, top: 0.631625184277509, time: 0.988282s
round: 13, top: 0.631625184277509, time: 1.035094s
round: 14, top: 0.631625184277509, time: 1.073871s
round: 15, top: 0.631625184277509, time: 1.114259s
round: 0, top: 0.15378699877032953, time: 0.000207s
round: 1, top: 0.36524413353755714, time: 3.007227s
round: 2, top: 0.5272697400008672, time: 0.512746s
round: 3, top: 0.5272697400

## Adalead Explorer

In [35]:

for _ in range(3):

    cnn = baselines.models.CNN(len(starting_sequence), alphabet=alphabet,
                             num_filters=32, hidden_size=100, loss='MSE')

    adalead_explorer = baselines.explorers.Adalead(
        cnn,
        rounds=nRounds,
        starting_sequence=starting_sequence,
        sequences_batch_size=100,
        model_queries_per_batch=nModelQueries,
        alphabet=alphabet
    )


    adalead_sequences, metadata = adalead_explorer.run(landscape)
    fname = "Adalead_Explorer_" + get_time() + ".csv"
    adalead_sequences.to_csv(os.path.join(logs_dir, fname))
    adalead_sequences


round: 0, top: 0.15378699877032953, time: 0.000225s
round: 1, top: 0.46959960400396916, time: 25.539324s
round: 2, top: 0.6508485804029885, time: 11.473732s
round: 3, top: 0.7661888000172442, time: 9.138100s
round: 4, top: 0.8128741606765285, time: 7.251759s
round: 5, top: 0.8540671074772356, time: 6.994964s
round: 6, top: 0.8925138473486541, time: 7.110831s
round: 7, top: 1.0517932206929055, time: 7.468872s
round: 8, top: 1.0517932206929055, time: 7.447524s
round: 9, top: 1.0517932206929055, time: 8.011372s
round: 10, top: 1.0517932206929055, time: 8.216094s
round: 11, top: 1.0517932206929055, time: 8.176075s
round: 12, top: 1.0517932206929055, time: 7.238011s
round: 13, top: 1.0517932206929055, time: 7.767524s
round: 14, top: 1.0517932206929055, time: 7.745489s
round: 15, top: 1.0517932206929055, time: 7.632067s
round: 0, top: 0.15378699877032953, time: 0.000216s
round: 1, top: 0.4998077754670625, time: 16.958308s
round: 2, top: 0.6700719241489276, time: 7.781839s
round: 3, top: 0.80

## Genetic Explorer

In [10]:
for _ in range(3):
    
    cnn = baselines.models.CNN(len(starting_sequence), alphabet=alphabet,
                             num_filters=32, hidden_size=100, loss='MSE')

    genetic_explorer = baselines.explorers.GeneticAlgorithm(
        cnn,

        population_size=20,
        parent_selection_strategy='wright-fisher', # wright-fisher model decides who gets to 'mate'
        beta=0.01,
        children_proportion=0.2,

        rounds=nRounds,
        starting_sequence=starting_sequence,
        sequences_batch_size=bsize,
        model_queries_per_batch=nModelQueries,
        alphabet=alphabet
    )


    genetic_algo_sequences, metadata = genetic_explorer.run(landscape)
    fname = "Genetic_Explorer_" + get_time() + ".csv"
    genetic_algo_sequences.to_csv(os.path.join(logs_dir, fname))
    genetic_algo_sequences

2022-01-25 22:05:03.310464: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


round: 0, top: 0.4201680626051666, time: 0.000516s
round: 1, top: 0.6783105449367933, time: 108.172023s
round: 2, top: 0.8266051429434308, time: 112.553208s
round: 3, top: 0.961168758683166, time: 102.583191s
round: 4, top: 0.961168758683166, time: 112.091614s
round: 5, top: 0.961168758683166, time: 107.688405s
round: 6, top: 0.961168758683166, time: 57.704073s
round: 7, top: 0.961168758683166, time: 52.915628s
round: 8, top: 0.961168758683166, time: 54.318035s
round: 9, top: 0.961168758683166, time: 52.302489s
round: 10, top: 0.961168758683166, time: 55.263631s
round: 11, top: 0.961168758683166, time: 63.992599s
round: 12, top: 0.961168758683166, time: 62.387843s
round: 13, top: 0.961168758683166, time: 52.873845s
round: 14, top: 0.961168758683166, time: 50.883334s
round: 15, top: 0.961168758683166, time: 51.396622s
round: 0, top: 0.4201680626051666, time: 0.000340s
round: 1, top: 0.6783105449367933, time: 47.585553s
round: 2, top: 0.8266051429434308, time: 45.966455s
round: 3, top: 0

## CMAES Explorer

In [11]:
for _ in range(3):
    cnn = baselines.models.CNN(len(starting_sequence), alphabet=alphabet,
                             num_filters=32, hidden_size=100, loss='MSE')

    cmaes_explorer = baselines.explorers.CMAES(
        flexs.LandscapeAsModel(landscape),

        population_size=10,
        max_iter=200,

        rounds=nRounds,
        starting_sequence=starting_sequence,
        sequences_batch_size=bsize,
        model_queries_per_batch=nModelQueries,
        alphabet=alphabet
    )

    cmaes_sequences, metadata = cmaes_explorer.run(landscape)
    fname = "CMAES_Explorer_" + get_time() + ".csv"
    cmaes_sequences.to_csv(os.path.join(logs_dir, fname))
    cmaes_sequences

round: 0, top: 0.4201680626051666, time: 0.000303s
round: 1, top: 0.5025539300168106, time: 1.982948s
round: 2, top: 0.6865491133451186, time: 2.153259s
round: 3, top: 0.7387268354834394, time: 1.932465s
round: 4, top: 0.7387268354834394, time: 2.041866s
round: 5, top: 0.7606964385382076, time: 3.523021s
round: 6, top: 0.7606964385382076, time: 4.833827s
round: 7, top: 0.777173575354858, time: 3.614939s
round: 8, top: 0.777173575354858, time: 1.910069s
round: 9, top: 0.8128741606765285, time: 2.642523s
round: 10, top: 0.9007524157569793, time: 1.946164s
round: 11, top: 0.9007524157569793, time: 2.193195s
round: 12, top: 0.9007524157569793, time: 2.050235s
round: 13, top: 0.9007524157569793, time: 2.179243s
round: 14, top: 0.9007524157569793, time: 2.053605s
round: 15, top: 0.9007524157569793, time: 5.206067s
round: 0, top: 0.4201680626051666, time: 0.000357s
round: 1, top: 0.48333058627087155, time: 6.803608s
round: 2, top: 0.5931784444060906, time: 2.140992s
round: 3, top: 0.645356166

## DynaPPO Explorer

In [12]:
for _ in range(3):

    cnn = baselines.models.CNN(len(starting_sequence), alphabet=alphabet,
                             num_filters=32, hidden_size=100, loss='MSE')

    dynappo_explorer = baselines.explorers.DynaPPO(  # DynaPPO has its own default ensemble model, so don't use CNN
        landscape=landscape,
        env_batch_size=10,
        num_model_rounds=10,
        rounds=nRounds,
        starting_sequence=starting_sequence,
        sequences_batch_size=bsize,
        model_queries_per_batch=nModelQueries,
        alphabet=alphabet,
    )

    dynappo_sequences, metadata = dynappo_explorer.run(landscape)
    fname = "DynaPPO_Explorer_" + get_time() + ".csv"
    dynappo_sequences.to_csv(os.path.join(logs_dir, fname))
    dynappo_sequences

  positive)


BoundedTensorSpec(shape=(), dtype=tf.int64, name='action', minimum=array(0), maximum=array(3))
round: 0, top: 0.4201680626051666, time: 0.000340s
Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=True)` instead.
round: 1, top: 0.5492392906760948, time: 27.114906s
round: 2, top: 0.6041632197437043, time: 25.552637s
round: 3, top: 0.6041632197437043, time: 41.923151s
round: 4, top: 0.6865491133451186, time: 50.845752s
round: 5, top: 0.6865491133451186, time: 53.996527s
round: 6, top: 0.6865491133451186, time: 55.447780s
round: 7, top: 0.6865491133451186, time: 50.893434s
round: 8, top: 0.6865491133451186, time: 55.873641s
round: 9, top: 0.7277420601458257, time: 64.665684s
round: 10, top: 0.7277420601458257, time: 73.673227s
round: 11, top: 0.7277420601458257, time: 66.918742s
round: 12, top: 0.7277420601458257, time: 63.949947s
round: 13, top: 0.7359806285541509, time: 66.207138s
round: 14, top: 0.7359806285541509, time: 70.894270s
round: 15, top: 0.7359806285541

  positive)


BoundedTensorSpec(shape=(), dtype=tf.int64, name='action', minimum=array(0), maximum=array(3))
round: 0, top: 0.4201680626051666, time: 0.000235s
round: 1, top: 0.5876860305475133, time: 23.751129s
round: 2, top: 0.6508485804029885, time: 22.286866s
round: 3, top: 0.7854121961427237, time: 37.061231s
round: 4, top: 0.7854121961427237, time: 39.500445s
round: 5, top: 0.7854121961427237, time: 41.589381s
round: 6, top: 0.7854121961427237, time: 43.209767s
round: 7, top: 0.7854121961427237, time: 47.488311s
round: 8, top: 0.7854121961427237, time: 49.717049s
round: 9, top: 0.7854121961427237, time: 54.079600s
round: 10, top: 0.7854121961427237, time: 53.592415s
round: 11, top: 0.7854121961427237, time: 62.149071s
round: 12, top: 0.7854121961427237, time: 61.656065s
round: 13, top: 0.7854121961427237, time: 65.808941s
round: 14, top: 0.7854121961427237, time: 71.938159s
round: 15, top: 0.7854121961427237, time: 73.658067s


  positive)


BoundedTensorSpec(shape=(), dtype=tf.int64, name='action', minimum=array(0), maximum=array(3))
round: 0, top: 0.4201680626051666, time: 0.000330s
round: 1, top: 0.6288789773482205, time: 22.852963s
round: 2, top: 0.724995853216537, time: 25.145255s
round: 3, top: 0.724995853216537, time: 37.127891s
round: 4, top: 0.724995853216537, time: 36.942710s
round: 5, top: 0.7661888000172442, time: 47.116694s
round: 6, top: 0.7661888000172442, time: 55.809284s
round: 7, top: 0.7661888000172442, time: 52.086127s
round: 8, top: 0.7661888000172442, time: 60.610725s
round: 9, top: 0.77442742080511, time: 55.673338s
round: 10, top: 0.77442742080511, time: 81.478679s
round: 11, top: 0.77442742080511, time: 76.304080s
round: 12, top: 0.77442742080511, time: 77.813152s
round: 13, top: 0.77442742080511, time: 96.763856s
round: 14, top: 0.77442742080511, time: 80.436016s
round: 15, top: 0.77442742080511, time: 79.684004s
