In [1]:
import os
import torch


# Root of the shared data drive. Set the POLYMERS_DATA_DIR environment variable
# to use a different location; when it is unset the original local path is used,
# so existing setups keep working unchanged.
drive_prefix = os.environ.get(
    'POLYMERS_DATA_DIR',
    '/Users/wfr5091/Google Drive/Shared drives/Polymers-Data',
)

# Load the ten scripted models ('fold-00' ... 'fold-09') onto the CPU and put
# each one in eval mode. Later cells pass this list around as the ensemble.
model_path = os.path.join(drive_prefix, 'models', 'gru-opt-cv10-sym')
model_ensemble = []
for i in range(10):
    model = torch.jit.load(os.path.join(model_path, f'fold-{i:02d}-scripted.pt'), map_location='cpu')
    model.eval()
    model_ensemble.append(model)

In [2]:
import numpy as np

# Reference data for the six archetype morphologies, kept in one table so the
# three per-morphology dictionaries below always share the same keys and order.
# Columns: (morphology, A/B sequence, entry for archetype_targets,
#           entry for archetype_predictions)
_ARCHETYPE_TABLE = [
    ('liquid', 'BAABAABAABAABABBAAAB', [20.471924, 2.369606], [18.177, 2.932]),
    ('membrane', 'ABAAABBAAAABBABBBAAA', [1.214188, 9.048346], [1.132, 8.499]),
    ('spherical micelle', 'BBBBBBBBAAAAAAAAAAAA', [2.774236, -4.223681], [2.901, -4.351]),
    ('string', 'ABBABABAAAAABABBABAA', [-4.319901, 7.762145], [-4.079, 6.662]),
    ('vesicle', 'BABBBAABAAAABAABAABA', [6.136077, 10.73103], [6.498, 8.414]),
    ('wormlike micelle', 'BBAAABBBAAAABBBAAAAA', [-4.627726, 0.183026], [-3.771, 0.172]),
]

archetype_sequences = {name: seq for name, seq, _, _ in _ARCHETYPE_TABLE}
archetype_targets = {name: np.array(tgt) for name, _, tgt, _ in _ARCHETYPE_TABLE}
archetype_predictions = {name: np.array(pred) for name, _, _, pred in _ARCHETYPE_TABLE}

# Human-readable description of each morphology.
archetype_plaintext = {
    'spherical micelle': 'spherical micelles',
    'membrane': 'sheet-like membranes',
    'wormlike micelle': 'worm-like micelles',
    'liquid': 'amorphous liquid droplets',
    'string': 'percolating, string-like aggregates',
    'vesicle': 'vesicles',
}

# Morphology to design for. It is fixed to 'string' here; to draw one at random
# instead, use: morph = np.random.choice(list(archetype_targets.keys()), 1)[0]
morph = 'string'

# Note that the target point comes from archetype_predictions, not archetype_targets.
target = archetype_predictions[morph]
target_description = archetype_plaintext[morph]
print(morph, ';', target_description, ';', target)

string ; percolating, string-like aggregates ; [-4.079  6.662]


In [3]:
# import importlib
import model_utils
# importlib.reload(model_utils)

# Feed each archetype sequence to the ensemble on its own and print whatever
# model_utils.run_sequences returns for it.
for name in archetype_predictions:
    archetype_sequence = archetype_sequences[name]
    print(model_utils.run_sequences([archetype_sequence], model_ensemble))

BAABAABAABAABABBAAAB: 18.177, 2.932
ABAAABBAAAABBABBBAAA: 1.132, 8.499
BBBBBBBBAAAAAAAAAAAA: 2.901, -4.351
ABBABABAAAAABABBABAA: -4.079, 6.662
BABBBAABAAAABAABAABA: 6.498, 8.414
BBAAABBBAAAABBBAAAAA: -3.771, 0.172


In [4]:
# check best-case scenarios
def enumerate_unique_sequences(length):
    """Enumerate binary strings of ``length`` characters, one per mirror pair.

    A string and its reverse are treated as the same sequence; only the member
    of each pair that is lexicographically <= its reverse is kept (palindromes
    are kept once).

    Args:
        length: number of characters in every generated '0'/'1' string.

    Returns:
        dict mapping the number of '1' characters (0..length) to the list of
        kept strings with that count, in ascending order.
    """
    by_count = {count: [] for count in range(length + 1)}
    spec = f'0{length}b'  # zero-padded binary, fixed width
    for value in range(2**length):
        bits = format(value, spec)
        if bits <= bits[::-1]:
            # Values are visited in ascending order and all strings have the
            # same width, so each list is built already sorted.
            by_count[bits.count('1')].append(bits)
    return by_count


# generate all sequences by composition
n = 20
limit = 2**n
_ordered_by_frac = enumerate_unique_sequences(n)
# Sets give fast membership tests for the cells that query a composition.
all_seq_by_frac = {k: set(v) for k, v in _ordered_by_frac.items()}

# create a master list of all possible sequences, built from the ordered lists
# rather than from the sets so its order is the same on every run
all_sequences = [seq for group in _ordered_by_frac.values() for seq in group]
print(f'generated {len(all_sequences)} sequences')

generated 524800 sequences


In [5]:
import model_utils

# Restrict the pool to the bit-strings that contain exactly eight '1' characters.
possible_sequences = all_seq_by_frac[8]
print(f'choosing from {len(possible_sequences)} sequences')

# Rewrite the 0/1 strings with A/B letters, then evaluate the whole pool
# against `target` with the ensemble.
these_sequences = list(map(lambda bits: bits.replace('0', 'A').replace('1', 'B'), possible_sequences))
out = model_utils.evaluate_sequences(these_sequences, target, model_ensemble)

choosing from 63090 sequences


In [79]:
# Check whether the 'string' archetype sequence (rewritten with 0/1) is in the
# candidate pool, first as written and then reversed.
s = 'ABBABABAAAAABABBABAA'.replace('A', '0').replace('B', '1')
for candidate in (s, s[::-1]):
    print(candidate in possible_sequences)

False
True


In [81]:
# Show the first 25 newline-separated entries of the evaluation report.
# To keep only the part after the colon of each entry, use
# it.split(':')[1].strip() on every item instead.
out.split('\n')[:25]

['AABABBABAAAAABABABBA: 0.000',
 'AAABBABABAAAAABBBABA: 0.025',
 'AAABBABBAAAAABABAABB: 0.038',
 'AAABBBABAAAAABBAABAB: 0.054',
 'AABBAABBAAAAABABBAAB: 0.073',
 'AAABABBBAAAAABBAABAB: 0.094',
 'AAABBABABAAAABBBAAAB: 0.102',
 'AAABBBABAAAAABBABAAB: 0.127',
 'AAABABBBAAAAABBABAAB: 0.132',
 'AAABBABBAAAAABAABBAB: 0.152',
 'AAABBABABAAAAABABBBA: 0.165',
 'AABBABABAAAAABABABBA: 0.177',
 'AAABBABABAAAABBABBAA: 0.180',
 'AABABABBAAAAABABBAAB: 0.181',
 'AAABBABBAAAABBABBAAA: 0.183',
 'AAABABBABAAAAABBBABA: 0.188',
 'AAABABBBAAAAABABBAAB: 0.197',
 'AABBAABBAAAAABBABAAB: 0.207',
 'AABBABABAAAAABBAABBA: 0.216',
 'AAABABBBAAAAABABABAB: 0.218',
 'AAABBBABAAAAABABBAAB: 0.220',
 'AAABBABABAAAABAABBAB: 0.242',
 'AABBAABBAAAAABBAABAB: 0.243',
 'AAABABBABAAAABBBAAAB: 0.245',
 'AAABBABBAAAAABBBAAAB: 0.245']

In [52]:
# Integer code for each monomer letter.
AB = {'A': 0, 'B': 1}
# One row per sequence; here a single sequence (the 'string' archetype).
int_seq = np.array([[AB[x] for x in s] for s in ['ABBABABAAAAABABBABAA']])

# Collect one prediction per ensemble member. A comprehension is used so this
# cell no longer rebinds the global `out`, which holds the text report from the
# earlier evaluation cell and is read again by the top-25 display cell.
predictions = [model_utils.predict_from_model(model, int_seq) for model in model_ensemble]

# Average over ensemble members and take the entry for the first (only) sequence
# as the new design target.
z_pred = np.array(predictions).mean(axis=0)
target = z_pred[0]
print(target)

[-4.078578  6.661681]


In [12]:
import message_utils, model_utils

# Batch 1: proposed sequences, one per line.
response = """
AAAABBBAAABBAAABBBAA
BAAABBAAAAABBAAABBBB
BBAABBAAAABBAAABBAAA
BBBAABAABAABAABAABAA
AABABABBAAABAABBAABA
"""

# Extract the A/B sequences from the text, evaluate them against `target`
# with the ensemble, and print the resulting report.
these_sequences = message_utils.extract_AB_substrings(response)
outcome = model_utils.evaluate_sequences(
    these_sequences,
    target,
    model_ensemble,
)
print(outcome)

AAAABBBAAABBAAABBBAA: 5.471
BBAABBAAAABBAAABBAAA: 6.823
BAAABBAAAAABBAAABBBB: 12.082
BBBAABAABAABAABAABAA: 14.911
AABABABBAAABAABBAABA: 17.176


In [13]:
# Batch 2: proposed sequences, one per line.
response = """
AAABABBAAABBAAABBBAA
AAAABBBAAABBABAABBAA
AAAABABABABBAAABBBAA
BAAABABABABBAAABABAA
ABAABABABABAABABABAA
"""

# Extract the A/B sequences from the text, evaluate them against `target`
# with the ensemble, and print the resulting report.
these_sequences = message_utils.extract_AB_substrings(response)
outcome = model_utils.evaluate_sequences(
    these_sequences,
    target,
    model_ensemble,
)
print(outcome)

AAAABBBAAABBABAABBAA: 5.607
AAAABABABABBAAABBBAA: 6.194
AAABABBAAABBAAABBBAA: 8.339
BAAABABABABBAAABABAA: 18.484
ABAABABABABAABABABAA: 23.832


In [14]:
# Batch 3: proposed sequences, one per line.
response = """
BAABABBAAABBAAAABBAA
AAABABBAAABAAABBAABB
AAABABBAABABBAABBAAB
BAABBAABAABBAABAABBA
BAABABAABBAAABBAAAAB
"""

# Extract the A/B sequences from the text, evaluate them against `target`
# with the ensemble, and print the resulting report.
these_sequences = message_utils.extract_AB_substrings(response)
outcome = model_utils.evaluate_sequences(
    these_sequences,
    target,
    model_ensemble,
)
print(outcome)

AAABABBAAABAAABBAABB: 8.363
BAABABBAAABBAAAABBAA: 8.727
AAABABBAABABBAABBAAB: 10.634
BAABABAABBAAABBAAAAB: 17.967
BAABBAABAABBAABAABBA: 23.106


In [15]:
# Batch 4: proposed sequences, one per line.
response = """
AAABBBAAAABBAAABBBAA
BBBAAAABBAAABBBAAAAA
ABBAABBAABBAAAAABBAA
BBAAAABBAAABBAAAAABB
BBBBAAAAAAABBBBAAAAA
"""

# Extract the A/B sequences from the text, evaluate them against `target`
# with the ensemble, and print the resulting report.
these_sequences = message_utils.extract_AB_substrings(response)
outcome = model_utils.evaluate_sequences(
    these_sequences,
    target,
    model_ensemble,
)
print(outcome)

BBBAAAABBAAABBBAAAAA: 2.764
BBBBAAAAAAABBBBAAAAA: 3.195
AAABBBAAAABBAAABBBAA: 8.745
ABBAABBAABBAAAAABBAA: 9.307
BBAAAABBAAABBAAAAABB: 16.073


In [16]:
# Batch 5: proposed sequences, one per line.
response = """
BBBAAABBBAAABBAAAAAA
BBAAAAAABBBBAAAAAABB
AAABBBAAABBBAAABBAAA
ABBBAAABBAAAABBBAAAA
AABBBAAABBBAAAABBAAA
"""

# Extract the A/B sequences from the text, evaluate them against `target`
# with the ensemble, and print the resulting report.
these_sequences = message_utils.extract_AB_substrings(response)
outcome = model_utils.evaluate_sequences(
    these_sequences,
    target,
    model_ensemble,
)
print(outcome)

ABBBAAABBAAAABBBAAAA: 4.396
BBBAAABBBAAABBAAAAAA: 4.996
AAABBBAAABBBAAABBAAA: 9.317
AABBBAAABBBAAAABBAAA: 10.083
BBAAAAAABBBBAAAAAABB: 19.276


In [17]:
# Batch 6: proposed sequences, one per line.
response = """
AAABBBAAAABBAAAABBBA
AAABBBAAAABBAABBBAAA
AABBABAAAABBAAABBBAA
AAABBBAAABBAAAABBBAA
AAABABBAAABBAAABBBAA
"""

# Extract the A/B sequences from the text, evaluate them against `target`
# with the ensemble, and print the resulting report.
these_sequences = message_utils.extract_AB_substrings(response)
outcome = model_utils.evaluate_sequences(
    these_sequences,
    target,
    model_ensemble,
)
print(outcome)

AAABBBAAAABBAABBBAAA: 6.517
AAABABBAAABBAAABBBAA: 8.339
AAABBBAAABBAAAABBBAA: 9.547
AAABBBAAAABBAAAABBBA: 9.727
AABBABAAAABBAAABBBAA: 10.204


In [28]:
# Batch 7: proposed sequences, one per line.
# NOTE(review): lines 1 and 5 are the same sequence, as are lines 2 and 4.
response = """
AAABBBAAAABBAAABBBAA
AABBBAAAABBAAABBBAAA
ABBBAAAABBAAABBBAAAA
AABBBAAAABBAAABBBAAA
AAABBBAAAABBAAABBBAA
"""

# Extract the A/B sequences from the text, evaluate them against `target`
# with the ensemble, and print the resulting report.
these_sequences = message_utils.extract_AB_substrings(response)
outcome = model_utils.evaluate_sequences(
    these_sequences,
    target,
    model_ensemble,
)
print(outcome)

ABBBAAAABBAAABBBAAAA: 5.322
AAABBBAAAABBAAABBBAA: 8.745
AAABBBAAAABBAAABBBAA: 8.745
AABBBAAAABBAAABBBAAA: 9.547
AABBBAAAABBAAABBBAAA: 9.547


In [None]:
# Batch 8: proposed sequences, one per line.
# NOTE(review): this text is identical to the batch 7 input.
response = """
AAABBBAAAABBAAABBBAA
AABBBAAAABBAAABBBAAA
ABBBAAAABBAAABBBAAAA
AABBBAAAABBAAABBBAAA
AAABBBAAAABBAAABBBAA
"""

# Extract the A/B sequences from the text, evaluate them against `target`
# with the ensemble, and print the resulting report.
these_sequences = message_utils.extract_AB_substrings(response)
outcome = model_utils.evaluate_sequences(
    these_sequences,
    target,
    model_ensemble,
)
print(outcome)

# for strings

In [45]:
import message_utils, model_utils

# Batch 1: proposed sequences, one per line.
response = """
AAAABBBAAABBAAABBBAA
BAAABBAAAAABBAAABBBB
BBAABBAAAABBAAABBAAA
BBBAABAABAABAABAABAA
AABABABBAAABAABBAABA
"""

# Extract the A/B sequences from the text, evaluate them against the current
# `target` with the ensemble, and print the resulting report.
these_sequences = message_utils.extract_AB_substrings(response)
outcome = model_utils.evaluate_sequences(
    these_sequences,
    target,
    model_ensemble,
)
print(outcome)

BBAABBAAAABBAAABBAAA: 2.819
AAAABBBAAABBAAABBBAA: 5.672
BBBAABAABAABAABAABAA: 12.778
BAAABBAAAAABBAAABBBB: 12.982
AABABABBAAABAABBAABA: 15.278


In [46]:
# Proposed sequences for this round, one per line.
response = """
AAAABBBAAABBAAABBBAA
AAAABBBAABBBAAABBBAA
AAAABBBAAABBBAABBBAA
AAAABBBAABBBAAABBAAA
AAABBBAAAABBAAABBBAA
"""

# Extract the A/B sequences from the text, evaluate them against the current
# `target` with the ensemble, and print the resulting report.
these_sequences = message_utils.extract_AB_substrings(response)
outcome = model_utils.evaluate_sequences(
    these_sequences,
    target,
    model_ensemble,
)
print(outcome)

AAAABBBAAABBAAABBBAA: 5.672
AAABBBAAAABBAAABBBAA: 6.013
AAAABBBAAABBBAABBBAA: 6.523
AAAABBBAABBBAAABBBAA: 7.037
AAAABBBAABBBAAABBAAA: 8.873


In [82]:
# Proposed sequences for this round, one per line.
response = """
AABABBABAAAAABABABBA
AAABBBABAAAAABABABBA
AABABBABAAAAAABBABBA
AABABBABAAAAABAABBBA
AABABBBAAAAAABABABBA
"""

# Extract the A/B sequences from the text, evaluate them against the current
# `target` with the ensemble, and print the resulting report.
these_sequences = message_utils.extract_AB_substrings(response)
outcome = model_utils.evaluate_sequences(
    these_sequences,
    target,
    model_ensemble,
)
print(outcome)

AABABBABAAAAABABABBA: 0.000
AAABBBABAAAAABABABBA: 0.387
AABABBABAAAAABAABBBA: 0.523
AABABBBAAAAAABABABBA: 0.679
AABABBABAAAAAABBABBA: 1.019
