In [None]:
%reload_ext autoreload

In [None]:
%autoreload 2
%matplotlib inline

import editdistance
import itertools
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

## Make analysis graphs for consistency/independence/robustness

In [None]:
def get_top_sequence_per_batch(data):
    """Return the best `true_score` seen in each batch of `data`.

    Batches are visited in the order their labels first appear in
    `data.batch`; the result holds one maximum per distinct batch label.

    Only the `batch` column is used for grouping. The original author noted
    that model type and landscape id should also be taken into account, so
    callers should pre-filter `data` accordingly.
    """
    return [
        data[data.batch == batch_label].true_score.max()
        for batch_label in data.batch.unique()
    ]

def compute_cumulative_outcomes(data):
    """Build a per-batch table of best and running-best fitness.

    For every (landscape_id, start_id, explorer_type) combination present in
    `data`, the per-batch maxima from `get_top_sequence_per_batch` are turned
    into one output row per batch.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns `landscape_id`, `start_id`, `explorer_type`,
        `batch` and `true_score`.

    Returns
    -------
    pandas.DataFrame
        Columns: landscape_id, start_id, model_type, explorer_type, batch,
        max_fitness, cum_max_fitness. Rows carry a fresh 0..n-1 index.

        * `batch` is the 1-based position of the batch in order of
          appearance, not the original batch label.
        * `cum_max_fitness` is the running maximum of `max_fitness`; it starts
          at 0, so it never drops below 0.
        * `model_type` is kept for column compatibility but is never filled
          here, so it is all-NaN.

    Notes
    -----
    Landscapes whose id equals the string "-1" are skipped. Each processed
    landscape id is printed as a progress indicator.

    Rows are collected in a list and the frame is built once: this replaces
    the per-row `DataFrame.append`, which was removed in pandas 2.0 and was
    quadratic in the number of rows.
    """
    columns = ["landscape_id", "start_id", "model_type", "explorer_type",
               "batch", "max_fitness", "cum_max_fitness"]
    records = []

    # `data` is not modified below, so these can be computed once up front.
    start_ids = data.start_id.unique()
    explorer_types = data.explorer_type.unique()

    for landscape in data.landscape_id.unique():
        if landscape == str(-1):
            continue
        print(landscape)
        for start_id in start_ids:
            for explorer_type in explorer_types:
                sub_data = data[(data.landscape_id == landscape)
                                & (data.start_id == start_id)
                                & (data.explorer_type == explorer_type)]
                cum_fit = 0
                for batch, max_fitness in enumerate(get_top_sequence_per_batch(sub_data)):
                    if max_fitness > cum_fit:
                        cum_fit = max_fitness
                    records.append({"landscape_id": landscape,
                                    "start_id": start_id,
                                    "explorer_type": explorer_type,
                                    "batch": batch + 1,
                                    "max_fitness": max_fitness,
                                    "cum_max_fitness": cum_fit})

    # `columns` fixes the column order and adds the (unfilled) model_type column.
    return pd.DataFrame(records, columns=columns)

In [None]:
# Consistency/robustness/independence: cumulative best fitness per batch for
# each explorer, restricted to model NAMb_ss0.9 and start sequence TF0.
data1 = pd.read_csv("../simulations/evaluations_test_six6/consistency_robustness_independence/Greedy_mu1_tr0.05_r0.1_rho1.csv", index_col=False)
data2 = pd.read_csv("../simulations/evaluations_test_six6/consistency_robustness_independence/MLWFG_mu1_r0.1_rho1_beta100.csv", index_col=False)
data3 = pd.read_csv("../simulations/eval/consistency_robustness_independence/DQN_Explorer.csv")
# Only the DQN log is restricted to the SIX6_REF_R1 landscape here.
data3 = data3[(data3['landscape_id'] == 'SIX6_REF_R1')]
# pd.concat replaces the chained DataFrame.append calls (removed in pandas 2.0).
data = pd.concat([data1, data2, data3])
sub_data = data[(data.model_type == "NAMb_ss0.9") & (data.start_id == 'TF0')]
outcomes = compute_cumulative_outcomes(sub_data)
plt.figure(figsize=(12, 5))
plt.title('Consistency/Robustness/Independence, TF0')
sns.lineplot(x="batch", y="cum_max_fitness", hue="explorer_type", data=outcomes, color="r")

In [None]:
# Adaptivity: cumulative best fitness per batch for each explorer, restricted
# to model NAMb_ss0.9, start sequence TF0 and landscape SIX6_REF_R1.
data1 = pd.read_csv("../simulations/eval/adaptivity/CMAES.csv", index_col=False)
data2 = pd.read_csv("../simulations/eval/adaptivity/DQN_Explorer.csv", index_col=False)
data3 = pd.read_csv("../simulations/eval/adaptivity/DynaPPO_Agent_0.5_5_10.csv", index_col=False)
data4 = pd.read_csv("../simulations/eval/adaptivity/PPO_Agent.csv", index_col=False)
# pd.concat replaces the chained DataFrame.append calls (removed in pandas 2.0).
data = pd.concat([data1, data2, data3, data4])
sub_data = data[(data.model_type == "NAMb_ss0.9") & (data.start_id == 'TF0') & (data['landscape_id'] == 'SIX6_REF_R1')]
outcomes = compute_cumulative_outcomes(sub_data)
plt.figure(figsize=(12, 5))
plt.title('Adaptivity, TF0')
sns.lineplot(x="batch", y="cum_max_fitness", hue="explorer_type", data=outcomes, color="r")

In [None]:
# DQN explorer only: load the log, keep the rows for model NAMb_ss0.9 on
# landscape SIX6_REF_R1 starting from TF0, then plot the cumulative best
# fitness per batch.
data3 = pd.read_csv(
    "../simulations/eval/consistency_robustness_independence/DQN_Explorer.csv"
).loc[
    lambda frame: (frame['start_id'] == 'TF0')
    & (frame['landscape_id'] == 'SIX6_REF_R1')
    & (frame['model_type'] == 'NAMb_ss0.9')
]
outcomes3 = compute_cumulative_outcomes(data3)
plt.figure(figsize=(12, 5))
sns.lineplot(data=outcomes3, x="batch", y="cum_max_fitness", hue="explorer_type", color="r")

## Run analysis graphs for other simulation types 

In [None]:
def compute_sequential_edit_distances(seq_df, cutoff=0):
    """Edit distances from each sequence to every higher-ranked sequence.

    Parameters
    ----------
    seq_df : pandas.DataFrame
        Frame in the output-log format; the `sequence` and `true_score`
        columns are read. The frame is not modified.
    cutoff : number, default 0
        Rows with `true_score` below this value are discarded first.

    Returns
    -------
    list[list[int]]
        Sequences are ranked by `true_score`, highest first. Entry `k` holds
        the distances between the sequence at rank `k + 1` and each of the
        `k + 1` sequences ranked above it. The result is empty when fewer
        than two sequences pass the cutoff.
    """
    # Filter and sort in one non-mutating chain; the previous in-place sort of
    # a filtered slice is a pandas anti-pattern and can emit
    # SettingWithCopyWarning.
    ranked = (
        seq_df.loc[seq_df['true_score'] >= cutoff, ['sequence', 'true_score']]
        .sort_values('true_score', ascending=False)
    )
    seqs = ranked['sequence'].values
    return [
        [editdistance.eval(better_seq, seqs[rank]) for better_seq in seqs[:rank]]
        for rank in range(1, len(seqs))
    ]

def compute_all_edit_distances(seq_df, cutoff=0):
    """Full pairwise edit-distance matrix over `seq_df['sequence']`.

    Returns a list of rows, one per sequence in the order they appear in
    `seq_df`; row `i` holds the distance from sequence `i` to every sequence
    (including itself).

    NOTE(review): `cutoff` is accepted but never used by this function.
    """
    sequences = seq_df['sequence'].values
    return [
        [editdistance.eval(row_seq, col_seq) for col_seq in sequences]
        for row_seq in sequences
    ]

def _plot_normalized_distance_histograms(batches, distance_fn):
    """Plot one normalized edit-distance histogram line per entry of `batches`.

    `distance_fn` maps a batch to a list of lists of distances. These are
    flattened, binned with `np.histogram` (default bins) and converted to
    fractions of the total count before plotting against the left bin edges.
    """
    for name, batch in batches.items():
        flat_distances = list(itertools.chain.from_iterable(distance_fn(batch)))
        counts, bin_edges = np.histogram(flat_distances)
        total = counts.sum()
        # With no distances every count is 0; skip the division so the line is
        # all zeros instead of 0/0 = NaN (plus a RuntimeWarning).
        fractions = counts * 1.0 / total if total else counts * 1.0
        plt.plot(bin_edges[:-1], fractions, label=name)
    plt.legend()
    plt.show()


def compare_sequential_edit_distances(batches):
    """Compare explorers by their rank-ordered edit-distance distribution.

    `batches` is a dict mapping an explorer name to a batch DataFrame. For
    each entry, the distances from `compute_sequential_edit_distances` are
    drawn as a normalized histogram line labelled with the explorer name.
    """
    _plot_normalized_distance_histograms(batches, compute_sequential_edit_distances)


def compare_all_edit_distances(batches):
    """Compare explorers by their all-pairs edit-distance distribution.

    `batches` is a dict mapping an explorer name to a batch DataFrame. For
    each entry, the distances from `compute_all_edit_distances` are drawn as
    a normalized histogram line labelled with the explorer name.
    """
    _plot_normalized_distance_histograms(batches, compute_all_edit_distances)

In [None]:
# Explorer name -> efficiency log file under ../plotting_data_subset/.
file_names = {
    'DQN': 'DQN_efficiency.csv',
    'Greedy': 'Greedy_mu1_tr0.05_r0.2_rho1_efficiency.csv',
    'BO': 'BO_Efficiency.csv',
}

# For each explorer keep only batch 9 of the B1L14RNA1 landscape, started from
# startRNAL14_0 with the AdaENS_3_NAMb_ss1 model.
batches = {}
for name, file_name in file_names.items():
    test_data = pd.read_csv("../plotting_data_subset/" + file_name)
    batch = test_data.loc[
        (test_data['batch'] == 9)
        & (test_data['landscape_id'] == 'B1L14RNA1')
        & (test_data['start_id'] == 'startRNAL14_0')
        & (test_data['model_type'] == 'AdaENS_3_NAMb_ss1')
    ]
    batches[name] = batch

In [None]:
# One normalized histogram line per explorer: edit distances from each sequence
# to every sequence with a higher true_score in the same batch.
compare_sequential_edit_distances(batches)

In [None]:
# One normalized histogram line per explorer: edit distances between every
# pair of sequences in the same batch (self-pairs included).
compare_all_edit_distances(batches)