Evaluate the previous results of the Cas13d optimization with the new predictive model.

## Synonymous Reporters

***

In [74]:
import pandas as pd
import numpy as np
from joblib import load
from optimalcodon.projects.rnastability.predictuncertainty import predict_seq_with_uncertainty

# Load the joblib-serialized predictive model; it is passed as `models=` to
# every predict_seq_with_uncertainty call in this notebook.
model_path = "../../data/19-08-08-PredictiveModelStability/predictivemodel.joblib"
modelo = load(model_path)

In [75]:
# Optimization path of the silent reporters, written by the 19-05-23 protein
# optimization analysis.
optimization_path_csv = "../19-05-23-ProteinOptimization/results_data/silentReporter_optimization_path.csv"
old_optimization_13d = pd.read_csv(optimization_path_csv)

In [76]:
# 'Unnamed: 0' is presumably the index that was saved along with the CSV.
# errors='ignore' keeps this cell re-runnable: without it a second execution
# raises KeyError because the column has already been removed.
old_optimization_13d = old_optimization_13d.drop(columns=['Unnamed: 0'], errors='ignore')
# Fixed seed so the preview shows the same rows on every run.
old_optimization_13d.sample(10, random_state=0)

Unnamed: 0,iteration,sequences,fitness,optimization,human | 293t | endogenous,fish | embryo mzt | aamanitin polya,mouse | mES cells | slam-seq,human | RPE | endogenous
572,28,AGACACCAATCCTTCTGTGATGAGGTGTTTGTTTGCAGTGATGAGC...,-1.520315,minimization,-1.520315,-1.503141,-1.407099,-1.475853
6662,82,AGACACCAATCCTTCTGCGACGAGGTTTTTGTATGCTCAGATGAGC...,0.440096,maximization,0.440096,-0.15277,0.324469,0.30947
2301,114,AGACATCAATCGTTCTGTGACGAGGTTTTCGTGTGCTCAGACGAGT...,-1.99899,minimization,-1.99899,-1.931072,-1.832715,-1.961984
2262,113,AGACATCAATCGTTCTGTGACGAGGTTTTCGTGTGCTCAGACGAGT...,-2.025811,minimization,-2.025811,-2.018585,-1.89673,-2.032639
5018,0,AGACACCAATCCTTCTGCGACGAGGTATTTGTGTGCAGTGATGAGC...,-0.667543,maximization,-0.667543,-0.979448,-0.674375,-0.698296
389,19,AGACACCAATCCTTCTGTGATGAGGTGTTTGTTTGCAGTGATGAGC...,-1.44609,minimization,-1.44609,-1.471324,-1.316036,-1.408833
3092,154,AGACATCAATCGTTTTGTGACGAGGTTTTCGTGTGCTCAGACGAGT...,-2.037908,minimization,-2.037908,-1.996216,-1.868462,-2.041669
1812,90,AGACATCAATCCTTCTGTGACGAGGTTTTTGTTTGTTCGGATGAAC...,-1.983654,minimization,-1.983654,-1.969327,-1.856734,-2.022124
5820,40,AGACACCAATCCTTCTGCGACGAGGTATTTGTGTGCTCAGACGAAC...,0.335293,maximization,0.335293,-0.20533,0.189028,0.243503
7871,143,AGACACCAGTCCTTCTGCGACGAGGTTTTTGTATGCTCAGATGAGT...,0.542314,maximization,0.542314,-0.022427,0.364102,0.392203


In [77]:
# get the best sequence at each iteration
def get_max(grp):
    """Return the row(s) of ``grp`` with the best fitness for the group's goal.

    The previous implementation always kept the row with the largest
    ``|fitness|``. That selects the *worst* sequence for a maximization group
    whose fitness values are still negative (e.g. the first iterations), and
    can pick the wrong sign when values straddle zero.

    Parameters
    ----------
    grp : pandas.DataFrame
        One group of the optimization table. Must have a ``fitness`` column.
        If it also has an ``optimization`` column holding a single value,
        ``'maximization'`` selects the highest fitness and ``'minimization'``
        the lowest.

    Returns
    -------
    pandas.DataFrame
        The rows of ``grp`` whose fitness equals the best value; ties are all
        kept, as before.
    """
    fitness = grp.fitness
    # The goal column may be absent (e.g. when the grouping columns are not
    # passed to the applied function), so look it up defensively.
    goals = grp.optimization.unique() if 'optimization' in grp.columns else ()

    if len(goals) == 1 and goals[0] == 'maximization':
        is_best = fitness == fitness.max()
    elif len(goals) == 1 and goals[0] == 'minimization':
        is_best = fitness == fitness.min()
    else:
        # Unknown or mixed goal: fall back to the legacy largest-magnitude rule.
        magnitude = np.abs(fitness)
        is_best = magnitude == magnitude.max()
    return grp[is_best]

# Keep only the best row(s) of every (iteration, optimization) group.
# reset_index(drop=True) discards the index built by groupby/apply directly,
# so the auto-generated 'level_0'/'level_1' columns never have to be dropped
# by name (which raises KeyError whenever the index levels are labelled
# differently).
results = (
    old_optimization_13d
    .groupby(['iteration', 'optimization'], as_index=False)
    .apply(get_max)
    .reset_index(drop=True)
)


In [78]:
## predict the uncertainty

# keyword arguments forwarded to the predictor: human, 293t, endogenous
mdl_params = dict(specie='human', cell_type='293t', datatype='endogenous')

# one predictor call per sequence; each element of `preds` is indexable
preds = results.sequences.apply(
    predict_seq_with_uncertainty, models=modelo, **mdl_params
)

# position 1 -> median_pred, position 0 -> ci_l, last position -> ci_u
for col_name, position in (('median_pred', 1), ('ci_l', 0), ('ci_u', -1)):
    # bind `position` as a default so each lambda keeps its own index
    results[col_name] = preds.map(lambda pred, position=position: pred[position])

In [79]:
# Save the per-iteration best sequences together with the new predictions.
synreps_out_csv = 'results_data/old_optimization_with_new_predictions_synReps.csv'
results.to_csv(synreps_out_csv, index=False)

In [80]:
# Load the final silent reporter sequences and discard the 'Unnamed: 0' column.
reporters_csv = "../19-05-23-ProteinOptimization/results_data/silent_reporters_seqs.csv"
reporters_raw = pd.read_csv(reporters_csv)
reporters = reporters_raw.drop(['Unnamed: 0'], axis=1)
# NOTE(review): the original comment here read "append the mCherry part", but
# nothing is appended in this cell -- confirm whether that step is still needed.
reporters

Unnamed: 0,id_seq,seqs,predicted_stability_human293t
0,opt_100,CGCCACCAGTCCTTCTGCGACGAGGTCTTCGTCTGCTCCGACGAGC...,-0.195757
1,opt_75,AGACACcagTCCTTCTGCGACGAGGTAttcGTCTGCAGTgacGAGC...,-0.339849
2,opt_50,AGACACCAATCCTTCTGCGACGAGGTATTTGTCTGCAGTGATGAGC...,-0.626016
3,opt_25,AGACACCAAagtTTCtgtGACgaaGTATTTGTCtgtAGTGATGAGt...,-0.782974
4,suprema,AGACACCAATCCTTCTGCGATGAGGTTTTCGTATGCAGCGATGAGT...,0.607108
5,infima,AGACATCAATCGTTTTGTGACGAGGTTTTCGTGTGCTCAGACGAGT...,-2.09977
6,idt,AGACATCAATCTTTTTGCGATGAAGTCTTTGTATGTTCCGACGAAC...,-0.288133


In [81]:
# predict the final reporters
# (relies on `mdl_params` and `modelo` defined in earlier cells)
preds = reporters.seqs.apply(
    predict_seq_with_uncertainty, models=modelo, **mdl_params
)

# position 1 -> median_pred, position 0 -> ci_l, last position -> ci_u
for col_name, position in (('median_pred', 1), ('ci_l', 0), ('ci_u', -1)):
    # bind `position` as a default so each lambda keeps its own index
    reporters[col_name] = preds.map(lambda pred, position=position: pred[position])

In [82]:
# Save the reporter table with the new prediction columns.
# NOTE(review): the file name is spelled "predicitons"; it is kept unchanged
# in case other code reads this exact path.
reporters_out_csv = "results_data/reporters_predicitons.csv"
reporters.to_csv(reporters_out_csv, index=False)