In [1]:
using Genie, KitMSA, PyPlot, JLD2, Statistics, StatsBase, DCAUtils


# Natural B1 alignment. The matrix returned by fasta2matrix is flipped so that
# size(MSA_train) unpacks as (L, M) below.
MSA_train = Int8.(permutedims(fasta2matrix("../files_nobu_evolution_exp/natural_seqs/B1_all_Sevan_hmm03_max010rowgaps_max075flankcolgaps_max080wtid.fasta")))

# Brings the variables stored in the JLD2 file into scope; the calls further
# down rely on `h` and `J` being among them.
@load "../data_Genie/pars_beta_B1_integrated.jld2"

# Externally computed weights, later handed to check_equilibration_with_weight.
# NOTE(review): readdlm is defined in DelimitedFiles, which is not in the
# `using` line of this cell — confirm one of the loaded packages re-exports it.
w_fra = readdlm("../data_Genie/Weights_leonardo.dat")

L, M = size(MSA_train)
q = 21;

# 2000 starting sequences stored as columns, each of length L with entries
# drawn uniformly from 1:q.
start_msa = reduce(hcat, [rand(1:q, L) for _ in 1:2000])

# Evolve the random start with the loaded parameters and print the elapsed time.
res = @time run_evolution(
    start_msa,
    h,
    J;
    p=0.5,
    temp=1.0,
    N_steps=5 * 10^7,
    N_points=20,
    verbose=true,
);

# Compare the stored trajectory (res.step_msa at res.steps) with the natural
# alignment, once passing the external weights w_fra and once without them.
# The first argument of each call is the output folder handed to the callee.
Genie.check_equilibration_with_weight(
    "../figures/beta_B1_fra_weights/", MSA_train, res.step_msa, res.steps, w_fra
)
check_equilibration("../figures/beta_B1/", MSA_train, res.step_msa, res.steps)

# Time-scale diagnostic; the last argument is the figure path.
Genie.check_timescales(
    res.step_msa, MSA_train, h, J, res.steps, "../figures/beta_B1/dynamics.png"
);

# pair_dist_freq on the natural alignment and on the last stored sample,
# using n_seq = 1000 in both cases, then compared by check_pairwise.
f_nat = pair_dist_freq(MSA_train; n_seq=1000);
f_sim = pair_dist_freq(res.step_msa[end]; n_seq=1000);
check_pairwise(f_nat, f_sim, "../figures/beta_B1/pair_dist.png")

# Energy comparison between the natural alignment and the last stored sample.
check_energy("../figures/beta_B1/energy.png", MSA_train, res.step_msa[end], h, J)




In [None]:
using Genie, KitMSA, PyPlot, JLD2, Statistics, StatsBase, DCAUtils

# Natural alignment, flipped so that size(MSA_train) unpacks as (L, M).
MSA_train = Int8.(permutedims(fasta2matrix("../files_nobu_evolution_exp/natural_seqs/B1_all_Sevan_hmm03_max010rowgaps_max075flankcolgaps_max080wtid.fasta")))
# Restores the variables saved in the JLD2 file (this cell later uses h, J and w).
@load "../data_Genie/pars_beta_B1_integrated.jld2"
L, M = size(MSA_train);
# VIM-2 (a DMS exists for this sequence)
VIM2 = fasta2matrix("../files_nobu_evolution_exp/wts/VIM2_V185I_hmmstruct_symfrac03_max075flankcolgaps.fasta");

# NDM-1 (a DMS exists for this sequence)
NDM = fasta2matrix("../files_nobu_evolution_exp/wts/NDM1_hmmstruct_symfrac03_max075flankcolgaps.fasta");

# IMP-1, BcII-1: both are clinically relevant, and DMS experiments are likely
# to be done soon. In another project with Nobu, the functionality of dozens of
# sequences generated around these two, at distances 11%, 22% and 33%, is being
# tested.
IMP = fasta2matrix("../files_nobu_evolution_exp/wts/IMP-1.fasta");
BCII = fasta2matrix("../files_nobu_evolution_exp/wts/BcII-1.fasta");

# Other relevant wild types
VIM1 = fasta2matrix("../files_nobu_evolution_exp/wts/VIM1.fasta");
IND = fasta2matrix("../files_nobu_evolution_exp/wts/IND1.fasta");
CcrA = fasta2matrix("../files_nobu_evolution_exp/wts/CcrA.fasta");

# Stack the wild types as the columns of imp_seqs (one sequence per column,
# the layout the rest of this cell indexes with imp_seqs[:, i]).
# MSA_train is built from a transposed fasta2matrix result, whereas the
# wild-type loads above are not transposed. Flattening each one with `vec`
# gives a length-L column whether a single-sequence file comes back as a
# vector, L×1 or 1×L; without it a 1×L result would be concatenated into a
# 1×(7L) matrix.
# NOTE(review): assumes every wild-type file holds exactly one sequence — confirm.
imp_seqs = Int8.(reduce(hcat, vec.([VIM2, NDM, IMP, BCII, VIM1, IND, CcrA])));
N_start_seq_imp = size(imp_seqs, 2);

# Mean of cont_dep_entr's output for every wild type and for every column of
# the natural alignment. Columns are copied with [:, i], as in the original.
cdes_imp = map(i -> mean(cont_dep_entr(imp_seqs[:, i], h, J)), 1:N_start_seq_imp);
cdes = map(i -> mean(cont_dep_entr(MSA_train[:, i], h, J)), 1:M);

# Plot colour and legend label of each wild type, in the column order of imp_seqs.
# NOTE(review): `names` is also an exported Base function; the assignment only
# works if Base.names has not been used earlier in the session.
cc = ["blue", "red", "green", "yellow", "brown", "pink", "purple"];
names = ["VIM2", "NDM", "IMP", "BCII", "VIM1", "IND", "CcrA"];



# N_steps should be 10^6
# NOTE(review): the value assigned below is L*10^5, which disagrees with the
# comment above — confirm which one is intended.
N_steps = L * 10^5;
N_chains = 500;
N_start_seq = 50;
NN_points = 300;

# Log-spaced integer step counts between 1 and N_steps. Truncation to Int makes
# neighbouring points coincide, so after unique() N_points can be smaller than
# NN_points.
# NOTE(review): the arrays sized with N_points are later filled from
# run_evolution(..., N_points = NN_points); this presumes run_evolution stores
# the same number of snapshots — confirm.
steps = unique([
    trunc(Int, 10^y) for y in range(0.0, log10(N_steps); length=NN_points)
]);
sweeps = steps ./ L;
N_points = length(steps);

# Containers for the runs started from natural sequences.
res_all = [];
hams_all = zeros(N_start_seq, N_points, N_chains);

# Draw N_start_seq column indices of MSA_train using the weights w. The range
# is passed to sample directly; materialising it with collect is not needed.
# NOTE(review): `w` is not assigned in this cell; presumably it comes from the
# @load call — confirm.
idx_seqs = sample(1:size(MSA_train, 2), ProbabilityWeights(w), N_start_seq);
start_seq = [MSA_train[:, idx_seqs[i]] for i in 1:N_start_seq];
w_seqs = w[idx_seqs];

#=@time for n in 1:N_start_seq
    res = run_evolution(Int8.(hcat([start_seq[n] for i in 1:N_chains]...)), 
        h, 
        J, 
        p = 0.5, 
        temp = 1.0, 
        N_points = NN_points, 
        N_steps = N_steps);
    if n%10 == 0
        println("Initial seq $(n)")
    end
    push!(res_all, res) 
    hams_all[n,:,:] .= ham_dist(res.step_msa)
end




hams_single = zeros(N_start_seq, N_points); chi_dyn_single = zeros(N_start_seq, N_points);
for i in 1:N_start_seq
    for n in 1:N_points
        hams_single[i,n] = mean(ham_dist(res_all[i].step_msa[1], res_all[i].step_msa[n]))
        chi_dyn_single[i,n] = var(ham_dist(res_all[i].step_msa[1], res_all[i].step_msa[n]))
    end
end

mean_chi_dyn_single = mean(chi_dyn_single, Weights(w_seqs), dims = 1)[1,:];
hams_single_all = mean(hams_all,dims = 3)[:,:,1]; mean_hams_single_all = mean(hams_single_all, Weights(w_seqs), dims = 1)[1,:];
=#

# One result object and one (N_points × N_chains) slice of hams_all_imp per
# wild type.
res_all_imp = [];
hams_all_imp = zeros(N_start_seq_imp, N_points, N_chains);
start_seq_imp = [imp_seqs[:, i] for i in 1:N_start_seq_imp];

@time for n in 1:N_start_seq_imp
    # Every chain starts from the same wild type: the sequence is repeated
    # N_chains times, one copy per column.
    res = run_evolution(
        Int8.(repeat(start_seq_imp[n], 1, N_chains)),
        h,
        J;
        p=0.5,
        temp=1.0,
        N_points=NN_points,
        N_steps=N_steps,
    )
    println("Initial seq $(n)")
    push!(res_all_imp, res)
    hams_all_imp[n, :, :] .= ham_dist(res.step_msa)
end

# For every wild type i and stored time point n: mean and variance of
# ham_dist between the first snapshot and snapshot n.
hams_single_imp = zeros(N_start_seq_imp, N_points);
chi_dyn_single_imp = zeros(N_start_seq_imp, N_points);
for i in 1:N_start_seq_imp
    local snapshots = res_all_imp[i].step_msa
    for n in 1:N_points
        # ham_dist is evaluated once and shared by both statistics; the
        # original called it twice with identical arguments.
        local dists = ham_dist(snapshots[1], snapshots[n])
        hams_single_imp[i, n] = mean(dists)
        chi_dyn_single_imp[i, n] = var(dists)
    end
end

# Average of hams_all_imp over its third dimension (sized N_chains).
hams_single_all_imp = mean(hams_all_imp, dims=3)[:, :, 1];



close("all")
#=plt.plot()
for i in 1:N_start_seq
    plt.plot(sweeps, chi_dyn_single[i,:] ./ (L^2), alpha = 0.1, color = "grey")
end
plt.plot(sweeps, mean(chi_dyn_single, Weights(w_seqs), dims = 1)[1,:] ./ (L^2), linewidth = 2.0, color = "black", label = "chi_dyn")
=#

# One curve per wild type: chi_dyn_single_imp divided by L^2, against sweeps.
for i in 1:N_start_seq_imp
    plt.plot(
        sweeps,
        chi_dyn_single_imp[i, :] / L^2;
        linewidth=2.0,
        color=cc[i],
        label=names[i],
    )
end

plt.xscale("log")
plt.xlabel("MCMC sweeps")
plt.ylabel("chi_dyn_A")
plt.legend()
plt.savefig("../single_wt_chi_dyn_imp.png")



close("all")
plt.plot()
#=for i in 1:N_start_seq
    plt.plot(sweeps, hams_single_all[i,:] ./ L , alpha = 0.1, color = "grey")
end
plt.plot(sweeps, mean_hams_single_all ./ L, linewidth = 2.0, color = "black", label = "<[H_A]>")
=#

# One curve per wild type: hams_single_all_imp divided by L, against sweeps.
for i in 1:N_start_seq_imp
    plt.plot(
        sweeps,
        hams_single_all_imp[i, :] / L;
        linewidth=2.0,
        color=cc[i],
        label=names[i],
    )
end

plt.xscale("log")
plt.xlabel("MCMC sweeps")
plt.ylabel("[H_A]")
plt.legend()
plt.savefig("../all_single_wt_mean_ham_dist_imp.png")


# cdes_imp against the mean Hamming distance (divided by L) at stored time
# point 57.
# NOTE(review): 57 is a hard-coded column index; sweeps[57] is the matching
# time — confirm it is the intended one.
close("all")
plt.scatter(cdes_imp, hams_single_imp[:, 57] ./ L)
plt.xlabel("CDE^A")
plt.ylabel("[H^A]")
plt.savefig("../CDE_vs_ham.png")

# cdes_imp against the largest chi_dyn value reached by each wild type.
close("all")
plt.scatter(cdes_imp, vec(maximum(chi_dyn_single_imp; dims=2)))
plt.xlabel("CDE^A")
plt.ylabel("Max chi_dyn^A")
plt.savefig("../CDE_vs_max_chi_dyn.png")

# Summary of cdes over the natural alignment. @show prints each value; as bare
# expressions in the middle of a cell they were computed and then discarded,
# because only the last expression of a cell is displayed.
@show mean(cdes)
@show std(cdes)
@show extrema(cdes)
# Kept as the final expression so the notebook still displays it.
cdes_imp

In [None]:
# Natural alignment, transposed so that size(MSA_train) unpacks as (L, M).
MSA_train = Int8.(fasta2matrix("../files_nobu_evolution_exp/natural_seqs/B1_all_Sevan_hmm03_max010rowgaps_max075flankcolgaps_max080wtid.fasta")')

h_int, J_int = extract_params("../files_nobu_evolution_exp/params/Parameters_B1all_YI.dat.gz")

L, M = size(MSA_train);

# Probabilities derived from the fields, normalised separately for every
# column so that each column of ps sums to 1. The original divided by the sum
# over the whole matrix, which makes all entries together sum to 1 instead.
# NOTE(review): assumes h_int is laid out as (states × sites), matching the
# (21, L) shape given to fs below — confirm against extract_params.
ps = exp.(h_int) ./ sum(exp.(h_int); dims=1);

# First output of compute_weighted_frequencies, reshaped to 21 × L.
# MSA_train is already a Matrix{Int8}, so it is passed without another copy.
# NOTE(review): 22 is passed as the alphabet size while the reshape uses 21
# rows; presumably the callee omits one state per site — confirm.
fs = reshape(compute_weighted_frequencies(MSA_train, 22, 0.2)[1], (21, L));

h = copy(h_int);
# Swap the 2nd and 3rd axes of the coupling tensor.
J = permutedims(J_int, [1, 3, 2, 4]);
size(J)
