## Testing dynamical and background susceptibilities

In [None]:
# Load the natural DBD alignment and the model parameters used by the runs below.
# NOTE(review): this cell loads "../pars_dbd.jld2" while the aging cell loads
# "../data_Genie/pars_dbd.jld2" -- confirm both paths are intended.
dbd_msa = read_fasta_alignment(
    "../Gen.jl/data/alignments/natural/DBD_alignment.uniref90.cov80.a2m", 0.9
);
@load "../pars_dbd.jld2"

# Simulation sizes.
N_steps = 10^6
N_chains = 1000
N_start_seq = 200
NN_points = 100

# Integer step counts, log-spaced between 1 and N_steps. Truncation to Int creates
# duplicates that `unique` removes, so N_points can be smaller than NN_points.
steps = unique([
    trunc(Int, 10^y) for y in range(log10(1), log10(N_steps), length = NN_points)
])
N_points = length(steps)

# Containers filled by the evolution loop: one result per starting sequence, and the
# distances indexed as (starting sequence, stored point, chain).
res_all = []
hams_all = zeros(N_start_seq, N_points, N_chains)

# Draw the starting sequences from the alignment with probability given by the first
# output of `compute_weights`, and keep the weights of the drawn sequences.
w = compute_weights(dbd_msa, 22, 0.2)[1]
idx_seqs = sample(collect(1:size(dbd_msa, 2)), ProbabilityWeights(w), N_start_seq)
start_seq = [dbd_msa[:, idx_seqs[i]] for i in 1:N_start_seq]
w_seqs = w[idx_seqs];

# For every starting sequence, evolve N_chains chains that all start from that same
# sequence, keep the full result and store the distances returned by `ham_dist`.
@time for n in 1:N_start_seq
    # One column per chain, every column equal to the n-th starting sequence.
    res = run_evolution(
        Int8.(reduce(hcat, [start_seq[n] for i in 1:N_chains])),
        h_dbd,
        J_dbd;
        p = 0.5,
        temp = 1.0,
        N_points = NN_points,
        N_steps = N_steps,
    )
    # Progress message every 10 starting sequences.
    n % 10 == 0 && println("Initial seq $(n)")
    push!(res_all, res)
    hams_all[n, :, :] .= ham_dist(res.step_msa)
end


# Per-starting-sequence statistics over chains of the Hamming distance between the
# first stored MSA and the MSA at every stored point:
#   hams_single[i, n]    -- mean over chains
#   chi_dyn_single[i, n] -- variance over chains
hams_single = zeros(N_start_seq, N_points)
chi_dyn_single = zeros(N_start_seq, N_points)
for i in 1:N_start_seq
    local msas = res_all[i].step_msa
    for n in 1:N_points
        # Compute the distances once and reuse them for both moments.
        local dists = ham_dist(msas[1], msas[n])
        hams_single[i, n] = mean(dists)
        # Population variance (no N-1 correction): chi_tot and chi_back further down
        # are built from uncorrected moments, so the decomposition
        # chi_tot = chi_dyn + chi_back checked there only holds with the same
        # normalisation here.
        chi_dyn_single[i, n] = var(dists; corrected = false)
    end
end


# Mean Hamming distance versus MCMC steps for the first five starting sequences.
close("all")
plt.plot()
foreach(1:5) do i
    plt.plot(steps, hams_single[i, :], label = "seq $(idx_seqs[i])")
end

# Axis decoration, legend, then write the figure to disk.
plt.xscale("log")
plt.xlabel("MCMC steps")
plt.ylabel("[H_A]")
plt.legend()
plt.savefig("../single_wt_mean_ham_dist.png")
        

# Per-sequence dynamical susceptibility curves (faint grey) together with their
# weighted average over starting sequences (black).
close("all")
plt.plot()
# Iterate over every starting sequence instead of a hard-coded 200, so the plot stays
# correct when N_start_seq is changed.
for i in 1:N_start_seq
    plt.plot(steps, chi_dyn_single[i, :], alpha = 0.05, color = "grey")
end

plt.plot(
    steps,
    mean(chi_dyn_single, Weights(w_seqs), dims = 1)[1, :],
    linewidth = 2.0,
    color = "black",
    label = "chi_dyn",
)

plt.legend()
plt.xlabel("MCMC steps")
plt.xscale("log")
plt.ylabel("chi_dyn_A")
plt.savefig("../single_wt_chi_dyn.png")


# Chain-averaged distance for each (starting sequence, stored point), and its weighted
# average over starting sequences.
hams_single_all = dropdims(mean(hams_all, dims = 3), dims = 3)
mean_hams_single_all = mean(hams_single_all, Weights(w_seqs), dims = 1)[1, :]

# One faint curve per starting sequence, with the weighted average on top.
close("all")
plt.plot()
foreach(1:N_start_seq) do i
    plt.plot(steps, hams_single_all[i, :], alpha = 0.2, color = "grey")
end
plt.plot(
    steps, mean_hams_single_all, linewidth = 2.0, color = "black", label = "<[H_A]>"
)

plt.xscale("log")
plt.xlabel("MCMC steps")
plt.ylabel("[H_A]")
plt.legend()
plt.savefig("../all_single_wt_mean_ham_dist.png")
    


# Background susceptibility: weighted variance, across starting sequences, of the
# chain-averaged distance (weighted mean of squares minus squared weighted mean).
hams0 = dropdims(mean(hams_all, dims = 3), dims = 3)
first_part = mean(hams0 .^ 2, Weights(w_seqs), dims = 1)[1, :]
second_part = mean(hams0, Weights(w_seqs), dims = 1)[1, :] .^ 2
chi_back = first_part .- second_part

# Total susceptibility: weighted mean of the chain-averaged squared distance minus the
# same squared weighted mean used above.
hams0_tot = dropdims(mean(hams_all .^ 2, dims = 3), dims = 3)
first_part_tot = mean(hams0_tot, Weights(w_seqs), dims = 1)[1, :]
chi_tot = first_part_tot .- second_part

# Dynamical susceptibility: weighted mean of the per-sequence variances.
chi_dyn = mean(chi_dyn_single, Weights(w_seqs), dims = 1)[1, :]

# Plot the three susceptibilities against the step counts stored in the first result.
close("all")

for (curve, shade, tag) in (
    (chi_dyn, "red", "chi_dyn"),
    (chi_back, "blue", "chi_back"),
    (chi_tot, "black", "chi_tot"),
)
    plt.plot(res_all[1].steps, curve, color = shade, label = tag)
end

plt.legend()
plt.xscale("log")
plt.xlabel("MCMC steps")

savefig("../suscept_dbd_weight.png")

# Squared residual of the decomposition chi_tot = chi_dyn + chi_back.
# NOTE(review): the value is not bound or printed, so it is only visible when this
# expression is evaluated on its own.
sum(abs2, chi_tot .- chi_dyn .- chi_back)

# NOTE(review): `@save` writes JLD2 data, yet the target has a ".png" extension and
# lives under "../../data_Genie" while other cells use "../data_Genie" -- confirm the
# intended file name and directory.
@save "../../data_Genie/suscept_sample_dbd.png"

## Testing Aging

In [None]:
# Load the natural DBD alignment and the default parameter set.
dbd_msa = read_fasta_alignment(
    "../Gen.jl/data/alignments/natural/DBD_alignment.uniref90.cov80.a2m", 0.9
);
@load "../data_Genie/pars_dbd.jld2"

# Alternative parameter sets: uncomment one @load together with the assignment below it
# to run the cell with those parameters instead.
#@load "../data_Genie/pars_dbd_act.jld2"

#h_dbd = h_dbd_act; J_dbd = J_dbd_act;

#@load "../data_Genie/pars_dbd_dec.jld2"

#h_dbd = h_dbd_dec; J_dbd = J_dbd_dec;


# Simulation sizes.
N_steps = 10^6
N_chains = 200
NN_points = 1000

# Integer step counts, log-spaced between 1 and N_steps, duplicates removed.
steps = unique([
    trunc(Int, 10^y) for y in range(log10(1), log10(N_steps), length = NN_points)
])
N_points = length(steps)

# One result per temperature is appended to res_all_T further down.
res_all_T = []
# Starting sequences: N_chains columns of the alignment drawn uniformly at random.
idx_seqs = rand(1:size(dbd_msa, 2), N_chains)
Ts = [0.95, 1.0]

start_seqs = dbd_msa[:, idx_seqs];
# Preliminary run at temp = 1.0; only its last stored MSA is kept and used as the
# common starting point of the per-temperature runs.
@time res = run_evolution(
    Int8.(start_seqs),
    h_dbd,
    J_dbd;
    p = 0.5,
    temp = 1.0,
    N_points = 5,
    N_steps = 1*10^6,
);
start_msa = res.step_msa[end];

# One run per temperature in Ts, all starting from start_msa; results are appended to
# res_all_T in the order of Ts.
@time for temp in Ts
    println(temp)
    @time res = run_evolution(
        Int8.(start_msa),
        h_dbd,
        J_dbd;
        p = 0.5,
        temp = temp,
        N_points = NN_points,
        N_steps = N_steps,
    );
    push!(res_all_T, res)
end


# For each temperature run and each reference index in t_idxs, collect the Hamming
# distances between the MSA stored at the reference index and every MSA stored from
# that index onwards. hams_delayedK[i] holds the list for temperature i and t_idxs[K].
hams_delayed1 = []
hams_delayed2 = []
hams_delayed3 = []
t_idxs = [95, 262, 428]
for i in eachindex(Ts)
    for (store, t0) in zip((hams_delayed1, hams_delayed2, hams_delayed3), t_idxs)
        push!(
            store,
            [ham_dist(res_all_T[i].step_msa[t0], x) for x in res_all_T[i].step_msa[t0:end]],
        )
    end
end


# Step counts actually stored by the runs (replaces the locally computed grid).
steps = res_all_T[1].steps
# Map every temperature to [0, 1] and pick its colour from the viridis colormap.
z_min, z_max = extrema(Ts)
# When all temperatures coincide the usual rescaling would divide by zero and hand NaN
# to the colormap; fall back to the middle of the colormap in that case.
normalized_Ts = if z_max == z_min
    fill(0.5, length(Ts))
else
    [(z - z_min) / (z_max - z_min) for z in Ts]
end
cmap = get_cmap("viridis")
cols = [cmap(z) for z in normalized_Ts];


# Mean Hamming distance from the reference MSA versus steps elapsed since the
# reference point (shifted by 1 so the first point is drawable on a log axis).
# One colour per temperature; one line style and width per reference index.
close("all")
plt.plot()
for i in 1:length(Ts)
    for (t0, dists, width, dash, tag) in (
        (t_idxs[1], hams_delayed1[i], 0.3, ":", "T = $(round(Ts[i], digits = 2)) t_w = 10^2"),
        (t_idxs[2], hams_delayed2[i], 1., "--", "T = $(round(Ts[i], digits = 2)) t_w = 10^3"),
        (t_idxs[3], hams_delayed3[i], 5., "-.", "T = $(round(Ts[i], digits = 2)) t_w = 10^4"),
    )
        plt.plot(
            1 .+ steps[t0:end] .- steps[t0],
            map(mean, dists),
            color = cols[i],
            linewidth = width,
            linestyle = dash,
            label = tag,
        )
    end
end

plt.xscale("log")
plt.xlabel("MCMC steps")
plt.ylabel("Mean Hamming Distance")
plt.legend()
plt.savefig("../delayed_mean_ham_dist.png")



## Testing G4 correlation

In [None]:
# Simulation sizes and the five alignment columns used as starting sequences.
N_chains = 1000
N_steps = 10^6
start_seqs = dbd_msa[:, [22754, 19, 44, 100, 674]]
N_wts = size(start_seqs, 2)

# For every starting sequence, an MSA made of N_chains copies of it (one per column).
start_msas = [
    reduce(hcat, [start_seqs[:, j] for i in 1:N_chains]) for j in 1:N_wts
];

# Weighted subsample of N_chains natural sequences, drawn with the first output of
# `compute_weights` as sampling probabilities.
w = compute_weights(dbd_msa, 22, 0.2)[1]
idx_seqs = sample(collect(1:size(dbd_msa, 2)), ProbabilityWeights(w), N_chains)
sub_msa = dbd_msa[:, idx_seqs];

# One evolution run per starting sequence; results are stored in the order of
# start_msas. The index is printed as a progress indicator.
res = [];

for (i, msa) in enumerate(start_msas)
    println(i)
    push!(
        res,
        run_evolution(
            Int8.(msa),
            h_dbd,
            J_dbd;
            p = 0.5,
            N_points = 20,
            N_steps = N_steps,
        ),
    );
end

# G4 of the weighted natural subsample and of the last stored simulated MSA, both
# evaluated relative to each starting sequence, and their Pearson correlation over
# all entries.
g4_nat = map(1:N_wts) do i
    G4(sub_msa, start_seqs[:, i], w[idx_seqs])
end;
g4_sil = map(1:N_wts) do i
    G4(res[i].step_msa[end], start_seqs[:, i])
end;
corr = map((nat, sil) -> cor(nat[:], sil[:]), g4_nat, g4_sil);


# One scatter plot per starting sequence, each saved to its own file.
for i in 1:N_wts
    close("all")
    plt.scatter(g4_nat[i], g4_sil[i], label = "Cor: $(round(corr[i], digits = 3))")
    plt.xlabel("G4_nat")
    plt.ylabel("G4_sil")
    plt.title("Seq $(i)")
    plt.legend()
    savefig("../corr_seq$(i).png")
end


# Output of `cont_dep_entr` for every starting sequence and output of `CIE` for the
# whole alignment.
cdes = [cont_dep_entr(start_seqs[:, i], h_dbd, J_dbd) for i in 1:N_wts];
cie = CIE(dbd_msa);

# Standard deviation of |CIE - CDE| for each starting sequence.
# The comprehension previously read `for in 1:N_wts` (missing loop variable), which is
# a syntax error.
epist = [std(abs.(cie - cdes[i])) for i in 1:N_wts]


In [None]:
# Collect the sequence indices for which a distance file at temperature T exists.
# Matching names have six "_"-separated fields: "dist", a field starting with "se"
# followed (after its third character) by the integer index, three further fields,
# and a last field equal to "T<T>.gz".
seqs_to_read = Int[]
T = 1.0
for filename in readdir("../data_Genie/Distances10")
    namesplit = split(filename, "_")
    # Unrelated files with fewer fields are skipped instead of raising a BoundsError.
    length(namesplit) >= 6 || continue
    if namesplit[1] == "dist" && startswith(namesplit[2], "se") && namesplit[6] == "T" * string(T) * ".gz"
        # Drop the three-character prefix; names whose remainder is not an integer are
        # ignored rather than aborting the scan.
        local seq_id = tryparse(Int, chop(namesplit[2], head = 3, tail = 0))
        seq_id === nothing || push!(seqs_to_read, seq_id)
    end
end



# Mean and variance along the second dimension of the distance arrays read from disk,
# both raw and divided by L. Key 0 holds the "wt" file; every other key is a sequence
# index from seqs_to_read.
L = 76;
all_dist_seqs_mean = Dict()
all_dist_seqs_var = Dict()
all_dist_seqs_norm_mean = Dict()
all_dist_seqs_norm_var = Dict()

# "wt" file, stored under key 0.
filename = string(
    "../data_Genie/Distances10/dist_wt_1000samples_1000000steps_10000stored_T", T
)
dist_vec = Genie.read_dist_from_file(filename, true)
all_dist_seqs_mean[0] = mean(dist_vec, dims = 2)[:, 1]
all_dist_seqs_var[0] = var(dist_vec, dims = 2)[:, 1]
all_dist_seqs_norm_mean[0] = mean(dist_vec ./ L, dims = 2)[:, 1]
all_dist_seqs_norm_var[0] = var(dist_vec ./ L, dims = 2)[:, 1]

# One file per sequence index found on disk.
for s in seqs_to_read
    filename = string(
        "../data_Genie/Distances10/dist_seq", s, "_1000samples_1000000steps_10000stored_T", T
    )
    dist_vec = Genie.read_dist_from_file(filename, true)
    all_dist_seqs_mean[s] = mean(dist_vec, dims = 2)[:, 1]
    all_dist_seqs_var[s] = var(dist_vec, dims = 2)[:, 1]
    all_dist_seqs_norm_mean[s] = mean(dist_vec ./ L, dims = 2)[:, 1]
    all_dist_seqs_norm_var[s] = var(dist_vec ./ L, dims = 2)[:, 1]
end

In [1]:
# Per-sequence epistasis measures and the size of the "bump" in the distance variance.
cie = CIE(dbd_msa)
N_wts = length(seqs_to_read)
epist = zeros(N_wts)
cdes = zeros(L, N_wts)
max_bump = zeros(N_wts)
t_bump = zeros(N_wts)
m_epist = zeros(N_wts);

for i in 1:N_wts
    cdes[:, i] .= cont_dep_entr(dbd_msa[:, seqs_to_read[i]], h_dbd, J_dbd)
    # |2^CIE - 2^CDE| per position; its std and mean are the two epistasis measures.
    local gap = abs.((2 .^ cie) .- (2 .^ cdes[:, i]))
    epist[i] = std(gap)
    m_epist[i] = mean(gap)
    # Bump height: maximum of the variance curve minus the average of its last 201
    # stored points; t_bump is the index at which the maximum occurs.
    m, t = findmax(all_dist_seqs_var[seqs_to_read[i]])
    final_m = mean(all_dist_seqs_var[seqs_to_read[i]][end-200:end])
    max_bump[i] = (m - final_m)
    t_bump[i] = t
    # Progress message every 10 sequences.
    i % 10 == 0 && println(i)
end

# Correlation of each epistasis measure with the bump height.
cor(m_epist, max_bump)
cor(epist, max_bump)

# Scatter plots of the bump height (divided by L^2) against three per-sequence
# quantities; each figure is written to its own file.

# Mean of |2^CIE - 2^CDE|.
close("all")
plt.scatter(m_epist, max_bump ./ (L^2))
plt.xlabel("Mean(|2^CIE - 2^CDE|)")
plt.ylabel("Chi_max-chi_end")
savefig("../mean_epistvs_delta_bump.png")

# Standard deviation of |2^CIE - 2^CDE|.
close("all")
plt.scatter(epist, max_bump ./ (L^2))
plt.xlabel("Std(|2^CIE - 2^CDE|)")
plt.ylabel("Chi_max-chi_end")
savefig("../std_epistvs_delta_bump.png")


# Sum of the CDE over positions.
close("all")
plt.scatter(sum(cdes, dims = 1), max_bump ./ (L^2))
plt.xlabel("Sum(CDE)")
plt.ylabel("Chi_max-chi_end")
savefig("../tot_cdevs_delta_bump.png")

# The original call was `cor(, max_bump)` (missing first argument, a syntax error).
# NOTE(review): the quantity plotted just above is assumed to be the intended first
# argument -- confirm. `vec` turns the 1 x N_wts sum into a vector so that `cor`
# returns a scalar.
cor(vec(sum(cdes, dims = 1)), max_bump)

LoadError: UndefVarError: `CIE` not defined