In [1]:
using DelimitedFiles
using Plots
using Random
using LinearAlgebra
using Statistics
using GZip
using DCAUtils
using StatsBase
using StatsPlots
using Clustering
using LaTeXStrings

In [None]:
"""
    read_dist_from_file(filename::String, zipped::Bool)

Read a delimited numeric table from disk and return it converted element-wise to `Int`.

# Arguments
- `filename`: path of the file to read. When `zipped` is `true` the suffix `".gz"` is
  appended to it before opening.
- `zipped`: if `true` the file is opened through `GZip.open`, otherwise through `open`.

# Returns
The array produced by `readdlm`, with every entry converted by `Int`.

# Throws
- `InexactError` if an entry is a number that is not an exact integer.
"""
function read_dist_from_file(filename::String, zipped::Bool)
    parse_table(io) = Int.(readdlm(io))

    # The do-block forms close the handle even when parsing or conversion throws.
    if zipped
        return GZip.open(filename * ".gz", "r") do file
            parse_table(file)
        end
    end

    return open(filename, "r") do file
        parse_table(file)
    end
end

# Protein plots

In [None]:
# Per-family length table; used further down to divide the summed CDE by the length.
Lfam = Dict(
    "WW" => 31,
    "DBD" => 76,
    "CM" => 96,
    "AAC6" => 117,
    "DHFR" => 160,
    "BetaLact" => 202,
    "SerineProtease" => 254,
)
# Protein families, in the order in which the figure panels are drawn.
families = [
    "WW",
    "DBD",
    "CM",
    "AAC6",
    "DHFR",
    "BetaLact",
    "SerineProtease",
]

In [15]:
### Read the susceptibilities
# For every family the file Susceptibilities_evolution40000.dat is parsed into a
# matrix; its columns 1, 2 and 3 are stored, keyed by family name, in
# Points, Chi_Dyn and Chi_Back respectively.
Points = Dict{}()
Chi_Dyn = Dict{}()
Chi_Back = Dict{}()
for fam in families
    filename = "../evolutions/" * fam * "/Susceptibilities_evolution40000.dat"
    # readdlm is given the path, so it opens and releases the file itself and no
    # handle is left open if parsing fails.
    data = readdlm(filename)
    Points[fam] = data[:,1]
    Chi_Dyn[fam] = data[:,2]
    Chi_Back[fam] = data[:,3]
end

In [29]:
### Read the peak and sum(CDE) points
# For every family the file MaxChiDyn_and_SumCDE.dat is parsed into a matrix:
# column 1 goes to Seqs, column 2 to ChiMax, and column 3 divided by the family
# length Lfam[fam] goes to CDEsum. All three are keyed by family name.
Seqs = Dict{}()
ChiMax = Dict{}()
CDEsum = Dict{}()
for fam in families
    filename = "../evolutions/" * fam * "/MaxChiDyn_and_SumCDE.dat"
    # readdlm is given the path, so it opens and releases the file itself and no
    # handle is left open if parsing fails.
    data = readdlm(filename)
    Seqs[fam] = data[:,1]
    ChiMax[fam] = data[:,2]
    CDEsum[fam] = data[:,3] ./ Lfam[fam]
end

In [None]:
### Read entropy evolution file and compute chi^A_dyn

# Run parameters used to build the DBD file names below.
L = 76
Nsweeps = 40000
Nsteps = Nsweeps * L
# Identifiers that appear in the entropy and distance file names.
sample1, sample2, sample3 = 17681, 13870, 19232

# Log-spaced values between 1 and Nsteps, truncated to integers and de-duplicated,
# then divided by L.
points_seq = unique(map(y -> trunc(Int, 10^y), range(log10(1), log10(Nsteps), length = 10000))) ./ L
Npoints = length(points_seq)

# Entropy tables: one gzip file per sample.
gzip_file_pathSample1, gzip_file_pathSample2, gzip_file_pathSample3 =
    map((sample1, sample2, sample3)) do id
        "../evolutions/DBD/Entropies/Entropy_Seq$(id)_1000samples_$(Nsteps)steps_T1.0.gz"
    end

# Each table is copied into a preallocated Npoints x L Float64 matrix, so a file whose
# shape does not match fails on the broadcast assignment.
FullEntrVecSample1, FullEntrVecSample2, FullEntrVecSample3 =
    map((gzip_file_pathSample1, gzip_file_pathSample2, gzip_file_pathSample3)) do path
        entropies = Matrix{Float64}(undef, Npoints, L)
        GZip.open(path) do io
            entropies .= readdlm(io, '\t')[:, 1:L]
        end
        entropies
    end

# Distance tables (read_dist_from_file appends ".gz" because zipped = true).
filenameSample1, filenameSample2, filenameSample3 =
    map((sample1, sample2, sample3)) do id
        "../evolutions/DBD/Distances/DistancesT10/dist_seq$(id)_1000samples_$(Nsteps)steps_10000stored_T1.0"
    end
dist_vecSample1, dist_vecSample2, dist_vecSample3 =
    map(path -> read_dist_from_file(path, true), (filenameSample1, filenameSample2, filenameSample3))

# Row-wise variance of the distances divided by L, flattened to a vector.
dist_seqs_norm_varSample1 = var(dist_vecSample1 ./ L; dims = 2)[:, 1]
dist_seqs_norm_varSample2 = var(dist_vecSample2 ./ L; dims = 2)[:, 1]
dist_seqs_norm_varSample3 = var(dist_vecSample3 ./ L; dims = 2)[:, 1]

In [None]:
### Plot Figure 2 (a)
# Five columns of FullEntrVecSample3 drawn in red against points_seq, with
# dist_seqs_norm_varSample3 drawn in blue on a twin y axis.

# For sample 1 the epistatic sites are: 51, 8, 71, 17, 2 
# For sample 2 the epistatic sites are: 51, 16, 47, 48, 28 
# For sample 3 the epistatic sites are: 70, 73, 69, 72, 74 

transp = 0.6

# The first curve creates the figure and carries the axis settings.
plot(points_seq, FullEntrVecSample3[:, 70];
     color = :red, alpha = transp, xaxis = :log, xticks = 10.0 .^ (-2:6),
     xtickfontsize = 10, ytickfontsize = 10, labelfontsize = 18)
for site in (73, 69, 72, 74)
    plot!(points_seq, FullEntrVecSample3[:, site]; color = :red, alpha = transp)
end
# Labels are issued before the twin axis is created.
plot!(xlabel = L"\textrm{MC\ Sweeps}", ylabel = L"\textrm{Entropy}")

plot!(twinx(), points_seq, dist_seqs_norm_varSample3;
      color = :blue, xaxis = :log, xticks = 10.0 .^ (-2:6),
      ylabel = L"\chi^{\textrm{A}}_{\textrm{dyn}}", ylims = (0, 0.003),
      xtickfontsize = 10, ytickfontsize = 10, labelfontsize = 18)

plot!(bottom_margin = 6 * Plots.mm, right_margin = 5 * Plots.mm,
      left_margin = 3 * Plots.mm, legend = :none, size = (600, 200))
# savefig("gigino.pdf")
# savefig("gigino.png")

In [None]:
### Plot Figure 2 (b)
# One scatter panel of ChiMax against CDEsum per family, arranged with the layout `l`.
transp = 0.8

p = []
for fam in families[1:end-1]
    pl = scatter(CDEsum[fam], ChiMax[fam];
                 color = :firebrick, alpha = transp,
                 xtickfontsize = 10, ytickfontsize = 10, labelfontsize = 18)
    # Five evenly spaced, rounded tick positions spanning the data on each axis.
    ylo, yhi = extrema(ChiMax[fam])
    xlo, xhi = extrema(CDEsum[fam])
    posy = round.(range(ylo, yhi, length = 5); digits = 4)
    posx = round.(range(xlo, xhi, length = 5); digits = 2)
    plot!(pl; xticks = posx, yticks = posy, legend = :none, left_margin = 3 * Plots.mm)
    push!(p, pl)
end

# The last family is the only panel that carries axis labels.
fam = families[end]
pl = scatter(CDEsum[fam], ChiMax[fam];
             color = :firebrick, alpha = transp,
             xtickfontsize = 10, ytickfontsize = 10, labelfontsize = 18)
plot!(pl;
      ylabel = L"\textrm{max}(\chi^{\textrm{A}}_{\textrm{dyn}})",
      xlabel = L"1/L\sum_i \textrm{CDE}_i",
      bottom_margin = 1 * Plots.mm, right_margin = 2 * Plots.mm, left_margin = 1 * Plots.mm,
      legend = :none, size = (600, 800))
push!(p, pl)

# Two empty plots used as the first and third cells of the bottom row.
pb1 = plot(legend = false, grid = false, foreground_color_subplot = :white);
pb2 = plot(legend = false, grid = false, foreground_color_subplot = :white);

l = @layout [grid(1,2); grid(1,2); grid(1,2); grid(1,3, widths=[0.1,0.66,0.24])]

plot(p[1:6]..., pb1, p[7], pb2; layout = l)
# savefig("gigino.png")
savefig("gigino.pdf")

In [None]:
### Plot Figure 3
# One panel per family, stacked in a single column: Chi_Dyn + Chi_Back (black),
# Chi_Back (blue) and Chi_Dyn (red) against Points on a logarithmic x axis.

p = []
for fam in families[1:end-1]
    pl = plot(Points[fam], Chi_Dyn[fam] + Chi_Back[fam];
              xaxis = :log, xticks = 10.0 .^ (-3:7),
              label = L"\chi_{\rm tot}", color = :black, linewidth = 3)
    plot!(pl, Points[fam], Chi_Back[fam];
          xaxis = :log, label = L"\chi_{\rm back}", color = :blue, linewidth = 3)
    plot!(pl, Points[fam], Chi_Dyn[fam];
          xaxis = :log, label = L"\chi_{\rm dyn}", color = :red, linewidth = 3,
          xtickfontsize = 12, ytickfontsize = 12, labelfontsize = 20, legendfontsize = 16)
    # These panels blank the x tick labels and hide the legend.
    plot!(pl; ylabel = L"\chi", xformatter = _ -> "",
          ylims = (0, 0.025), xlims = (10^-1, 10^5), legend = :none)
    push!(p, pl)
end

# The last family gets the x label, the legend, the margins and the figure size.
fam = families[end]
pl = plot(Points[fam], Chi_Dyn[fam] + Chi_Back[fam];
          xaxis = :log, xticks = 10.0 .^ (-3:7),
          label = L"\chi_{\rm tot}", color = :black, linewidth = 3)
plot!(pl, Points[fam], Chi_Back[fam];
      xaxis = :log, label = L"\chi_{\rm back}", color = :blue, linewidth = 3)
plot!(pl, Points[fam], Chi_Dyn[fam];
      xaxis = :log, label = L"\chi_{\rm dyn}", color = :red, linewidth = 3,
      xtickfontsize = 12, ytickfontsize = 12, labelfontsize = 20, legendfontsize = 16)
plot!(pl;
      ylabel = L"\chi", xlabel = L"\textrm{MC\ Sweeps}",
      bottom_margin = 1 * Plots.mm, right_margin = 2 * Plots.mm, left_margin = 4 * Plots.mm,
      legend = :topleft, legend_column = -1,
      size = (700, 1000), ylims = (0, 0.025), xlims = (10^-1, 10^5))
push!(p, pl)

plot(p[1:7]...; layout = (length(families), 1))
# savefig("gigino.png")
savefig("gigino.pdf")

# RNA plots

In [None]:
# Per-family length table for the RNA families; used further down to divide the
# summed CDE by the length.
LfamRNA = Dict(
    "TPPRiboswitch" => 105,
    "tRNA" => 71,
)
# RNA families handled by the cells below.
familiesRNA = [
    "TPPRiboswitch",
    "tRNA",
]

In [5]:
### Read the susceptibilities
# For every RNA family the file Susceptibilities_evolution40000.dat is parsed into a
# matrix; its columns 1, 2 and 3 are stored, keyed by family name, in
# PointsRNA, Chi_DynRNA and Chi_BackRNA respectively.
PointsRNA = Dict{}()
Chi_DynRNA = Dict{}()
Chi_BackRNA = Dict{}()
for fam in familiesRNA
    filename = "../evolutions/" * fam * "/Susceptibilities_evolution40000.dat"
    # readdlm is given the path, so it opens and releases the file itself and no
    # handle is left open if parsing fails.
    data = readdlm(filename)
    PointsRNA[fam] = data[:,1]
    Chi_DynRNA[fam] = data[:,2]
    Chi_BackRNA[fam] = data[:,3]
end

In [6]:
### Read the peak and sum(CDE) points
# For every RNA family the file MaxChiDyn_and_SumCDE.dat is parsed into a matrix:
# column 1 goes to SeqsRNA, column 2 to ChiMaxRNA, and column 3 divided by the family
# length LfamRNA[fam] goes to CDEsumRNA. All three are keyed by family name.
SeqsRNA = Dict{}()
ChiMaxRNA = Dict{}()
CDEsumRNA = Dict{}()
for fam in familiesRNA
    filename = "../evolutions/" * fam * "/MaxChiDyn_and_SumCDE.dat"
    # readdlm is given the path, so it opens and releases the file itself and no
    # handle is left open if parsing fails.
    data = readdlm(filename)
    SeqsRNA[fam] = data[:,1]
    ChiMaxRNA[fam] = data[:,2]
    CDEsumRNA[fam] = data[:,3] ./ LfamRNA[fam]
end

In [None]:
### Plot Figure Supplementary 1 (a)
# Two stacked panels (TPPRiboswitch on top, tRNA below): Chi_DynRNA + Chi_BackRNA
# (black), Chi_BackRNA (blue) and Chi_DynRNA (red) on a logarithmic x axis.

# Top panel: shows the legend, x tick labels are blanked.
p1 = let fam = "TPPRiboswitch"
    panel = plot(PointsRNA[fam], Chi_DynRNA[fam] + Chi_BackRNA[fam];
                 xaxis = :log, xticks = 10.0 .^ (-3:7),
                 label = L"\chi_{\rm tot}", color = :black, linewidth = 3)
    plot!(panel, PointsRNA[fam], Chi_BackRNA[fam];
          xaxis = :log, label = L"\chi_{\rm back}", color = :blue, linewidth = 3)
    plot!(panel, PointsRNA[fam], Chi_DynRNA[fam];
          xaxis = :log, label = L"\chi_{\rm dyn}", color = :red, linewidth = 3,
          xtickfontsize = 12, ytickfontsize = 12, labelfontsize = 20, legendfontsize = 16)
    plot!(panel; ylabel = L"\chi", xformatter = _ -> "",
          ylims = (0, 0.010), xlims = (10^-1, 10^5),
          legend = :topleft, legend_column = -1)
    panel
end

# Bottom panel: carries the x label, margins and size; legend hidden.
p2 = let fam = "tRNA"
    panel = plot(PointsRNA[fam], Chi_DynRNA[fam] + Chi_BackRNA[fam];
                 xaxis = :log, xticks = 10.0 .^ (-3:7),
                 label = L"\chi_{\rm tot}", color = :black, linewidth = 3)
    plot!(panel, PointsRNA[fam], Chi_BackRNA[fam];
          xaxis = :log, label = L"\chi_{\rm back}", color = :blue, linewidth = 3)
    plot!(panel, PointsRNA[fam], Chi_DynRNA[fam];
          xaxis = :log, label = L"\chi_{\rm dyn}", color = :red, linewidth = 3,
          xtickfontsize = 12, ytickfontsize = 12, labelfontsize = 20, legendfontsize = 16)
    plot!(panel; ylabel = L"\chi", xlabel = L"\textrm{MC\ Sweeps}",
          bottom_margin = 1 * Plots.mm, right_margin = 2 * Plots.mm,
          left_margin = 4 * Plots.mm, size = (700, 280),
          ylims = (0, 0.010), xlims = (10^-1, 10^5), legend = :none)
    panel
end

plot(p1, p2; layout = (2, 1))
# savefig("gigino.png")
savefig("gigino.pdf")

In [None]:
### Plot Figure Supplementary 1 (b)
# Side-by-side scatter plots of ChiMaxRNA against CDEsumRNA for the two RNA families.
transp = 0.8

# Left panel: carries both axis labels and extra left/bottom margins.
p1 = let fam = "TPPRiboswitch"
    panel = scatter(CDEsumRNA[fam], ChiMaxRNA[fam];
                    color = :firebrick, alpha = transp,
                    xtickfontsize = 10, ytickfontsize = 10, labelfontsize = 18)
    plot!(panel; legend = :none,
          ylabel = L"\textrm{max}(\chi^{\textrm{A}}_{\textrm{dyn}})",
          xlabel = L"1/L\sum_i \textrm{CDE}_i",
          left_margin = 5 * Plots.mm, bottom_margin = 7 * Plots.mm)
    panel
end

# Right panel: x label only.
p2 = let fam = "tRNA"
    panel = scatter(CDEsumRNA[fam], ChiMaxRNA[fam];
                    color = :firebrick, alpha = transp,
                    xtickfontsize = 10, ytickfontsize = 10, labelfontsize = 18)
    plot!(panel; xlabel = L"1/L\sum_i \textrm{CDE}_i", legend = :none)
    panel
end

plot(p1, p2)
plot!(xlims = (0.6, 1.0))
plot!(size = (600, 230))
savefig("gigino.pdf")