In [1]:
using BenchmarkTools
using Dates
using ProgressBars
using Distances

using Plots
using Plots.PlotMeasures
using StatsPlots, KernelDensity


In [3]:
include("../src/foldexity.jl")
include("../src/fxio.jl")
include("../src/entropy.jl")

In [None]:
# Load the structure from the test PDB file.
# NOTE(review): the name suggests only backbone atoms are kept — confirm against
# the definition of readpdb_backbone.
pdb = readpdb_backbone("../testpdb/d1914a1.pdb")


# Pass the `resname` field of the loaded structure to pdb2fasta (presumably a
# residue-name -> sequence conversion — confirm). As the last expression of the
# cell, its value is what the notebook displays.
pdb2fasta(pdb.resname)

213-element Vector{String}:
 "PHE"
 "PHE"
 "PHE"
 "GLN"
 "GLN"
 "GLN"
 "THR"
 "THR"
 "THR"
 "TRP"
 ⋮
 "LEU"
 "LEU"
 "LEU"
 "MET"
 "MET"
 "MET"
 "VAL"
 "VAL"
 "VAL"

In [4]:
# Run fxpdb on a single structure file and unpack its six return values.
# NOTE(review): the positional arguments 4, "seq" and 1.0 are presumably the
# fragment size, the fragmentation mode and a clustering cutoff — confirm
# against the definition of fxpdb.
fxity, average_rmsd, num_clusts, norm_num_clusts, num_frags, matrix = fxpdb("../testpdb/AF-Q8VCK6-F1-model_v4.pdb", 4, "seq", 1.0) 
# Display only the first returned value.
fxity

Starting foldexity...


3.929238240339544

In [None]:
# Run fxdir over every structure in ../testpdb, load the table it writes, and
# show it. "fxdata.tsv" is only a scratch file, so it is removed in `finally`:
# that way it does not linger in the working directory when fxdir or the
# parser throws part-way through. `force=true` keeps the cleanup itself from
# failing when the file was never created.
# NOTE(review): the positional arguments 6, "knn" and 1.0 are presumably the
# fragment size, the fragmentation mode and a clustering cutoff — confirm
# against the definition of fxdir.
df = try
    fxdir("../testpdb", "fxdata.tsv", 6, "knn", 1.0)
    CSV.read("fxdata.tsv", DataFrame, delim = "\t")
finally
    rm("fxdata.tsv"; force = true)
end
df

In [None]:
# Parameters shared with the following cells.
# `fsize` is the second argument of coords2kmers below (presumably the fragment
# length — confirm); `cutoff` is the cut height `h` passed to cutree in the
# clustering cell.
fsize = 4
cutoff = 2

# Read the structure and write it back out as ../testpdb/backbone.pdb.
pdb = readpdb_backbone("../testpdb/d1914a1.pdb")
writepdb(pdb, "../testpdb/backbone.pdb")

# Extract the coordinates, then split them into fragments with coords2kmers.
# Note that `xyzcoords` is rebound: after the second line it holds the
# fragments, not the raw output of pdb2xyz.
xyzcoords = pdb2xyz(pdb)
xyzcoords = coords2kmers(xyzcoords, fsize)

# Same fragmentation applied to the pdb2pdbmatrix representation of the structure.
pdbmatrix = pdb2pdbmatrix(pdb)
pdbfragments = coords2kmers(pdbmatrix, fsize)

# Score the coordinate fragments; the call returns six values.
fxity, average_rmsd, num_clusts, norm_num_clusts, num_frags, matrix = fxity_kabsh(xyzcoords)

# Compare fragments 10 and 11 directly; the value of the last call is the cell
# output (presumably the RMSD after superposition — confirm).
pfrag1 = xyzcoords[10]
pfrag2 = xyzcoords[11]

kabsch_umeyama(pfrag1, pfrag2)

In [None]:
# One row/column per fragment in `xyzcoords`.
nfrags = length(xyzcoords)
matrix = zeros(Float64, nfrags, nfrags)

# Fill only the strict upper triangle (j > i); the diagonal stays at zero.
for i in 1:nfrags, j in (i + 1):nfrags
    matrix[i, j] = kabsch_umeyama(xyzcoords[i], xyzcoords[j])
end

# Mirror the upper triangle into the lower one to obtain a symmetric matrix.
matrix = matrix + matrix'

# Mean over all nfrags × nfrags entries, zero diagonal included.
aver_rmsd = sum(matrix) / nfrags^2

# Complete-linkage clustering, cut at height `cutoff`.
cl = hclust(matrix, linkage=:complete)
results = cutree(cl, h=cutoff)

# Number of distinct clusters, raw and relative to the number of fragments.
nclusts = length(unique(results))
norm_nclusts = nclusts / nfrags

       


In [None]:
# Perform hierarchical clustering (complete linkage) on the pairwise matrix.
# This is the same hclust call that was made when `results` was computed; it is
# repeated so that `cl` is defined whenever this cell runs after `matrix` exists.
cl = hclust(matrix, linkage=:complete)

"""
    hclust_to_newick(cl, labels)

Serialize a hierarchical clustering as a Newick tree string.

# Arguments
- `cl`: clustering result, e.g. a `Clustering.Hclust`. Only two properties are
  read: `merges` (an N×2 integer matrix in which a negative entry `-k` denotes
  leaf `k` and a positive entry `m` denotes the subtree created by merge `m`)
  and `heights` (the height of each merge).
- `labels`: leaf names; `labels[k]` is used for leaf `k`.

# Returns
A `String` terminated by `;`. The root is the last merge. Each branch length is
the height of the parent merge minus the height of the child (leaves sit at
height zero), so the distance from any leaf to the root equals the root height.
A clustering without merges (a single observation) yields `"<label>;"`.

Labels are written verbatim; they are not quoted or escaped.
"""
function hclust_to_newick(cl, labels::AbstractVector{<:AbstractString})
    merges = cl.merges
    heights = cl.heights
    nmerges = size(merges, 1)

    # A single observation produces no merges: the tree is just that one leaf.
    nmerges == 0 && return string(first(labels), ";")

    # Height at which a node was created; leaves (negative ids) are at zero.
    nodeheight(node) = node < 0 ? zero(eltype(heights)) : heights[node]

    function build_tree(node)
        # Leaf: negative id -k refers to labels[k].
        node < 0 && return String(labels[-node])

        left, right = merges[node, 1], merges[node, 2]
        top = heights[node]
        # Newick branch lengths are child-to-parent distances, not absolute heights.
        left_len = top - nodeheight(left)
        right_len = top - nodeheight(right)
        return "($(build_tree(left)):$(left_len),$(build_tree(right)):$(right_len))"
    end

    return build_tree(nmerges) * ";"
end

# One label per row of the pairwise matrix: "kmer_1", "kmer_2", ...
labels = string.("kmer_", 1:size(matrix, 1))

# Serialize the clustering with those labels and print the Newick string.
newick_tree = hclust_to_newick(cl, labels)
println(newick_tree)

In [None]:
"""
    alphabet_heatmap(m, cmap; scale=12)

Draw matrix `m` as a heatmap with grey lines separating the cells.

# Arguments
- `m`: matrix to display; rows run along the y axis, columns along the x axis.
- `cmap`: colour scheme forwarded to the `color` attribute of `heatmap`.

# Keywords
- `scale`: number of pixels allotted to each cell in both directions.

# Returns
The plot object, with the cell-separator lines added and the legend disabled.
"""
function alphabet_heatmap(m, cmap; scale=12)
    nrows, ncols = size(m)
    h = heatmap(m;
        color=cmap,
        xlabel="Kmer1", ylabel="Kmer2",
        grid=true,
        # Plots expects (width, height): the width follows the number of
        # columns and the height the number of rows.
        size=(round(Int, ncols * scale), round(Int, nrows * scale)),
        margins=5mm,
        dpi=500, yrotation=90, tickfontsize=8)

    # Draw the separators on `h` explicitly instead of on whatever plot happens
    # to be current. Cell k spans k-0.5 .. k+0.5, hence the half-integer grid.
    vline!(h, 0.5:(ncols + 0.5), c=:grey)
    hline!(h, 0.5:(nrows + 0.5), c=:grey, legend=false)
    return h
end

# Reorder rows and columns by the reversed dendrogram leaf order before plotting.
morder = cl.order[end:-1:1]
alphabet_heatmap(matrix[morder, morder], :bluesreds)

In [None]:
# Dump every fragment of the structure to its own PDB file. Each file name
# carries the cluster id assigned to that fragment in `results` (clustering
# cell) and the running fragment index.
pdbfile = "../testpdb/d1914a1.pdb"
outdir = split(basename(pdbfile), ".")[1]

# Start from an empty output directory so files from earlier runs do not linger.
if isdir(outdir)
    rm(outdir, force = true, recursive = true)
end
mkpath(outdir)

pdb = readpdb_backbone(pdbfile)
# Fragment the structure that was just read (rather than a `pdbmatrix` left over
# from another cell), with the same `fsize` that produced `results`.
pdbfragments = coords2kmers(pdb2pdbmatrix(pdb), fsize)

# `enumerate` supplies the running index, so no global counter is mutated inside
# the loop (which fails with UndefVarError when this code runs as a script).
# `zip` stops at the shorter of the two collections.
for (i, (clust, frag)) in enumerate(zip(results, pdbfragments))
    writepdb(pdbmatrix2pdb(frag), "$outdir/cl$(clust)frag$i.pdb")
end