```r
library(philentropy)

# Load the raw gel score table: the first column holds the Em values and
# every remaining column is one sample of the binary matrix.
raw_data <- read.csv("../datasets/gel-score-matrix.csv")

# Keep only the sample columns (drop the Em value column). `drop = FALSE`
# keeps a data frame even if only one sample column is present, so the
# result is always two-dimensional.
bin_matrix <- data.matrix(raw_data[, -1, drop = FALSE])

# Transpose so that each row is one sample; distance() compares rows.
bin_matrix <- t(bin_matrix)

# Pairwise Jaccard distance between samples.
# See getDistMethods() for the other available distances.
dist_matrix <- distance(bin_matrix, method = "jaccard", use.row.names = TRUE)

# Number of samples (the distance matrix is square)
sample_count <- ncol(dist_matrix)

# Sample labels: every column name except the first (Em value) column
sample_labels <- colnames(raw_data)[-1]

####################### HEATMAP METHOD 1 ####################
## Construct Heatmap of Distance Matrix ## Method 1

# Draw the distance matrix as an image, one cell per pair of samples.
# The default axes are suppressed so sample names can be drawn instead.
image(
  x = seq_len(sample_count),
  y = seq_len(sample_count),
  z = dist_matrix,
  axes = FALSE,
  xlab = "",
  ylab = ""
)

# Bottom axis: sample names, drawn vertically (las = 3)
axis(side = 1, at = seq_len(sample_count), labels = sample_labels,
     cex.axis = 0.5, las = 3)
# Left axis: sample names, drawn horizontally (las = 1)
axis(side = 2, at = seq_len(sample_count), labels = sample_labels,
     cex.axis = 0.5, las = 1)
# Print each distance, formatted to one decimal place, inside its cell
text(
  expand.grid(seq_len(sample_count), seq_len(sample_count)),
  labels = sprintf("%0.1f", dist_matrix),
  cex = 0.6
)

####################### HEATMAP METHOD 2 ####################
## Construct Heatmap of Distance Matrix ## Method 2


############# NOT REQUIRED #################
# convert dist_matrix to frame and add labels
# dist_matrix = as.data.frame(dist_matrix)

# Check attributes
# attributes(dist_matrix)
# set attribute column name
# colnames(dist_matrix) = sample_labels

#add id variables
# dist_matrix$id = sample_labels

# Do the distance calculation stuff
# ...

# Draft function to reorder a distance matrix by hierarchical-clustering order ## the draft below does not run as written (unbalanced parenthesis in the as.dist line) ## TODO: develop method to reorder
#reorder_distmat <- function(distance_mat){
#  dd <- as.dist(distance_mat)/2)
#  hc <- hclust(dd)
#  mt <- distance_mat[hc$order, hc$order]
#}
############################################

# Use reshape2 to melt the distance matrix into long form: one row per cell,
# with the two sample indices in Var1/Var2 and the distance in `value`.
library(reshape2)
melted_dist_matrix <- melt(data = dist_matrix)

library("ggplot2")
# Basic tile heatmap with the x-axis labels rotated to vertical
ggplot(data = melted_dist_matrix,
       mapping = aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  theme(axis.text.x = element_text(angle = 90))

# Styled variant: white tile borders, fixed colour range and square cells.
# `limits` is spelled out in full; the abbreviated `limit` only worked
# through partial argument matching.
# NOTE: with midpoint = 0 and limits of 0..1 the fill runs from white (0) to
# red (1), so the `low` colour is never reached.
ggplot(data = melted_dist_matrix, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "white",
    midpoint = 0, limits = c(0, 1),
    space = "Lab", name = "Jaccard Distance"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, size = 10,
                                   hjust = 1)) +
  coord_fixed()

##### convert the distance matrix to upper triangular matrix ####
# Zero out everything below the diagonal so only the upper triangle keeps its
# distances. Use upper.tri(dist_matrix) instead for the lower triangular form.
dist_matrix_utr <- dist_matrix
dist_matrix_utr[lower.tri(dist_matrix)] <- 0

# Melt to long form (Var1, Var2, value) for ggplot
melted_dist_matrix_utr <- melt(data = dist_matrix_utr)

# Same styled heatmap as for the full matrix. `limits` is spelled out in
# full; the abbreviated `limit` only worked through partial argument matching.
ggplot(data = melted_dist_matrix_utr,
       aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "white",
    midpoint = 0, limits = c(0, 1),
    space = "Lab", name = "Jaccard Distance"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, size = 10,
                                   hjust = 1)) +
  coord_fixed()

################# Clustering and Linkage ##################
###########################################################

# Convert the square distance matrix to a "dist" object, the form hclust()
# expects
dist_matrix_linear <- as.dist(dist_matrix)

# Hierarchical clustering with average linkage
tree <- hclust(dist_matrix_linear, method = "average")

# Plot the cluster dendrogram
plot(tree) # plot with hanging nodes
plot(tree, hang = -1) # dendrogram style plot

# Leaf order as produced by hclust(), before any reordering
print(tree$order)

# Reorder the leaves to the optimal leaf order.
# Method: OLO - optimal leaf order (Bar-Joseph et al., 2001),
#         GW  - Gruvaeus and Wainer, 1972
# The reorder() method for hclust objects that takes `dist` and `method`
# comes from the seriation package, so it must be loaded before this call;
# without it the call falls through to stats::reorder.default and fails.
library(seriation)
tree_ordered <- reorder(x = tree, dist = dist_matrix_linear, method = "OLO")
plot(tree_ordered, hang = -1)

######################### SERIATION PACKAGE #############################
# The seriation package offers many more ordering methods and a lot of other
# related functionality
library(seriation)

# Compute a seriation (ordering) of the samples with the OLO method
leaf_order <- seriate(dist_matrix_linear, method = "OLO")
print(leaf_order)

# Heatmap drawn by seriation, with the leaves ordered by OLO
hmap(
  dist_matrix_linear,
  hclustfun = NULL,
  distfun = NULL,
  method = "OLO",
  col = greenred(100)
)

#pimage(dist_matrix_linear, seriate(dist_matrix_linear), col = c("white", "red"), prop = TRUE, axes="both", main = "Reordered data", key = FALSE)

################ custom hclust and dist function try ################
# custom functions for hmap x = distance matrix (non flat/linear)
# hclust_custom <- function(x) hclust(x, method = "average")

# distance_custom <- function(matrix) as.dist(distance(matrix, method = "jaccard", use.row.names = TRUE))

# hmap(bin_matrix, hclustfun = hclust_custom, distfun = distance_custom, method = "OLO")
# hmap(bin_matrix, hclustfun = NULL, distfun = distance_custom, method = "OLO")
############ OTHER
# library(dendsort) # also try this package

```