Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "ModDotPlot"
version = "0.9.1"
version = "0.9.2"
requires-python = ">= 3.7"
dependencies = [
"pysam",
Expand Down
2 changes: 1 addition & 1 deletion src/moddotplot/const.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
VERSION = "0.9.1"
VERSION = "0.9.2"
COLS = [
"#query_name",
"query_start",
Expand Down
56 changes: 30 additions & 26 deletions src/moddotplot/moddotplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,13 @@ def get_parser():
help="Create a dotplot with two different sequences (skips self-identity plots).",
)

static_parser.add_argument(
"--compare-order",
choices=["sequential", "size"],
default="sequential",
help="Order in which sequences appear in the comparative plot. Default is 'sequential': First file on x-axis, second file on y-axis. Another option is 'size': The larger sequence on the x-axis and the smaller on y-axis.",
)

static_parser.add_argument(
"--no-bed", action="store_true", help="Skip output of bed file."
)
Expand Down Expand Up @@ -906,7 +913,11 @@ def main():
if args.grid or args.grid_only:
grid_val_singles = []
grid_val_single_names = []
sequences = list(zip(seq_list, k_list))
new_sequences = list(zip(seq_list, k_list))
if args.compare_order == "size":
sequences = sorted(new_sequences, key=lambda seq: len(seq[1]), reverse=True)
else:
sequences = new_sequences
if len(sequences) > 6 and (args.grid or args.grid_only):
print("Too many sequences to create a grid. Skipping. \n")

Expand All @@ -917,6 +928,7 @@ def main():
if not args.compare_only:
for i in range(len(sequences)):
seq_length = len(sequences[i][1])
seq_name = sequences[i][0]
win = args.window
res = args.resolution
if args.window:
Expand All @@ -941,10 +953,10 @@ def main():
seq_sparsity = 2 ** (int(math.log2(seq_sparsity - 1)) + 1)
expectation = round(win / seq_sparsity)

print(f"Computing self identity matrix for {sequences[i][0]}... \n")
print(f"Computing self identity matrix for {seq_name}... \n")
# TODO: Logging here
# print(f"\tSparsity value s: {seq_sparsity}\n")
print(f"\tSequence length n: {len(k_list[i]) + args.kmer - 1}\n")
print(f"\tSequence length n: {seq_length + args.kmer - 1}\n")
print(f"\tWindow size w: {win}\n")
print(f"\tModimizer sketch size: {expectation}\n")
print(f"\tPlot Resolution r: {res}\n")
Expand All @@ -959,20 +971,21 @@ def main():
args.ambiguous,
expectation,
)
print(seq_list[i])
bed = convertMatrixToBed(
self_mat, win, args.identity, seq_list[i], seq_list[i], True
self_mat, win, args.identity, seq_name, seq_name, True
)
if args.grid or args.grid_only:
grid_val_singles.append(bed)
grid_val_single_names.append(sequences[i][0])
grid_val_single_names.append(seq_name)

if not args.no_bed:
# Log saving bed file
if not args.output_dir:
bedfile_output = sequences[i][0] + ".bed"
bedfile_output = seq_name + ".bed"
else:
bedfile_output = os.path.join(
args.output_dir, sequences[i][0] + ".bed"
args.output_dir, seq_name + ".bed"
)
with open(bedfile_output, "w") as bedfile:
for row in bed:
Expand All @@ -983,8 +996,8 @@ def main():
create_plots(
sdf=[bed],
directory=args.output_dir if args.output_dir else ".",
name_x=sequences[i][0],
name_y=sequences[i][0],
name_x=seq_name,
name_y=seq_name,
palette=args.palette,
palette_orientation=args.palette_orientation,
no_hist=args.no_hist,
Expand Down Expand Up @@ -1022,15 +1035,6 @@ def main():
smaller_length = len(smaller_seq)
larger_seq_name = sequences[i][0]
smaller_seq_name = sequences[j][0]

if larger_length < smaller_length:
smaller_seq = sequences[i][1]
larger_seq = sequences[j][1]
larger_length = len(larger_seq)
smaller_length = len(smaller_seq)
larger_seq_name = sequences[j][0]
smaller_seq_name = sequences[i][0]

win = args.window
res = args.resolution
if args.window:
Expand Down Expand Up @@ -1062,10 +1066,10 @@ def main():
print(f"\tPlot Resolution r: {res}\n")

pair_mat = createPairwiseMatrix(
larger_length,
smaller_length,
larger_seq,
larger_length,
smaller_seq,
larger_seq,
win,
seq_sparsity,
args.delta,
Expand Down Expand Up @@ -1098,17 +1102,17 @@ def main():
# Log saving bed file
if not args.output_dir:
bedfile_output = (
smaller_seq_name
larger_seq_name
+ "_"
+ larger_seq_name
+ smaller_seq_name
+ "_COMPARE.bed"
)
else:
bedfile_output = os.path.join(
args.output_dir,
smaller_seq_name
larger_seq_name
+ "_"
+ larger_seq_name
+ smaller_seq_name
+ "_COMPARE.bed",
)
with open(bedfile_output, "w") as bedfile:
Expand All @@ -1120,8 +1124,8 @@ def main():
create_plots(
sdf=[bed],
directory=args.output_dir if args.output_dir else ".",
name_x=smaller_seq_name,
name_y=larger_seq_name,
name_x=larger_seq_name,
name_y=smaller_seq_name,
palette=args.palette,
palette_orientation=args.palette_orientation,
no_hist=args.no_hist,
Expand Down
61 changes: 36 additions & 25 deletions src/moddotplot/static_plots.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from ast import excepthandler
from plotnine import (
ggsave,
ggplot,
Expand Down Expand Up @@ -282,32 +281,28 @@ def read_df(
return df


def generate_breaks(number, min_breaks=6, max_breaks=8):
def generate_breaks(number, min_breaks=5, max_breaks=9):
# Determine the order of magnitude
magnitude = 10 ** int(
np.floor(np.log10(number))
math.floor(np.log10(number))
) # Base power of 10 (e.g., 10M, 1M, 100K)

# Find a reasonable step size
possible_steps = [
magnitude // d
for d in [10, 5, 4, 2, 1]
if (number // (magnitude // d)) in range(min_breaks, max_breaks + 1)
]

step = (
possible_steps[0] if possible_steps else magnitude // 5
) # Default to 1/5th if no exact match

threshold = math.ceil(number / magnitude)
while threshold > max_breaks:
magnitude = magnitude * 2
threshold = np.ceil(number / magnitude)
while threshold < min_breaks:
magnitude = magnitude / 2
threshold = np.ceil(number / magnitude)
# Generate breakpoints
breaks = list(range(0, ((number // step) + 1) * step, step))
breaks = list(range(0, (threshold * magnitude) + magnitude, magnitude))

return breaks


def make_dot(
sdf,
title_name,
name_x,
name_y,
palette,
palette_orientation,
colors,
Expand All @@ -316,7 +311,17 @@ def make_dot(
xlim,
deraster,
width,
is_pairwise,
):
if is_pairwise:
title_name = f"Comparative Plot: {name_x} vs {name_y}"
else:
title_name = f"Self-Identity Plot: {name_x}"
title_length = 2 * width
if len(title_name) > 50:
title_length = 1.5 * width
elif len(title_name) > 80:
title_length = width
# Select the color palette
if hasattr(diverging, palette):
function_name = getattr(diverging, palette)
Expand Down Expand Up @@ -374,8 +379,10 @@ def make_dot(
axis_ticks_major=element_line(
size=(width), color="black"
), # Increased tick length
title=element_blank(), # Center title
axis_title_x=element_text(size=(width * 1.2), family=["DejaVu Sans"]),
title=element_text(
family=["DejaVu Sans"], size=title_length, hjust=0.5
), # Center title
axis_title_x=element_text(size=(width * 1.4), family=["DejaVu Sans"]),
strip_background=element_blank(), # Remove facet strip background
strip_text=element_text(
size=(width * 1.2), family=["DejaVu Sans"]
Expand Down Expand Up @@ -695,7 +702,7 @@ def make_tri(
axis_ticks_major_y=element_blank(),
axis_ticks_major=element_line(size=(width)),
title=element_text(size=(width * 1.4), hjust=0.5),
axis_title_x=element_text(size=(width * 1.2), family=["DejaVu Sans"]),
axis_title_x=element_text(size=(width * 1.4), family=["DejaVu Sans"]),
axis_text_y=element_blank(),
)
)
Expand Down Expand Up @@ -1362,7 +1369,8 @@ def create_plots(
if is_pairwise:
heatmap = make_dot(
sdf,
plot_filename,
name_x,
name_y,
palette,
palette_orientation,
custom_colors,
Expand All @@ -1371,6 +1379,7 @@ def create_plots(
xlim,
deraster,
width,
True,
)
print(f"Creating plots and saving to {plot_filename}...\n")
ggsave(
Expand All @@ -1394,17 +1403,17 @@ def create_plots(
try:
if not heatmap.data:
print(
f"{plot_filename}_COMPARE.pdf and {plot_filename}_COMPARE.png saved sucessfully. \n"
f"{plot_filename}_COMPARE.{vector_format} and {plot_filename}_COMPARE.png saved sucessfully. \n"
)
return 0
except ValueError:
print(
f"{plot_filename}_COMPARE.pdf and {plot_filename}_COMPARE.png saved sucessfully. \n"
f"{plot_filename}_COMPARE.{vector_format} and {plot_filename}_COMPARE.png saved sucessfully. \n"
)
return 0
if no_hist:
print(
f"{plot_filename}_COMPARE.pdf and {plot_filename}_COMPARE.png saved sucessfully. \n"
f"{plot_filename}_COMPARE.{vector_format} and {plot_filename}_COMPARE.png saved sucessfully. \n"
)
else:
ggsave(
Expand Down Expand Up @@ -1446,7 +1455,8 @@ def create_plots(
)
full_plot = make_dot(
check_st_en_equality(sdf),
plot_filename,
name_x,
name_y,
palette,
palette_orientation,
custom_colors,
Expand All @@ -1455,6 +1465,7 @@ def create_plots(
xlim,
deraster,
width,
False,
)
ggsave(
full_plot,
Expand Down