diff --git a/pyproject.toml b/pyproject.toml index fd7941a..2071d75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "ModDotPlot" -version = "0.9.1" +version = "0.9.2" requires-python = ">= 3.7" dependencies = [ "pysam", diff --git a/src/moddotplot/const.py b/src/moddotplot/const.py index db6639f..9ea8756 100644 --- a/src/moddotplot/const.py +++ b/src/moddotplot/const.py @@ -1,4 +1,4 @@ -VERSION = "0.9.1" +VERSION = "0.9.2" COLS = [ "#query_name", "query_start", diff --git a/src/moddotplot/moddotplot.py b/src/moddotplot/moddotplot.py index e63fcb0..db7ec1f 100644 --- a/src/moddotplot/moddotplot.py +++ b/src/moddotplot/moddotplot.py @@ -260,6 +260,13 @@ def get_parser(): help="Create a dotplot with two different sequences (skips self-identity plots).", ) + static_parser.add_argument( + "--compare-order", + choices=["sequential", "size"], + default="sequential", + help="Order in which sequences appear in the comparative plot. Default is 'sequential': First file on x-axis, second file on y-axis. Another option is 'size': The larger sequence on the x-axis and the smaller on y-axis.", + ) + static_parser.add_argument( "--no-bed", action="store_true", help="Skip output of bed file." ) @@ -906,7 +913,11 @@ def main(): if args.grid or args.grid_only: grid_val_singles = [] grid_val_single_names = [] - sequences = list(zip(seq_list, k_list)) + new_sequences = list(zip(seq_list, k_list)) + if args.compare_order == "size": + sequences = sorted(new_sequences, key=lambda seq: len(seq[1]), reverse=True) + else: + sequences = new_sequences if len(sequences) > 6 and (args.grid or args.grid_only): print("Too many sequences to create a grid. Skipping. \n") @@ -917,6 +928,7 @@ def main(): if not args.compare_only: for i in range(len(sequences)): seq_length = len(sequences[i][1]) + seq_name = sequences[i][0] win = args.window res = args.resolution if args.window: @@ -941,10 +953,10 @@ def main(): seq_sparsity = 2 ** (int(math.log2(seq_sparsity - 1)) + 1) expectation = round(win / seq_sparsity) - print(f"Computing self identity matrix for {sequences[i][0]}... \n") + print(f"Computing self identity matrix for {seq_name}... \n") # TODO: Logging here # print(f"\tSparsity value s: {seq_sparsity}\n") - print(f"\tSequence length n: {len(k_list[i]) + args.kmer - 1}\n") + print(f"\tSequence length n: {seq_length + args.kmer - 1}\n") print(f"\tWindow size w: {win}\n") print(f"\tModimizer sketch size: {expectation}\n") print(f"\tPlot Resolution r: {res}\n") @@ -959,20 +971,21 @@ def main(): args.ambiguous, expectation, ) + print(seq_list[i]) bed = convertMatrixToBed( - self_mat, win, args.identity, seq_list[i], seq_list[i], True + self_mat, win, args.identity, seq_name, seq_name, True ) if args.grid or args.grid_only: grid_val_singles.append(bed) - grid_val_single_names.append(sequences[i][0]) + grid_val_single_names.append(seq_name) if not args.no_bed: # Log saving bed file if not args.output_dir: - bedfile_output = sequences[i][0] + ".bed" + bedfile_output = seq_name + ".bed" else: bedfile_output = os.path.join( - args.output_dir, sequences[i][0] + ".bed" + args.output_dir, seq_name + ".bed" ) with open(bedfile_output, "w") as bedfile: for row in bed: @@ -983,8 +996,8 @@ def main(): create_plots( sdf=[bed], directory=args.output_dir if args.output_dir else ".", - name_x=sequences[i][0], - name_y=sequences[i][0], + name_x=seq_name, + name_y=seq_name, palette=args.palette, palette_orientation=args.palette_orientation, no_hist=args.no_hist, @@ -1022,15 +1035,6 @@ def main(): smaller_length = len(smaller_seq) larger_seq_name = sequences[i][0] smaller_seq_name = sequences[j][0] - - if larger_length < smaller_length: - smaller_seq = sequences[i][1] - larger_seq = sequences[j][1] - larger_length = len(larger_seq) - smaller_length = len(smaller_seq) - larger_seq_name = sequences[j][0] - smaller_seq_name = sequences[i][0] - win = args.window res = args.resolution if args.window: @@ -1062,10 +1066,10 @@ def main(): print(f"\tPlot Resolution r: {res}\n") pair_mat = createPairwiseMatrix( - larger_length, smaller_length, - larger_seq, + larger_length, smaller_seq, + larger_seq, win, seq_sparsity, args.delta, @@ -1098,17 +1102,17 @@ def main(): # Log saving bed file if not args.output_dir: bedfile_output = ( - smaller_seq_name + larger_seq_name + "_" - + larger_seq_name + + smaller_seq_name + "_COMPARE.bed" ) else: bedfile_output = os.path.join( args.output_dir, - smaller_seq_name + larger_seq_name + "_" - + larger_seq_name + + smaller_seq_name + "_COMPARE.bed", ) with open(bedfile_output, "w") as bedfile: @@ -1120,8 +1124,8 @@ def main(): create_plots( sdf=[bed], directory=args.output_dir if args.output_dir else ".", - name_x=smaller_seq_name, - name_y=larger_seq_name, + name_x=larger_seq_name, + name_y=smaller_seq_name, palette=args.palette, palette_orientation=args.palette_orientation, no_hist=args.no_hist, diff --git a/src/moddotplot/static_plots.py b/src/moddotplot/static_plots.py index eb4e1bd..8ddf5ea 100644 --- a/src/moddotplot/static_plots.py +++ b/src/moddotplot/static_plots.py @@ -1,4 +1,3 @@ -from ast import excepthandler from plotnine import ( ggsave, ggplot, @@ -282,32 +281,28 @@ def read_df( return df -def generate_breaks(number, min_breaks=6, max_breaks=8): +def generate_breaks(number, min_breaks=5, max_breaks=9): # Determine the order of magnitude magnitude = 10 ** int( - np.floor(np.log10(number)) + math.floor(np.log10(number)) ) # Base power of 10 (e.g., 10M, 1M, 100K) - - # Find a reasonable step size - possible_steps = [ - magnitude // d - for d in [10, 5, 4, 2, 1] - if (number // (magnitude // d)) in range(min_breaks, max_breaks + 1) - ] - - step = ( - possible_steps[0] if possible_steps else magnitude // 5 - ) # Default to 1/5th if no exact match - + threshold = math.ceil(number / magnitude) + while threshold > max_breaks: + magnitude = magnitude * 2 + threshold = np.ceil(number / magnitude) + while threshold < min_breaks: + magnitude = magnitude / 2 + threshold = np.ceil(number / magnitude) # Generate breakpoints - breaks = list(range(0, ((number // step) + 1) * step, step)) + breaks = list(range(0, (threshold * magnitude) + magnitude, magnitude)) return breaks def make_dot( sdf, - title_name, + name_x, + name_y, palette, palette_orientation, colors, @@ -316,7 +311,17 @@ def make_dot( xlim, deraster, width, + is_pairwise, ): + if is_pairwise: + title_name = f"Comparative Plot: {name_x} vs {name_y}" + else: + title_name = f"Self-Identity Plot: {name_x}" + title_length = 2 * width + if len(title_name) > 50: + title_length = 1.5 * width + elif len(title_name) > 80: + title_length = width # Select the color palette if hasattr(diverging, palette): function_name = getattr(diverging, palette) @@ -374,8 +379,10 @@ def make_dot( axis_ticks_major=element_line( size=(width), color="black" ), # Increased tick length - title=element_blank(), # Center title - axis_title_x=element_text(size=(width * 1.2), family=["DejaVu Sans"]), + title=element_text( + family=["DejaVu Sans"], size=title_length, hjust=0.5 + ), # Center title + axis_title_x=element_text(size=(width * 1.4), family=["DejaVu Sans"]), strip_background=element_blank(), # Remove facet strip background strip_text=element_text( size=(width * 1.2), family=["DejaVu Sans"] @@ -695,7 +702,7 @@ def make_tri( axis_ticks_major_y=element_blank(), axis_ticks_major=element_line(size=(width)), title=element_text(size=(width * 1.4), hjust=0.5), - axis_title_x=element_text(size=(width * 1.2), family=["DejaVu Sans"]), + axis_title_x=element_text(size=(width * 1.4), family=["DejaVu Sans"]), axis_text_y=element_blank(), ) ) @@ -1362,7 +1369,8 @@ def create_plots( if is_pairwise: heatmap = make_dot( sdf, - plot_filename, + name_x, + name_y, palette, palette_orientation, custom_colors, @@ -1371,6 +1379,7 @@ def create_plots( xlim, deraster, width, + True, ) print(f"Creating plots and saving to {plot_filename}...\n") ggsave( @@ -1394,17 +1403,17 @@ def create_plots( try: if not heatmap.data: print( - f"{plot_filename}_COMPARE.pdf and {plot_filename}_COMPARE.png saved sucessfully. \n" + f"{plot_filename}_COMPARE.{vector_format} and {plot_filename}_COMPARE.png saved sucessfully. \n" ) return 0 except ValueError: print( - f"{plot_filename}_COMPARE.pdf and {plot_filename}_COMPARE.png saved sucessfully. \n" + f"{plot_filename}_COMPARE.{vector_format} and {plot_filename}_COMPARE.png saved sucessfully. \n" ) return 0 if no_hist: print( - f"{plot_filename}_COMPARE.pdf and {plot_filename}_COMPARE.png saved sucessfully. \n" + f"{plot_filename}_COMPARE.{vector_format} and {plot_filename}_COMPARE.png saved sucessfully. \n" ) else: ggsave( @@ -1446,7 +1455,8 @@ def create_plots( ) full_plot = make_dot( check_st_en_equality(sdf), - plot_filename, + name_x, + name_y, palette, palette_orientation, custom_colors, @@ -1455,6 +1465,7 @@ def create_plots( xlim, deraster, width, + False, ) ggsave( full_plot,