From d737654491071eab69ed1c9609e3cf999141fbfb Mon Sep 17 00:00:00 2001 From: Alex Sweeten Date: Mon, 11 Aug 2025 22:27:13 -0400 Subject: [PATCH 1/8] Merge svg's for annotation track and dotplot --- pyproject.toml | 4 +- src/moddotplot/const.py | 2 +- src/moddotplot/static_plots.py | 127 ++++++++++++++++++++++++--------- 3 files changed, 95 insertions(+), 38 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 39eb0c2..4eba38c 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "ModDotPlot" -version = "0.9.5" +version = "0.9.6" requires-python = ">= 3.7" dependencies = [ "pysam", @@ -30,7 +30,7 @@ authors = [ maintainers = [ {name = "Alex Sweeten", email = "alex.sweeten@nih.gov"} ] -readme = {file = "README.txt", content-type = "text/markdown"} +readme = {file = "README.md", content-type = "text/markdown"} license = {file = "LICENSE"} keywords = ["dotplot", "sketching", "modimizer", "heatmap"] diff --git a/src/moddotplot/const.py b/src/moddotplot/const.py index 1302336..e5332ea 100755 --- a/src/moddotplot/const.py +++ b/src/moddotplot/const.py @@ -1,4 +1,4 @@ -VERSION = "0.9.5" +VERSION = "0.9.6" COLS = [ "#query_name", "query_start", diff --git a/src/moddotplot/static_plots.py b/src/moddotplot/static_plots.py index bf6055f..7a92a3a 100755 --- a/src/moddotplot/static_plots.py +++ b/src/moddotplot/static_plots.py @@ -110,8 +110,8 @@ def generate_ini_file( "file_type = bed", ] - if x_axis: - sections.insert(0, "[x-axis]") + '''if x_axis: + sections.insert(0, "[x-axis]")''' ini_content = "\n".join(sections) with open(f"{ininame}.ini", "w") as file: @@ -172,10 +172,57 @@ def run_pygenometracks(inifile, region, output_file, width): plot_regions=region, plot_width=width, ) + + # Extract the chromosome, start, and end from the region + chrom, start, end = region[0] + + # Call the plot method directly to generate the image + fig = trp.plot(output_file, chrom, start, end) + + return trp +def test_pygenometracks_direct(inifile, chrom, start, end, output_file, width=40): + """ + Direct test function to call PlotTracks.plot() without any coordinate validation. + This bypasses the region checking that happens during PlotTracks initialization. + + Args: + inifile: Path to the tracks configuration file + chrom: Chromosome name + start: Start coordinate + end: End coordinate + output_file: Output image file path + width: Figure width in cm + """ + + output_dir = os.path.dirname(output_file) + if output_dir and not os.path.exists(output_dir): + os.makedirs(output_dir) + + # Create a dummy region for initialization (this gets overridden in plot()) + dummy_region = [(chrom, 1, 1000)] + + trp = PlotTracks( + inifile, + width, + fig_height=None, + fontsize=10, + dpi=300, + track_label_width=0.05, + plot_regions=dummy_region, + plot_width=width, + ) + + + # Call plot() directly with your desired coordinates + fig = trp.plot(output_file, chrom, start, end) + + return fig + + def reverse_pascal(double_vals): if len(double_vals) == 1: return 2 @@ -1061,43 +1108,48 @@ def append_svg(svg1_path, svg2_path, output_path): def get_svg_size(svg_path): - """Returns the width and height of an SVG file.""" - fig = sg.fromfile(svg_path) - return fig.get_size() + """Helper to extract width and height of an SVG in pt units.""" + import xml.etree.ElementTree as ET + tree = ET.parse(svg_path) + root = tree.getroot() + width = root.get("width") + height = root.get("height") + return width, height -def merge_svgs(svg1_path, svg2_path, output_path): - """Merges two SVG files into a single SVG file. +def parse_size(size_str): + """Convert '648pt' or '800px' -> float(648).""" + return float(re.sub(r'[a-zA-Z]+', '', size_str)) - Args: - svg1_path: Path to the first SVG file. - svg2_path: Path to the second SVG file. - output_path: Path to save the merged SVG file. - """ - # Create figure and subplots - - w, l = get_svg_size(svg1_path) - print(w, l) - w2, l2 = get_svg_size(svg2_path) - print(w2, l2) - # fig = sg.SVGFigure("780pt", "432pt") - # fig = sg.SVGFigure(810, 450) - fig = sg.SVGFigure("110pt", "100pt") - # Load the two SVG files +def merge_svgs(svg1_path, svg2_path, output_path): + """Merges two SVG files into a single SVG file with proper size.""" + + w1, h1 = get_svg_size(svg1_path) + w2, h2 = get_svg_size(svg2_path) + + # Ensure they are floats + w1, h1 = parse_size(w1), parse_size(h1) + w2, h2 = parse_size(w2), parse_size(h2) + print(w1,h1,w2,h2) + # Determine total size + total_width = max(w1, w2) + total_height = h1 + h2 + + # Create figure + fig = sg.SVGFigure(f"{total_width}px", f"{total_height}px") + + # Load SVGs svg1 = sg.fromfile(svg1_path).getroot() svg2 = sg.fromfile(svg2_path).getroot() - # Optionally, adjust the position and size of the loaded SVGs - svg1.moveto(0, 0) - svg2.moveto(0, 600) + # Position + svg1.moveto(0, -300) + svg2.moveto(38, 300) - # Append the loaded SVGs to the figure + # Append and save fig.append([svg1, svg2]) - - # Save the merged SVG to a new file + fig.set_size((f"{total_width}px", f"{total_height}px")) fig.save(output_path) - print(get_svg_size(output_path)) - def make_tri_axis(sdf, title_name, palette, palette_orientation, colors, breaks, xlim): if not breaks: @@ -1582,7 +1634,9 @@ def create_plots( ) min_val = max(sdf["q_st"].min(), sdf["r_st"].min()) max_val = max(sdf["q_en"].max(), sdf["r_en"].max()) - region = [(name_x.split(":")[0], min_val, max_val)] + if not xlim: + xlim = max_val + region = [(name_x.split(":")[0], min_val, xlim)] if inifile: bed_track = run_pygenometracks( inifile=inifile, @@ -1595,10 +1649,10 @@ def create_plots( f"{iniprefix}_ANNOTATION.{vector_format}", name_x.split(":")[0], min_val, - max_val, + xlim, ) bed_track.plot( - f"{iniprefix}_ANNOTATION.png", name_x.split(":")[0], min_val, max_val + f"{iniprefix}_ANNOTATION.png", name_x.split(":")[0], min_val, xlim ) print( f"\nAnnotation track saved to {iniprefix}_ANNOTATION.{vector_format} & {iniprefix}_ANNOTATION.png\n" @@ -1731,7 +1785,6 @@ def create_plots( filename=f"{tri_prefix}.svg", verbose=False, ) - # These scaling values were determined thorugh much trial and error. Please don't delete :) if deraster: scaling_values = (46.62 * width, -3.75 * width) @@ -1759,7 +1812,11 @@ def create_plots( ) except: print(f"Error installing cairosvg. Unable to convert svg file. \n") - + if annotation: + merge_svgs(f"{tri_prefix}.svg",f"{iniprefix}_ANNOTATION.svg", f"{tri_prefix}_ANNOTATED.svg") + cairosvg.svg2pdf( + url=f"{tri_prefix}_ANNOTATED.svg", write_to=f"{tri_prefix}_ANNOTATED.pdf" + ) # Convert from svg to selected vector format. Ignore error if user has issues with cairosvg. try: if vector_format != "svg": From 1284c40397b048a4b03806bfd18afdfe13a30ce8 Mon Sep 17 00:00:00 2001 From: Alex Sweeten Date: Tue, 19 Aug 2025 16:46:46 -0700 Subject: [PATCH 2/8] Add cooler support --- src/moddotplot/estimate_identity.py | 69 +++++++++++++----- src/moddotplot/moddotplot.py | 109 +++++++++++++++++++++++++--- 2 files changed, 149 insertions(+), 29 deletions(-) diff --git a/src/moddotplot/estimate_identity.py b/src/moddotplot/estimate_identity.py index d9da261..099992e 100644 --- a/src/moddotplot/estimate_identity.py +++ b/src/moddotplot/estimate_identity.py @@ -9,6 +9,8 @@ from palettable import colorbrewer from typing import List, Set, Dict, Tuple import mmh3 +import pandas as pd +import cooler from moddotplot.parse_fasta import printProgressBar @@ -205,32 +207,61 @@ def convertMatrixToBed( ) return bed - -def divide_into_chunks(lst: List[int], res: int) -> List[List[int]]: +def convertMatrixToCool( + matrix, window_size, id_threshold, x_name, y_name, + self_identity, x_offset, y_offset, chromsizes, output_cool +): """ - Divide a list into approximately equal-sized chunks. + Convert a matrix into a .cool file. Args: - lst (List[int]): The input list to be divided. - res (int): The desired number of result chunks. - - Returns: - List[List[int]]: A list of lists, where each inner list contains elements from the input list. + matrix (ndarray): 2D numpy array. + window_size (int): Bin/window size in bp. + id_threshold (float): Percent identity threshold (0-100). + x_name (str): Chromosome name for rows. + y_name (str): Chromosome name for columns. + self_identity (bool): Whether to include self and upper triangle only. + x_offset (int): Genomic offset for x axis (start position). + y_offset (int): Genomic offset for y axis (start position). + chromsizes (dict): Dict of chromosome lengths, e.g. {"chr1": 248956422}. + output_cool (str): Path to save cooler file. """ - chunk_size = len(lst) // res # Calculate the target chunk size - remainder = len(lst) % res # Calculate the remainder + rows, cols = matrix.shape + + # ---- build bin table ---- + bins = [] + for i in range(rows): + bins.append((x_name, i * window_size + x_offset, (i+1) * window_size + x_offset)) + for j in range(cols): + bins.append((y_name, j * window_size + y_offset, (j+1) * window_size + y_offset)) + + bins = pd.DataFrame(bins, columns=["chrom", "start", "end"]) - chunks = [] - start = 0 + # ---- build pixel table ---- + pixels = [] + for x in range(rows): + for y in range(cols): + value = matrix[x, y] + if (not self_identity) or (self_identity and x <= y): + if value >= id_threshold / 100: + bin1_id = x + bin2_id = rows + y # offset y bins after x bins + pixels.append((bin1_id, bin2_id, float(value))) + + pixels = pd.DataFrame(pixels, columns=["bin1_id", "bin2_id", "count"]) + + # ---- write cooler ---- + cooler.create_cooler( + output_cool, + bins=bins, + pixels=pixels, + ordered=True + ) + print(bins) + print(pixels) - for i in range(res): - end = ( - start + chunk_size + (1 if i < remainder else 0) - ) # Adjust chunk size for remainder - chunks.append(lst[start:end]) - start = end + return output_cool - return chunks def binomial_distance(containment_value: float, kmer_value: int) -> float: diff --git a/src/moddotplot/moddotplot.py b/src/moddotplot/moddotplot.py index abff278..7f932b9 100755 --- a/src/moddotplot/moddotplot.py +++ b/src/moddotplot/moddotplot.py @@ -13,6 +13,7 @@ selfContainmentMatrix, pairwiseContainmentMatrix, convertMatrixToBed, + convertMatrixToCool, createSelfMatrix, createPairwiseMatrix, partitionOverlaps, @@ -27,6 +28,7 @@ import numpy as np import pickle import os +import cooler def get_parser(): @@ -270,6 +272,10 @@ def get_parser(): help="Order in which sequences appear in the comparative plot. Default is 'sequential': First file on x-axis, second file on y-axis. Another option is 'size': The larger sequence on the x-axis and the smaller on y-axis.", ) + static_parser.add_argument( + "--cooler", action="store_true", help="Output matrix to cooler file." + ) + static_parser.add_argument( "--no-bedpe", action="store_true", help="Skip output of paired-end bed file." ) @@ -924,7 +930,7 @@ def main(): # Create output directory, if doesn't exist: if (args.output_dir) and not os.path.exists(args.output_dir): - os.makedirs(args.output_dir) + os.makedirs(args.output_dir,exist_ok=True) # -----------COMPUTE SELF-IDENTITY PLOTS----------- if not args.compare_only: for i in range(len(sequences)): @@ -976,7 +982,7 @@ def main(): args.identity, args.ambiguous, expectation, - ) + ) bed = convertMatrixToBed( self_mat, win, @@ -991,14 +997,53 @@ def main(): grid_val_singles.append(bed) grid_val_single_names.append(seq_name) + if args.cooler: + try: + cooler_path = "." + if not args.output_dir: + cooler_path = os.path.join(cooler_path, seq_name) + else: + cooler_path = os.path.join(args.output_dir, seq_name) + os.makedirs(cooler_path,exist_ok=True) + cooler_output = os.path.join( + cooler_path, + seq_name + ".cooler" + ) + convertMatrixToCool( + matrix = self_mat, + window_size = win, + id_threshold = args.identity, + x_name = seq_name, + y_name = seq_name, + self_identity=True, + x_offset=seq_start_pos, + y_offset=seq_start_pos, + chromsizes=seq_length, + output_cool=cooler_output + ) + print( + f"Saved self-identity matrix as a cooler file to {cooler_output}\n" + ) + except Exception as e: + print(f"Error creating cooler file: {e}") + if not args.no_bedpe: # Log saving bed file + bedpe_path = "." if not args.output_dir: - bedfile_output = seq_name + ".bedpe" + bedpe_path = os.path.join(bedpe_path, seq_name) + os.makedirs(bedpe_path,exist_ok=True) + bedfile_output = os.path.join( + seq_name, + seq_name + ".bedpe" + ) else: + bedpe_path = os.path.join(args.output_dir, seq_name) + os.makedirs(bedpe_path,exist_ok=True) bedfile_output = os.path.join( - args.output_dir, seq_name + ".bedpe" + bedpe_path, seq_name + ".bedpe" ) + with open(bedfile_output, "w") as bedfile: for row in bed: bedfile.write("\t".join(map(str, row)) + "\n") @@ -1009,7 +1054,7 @@ def main(): if (not args.no_plot) and (not args.grid_only): create_plots( sdf=[bed], - directory=args.output_dir if args.output_dir else ".", + directory=bedpe_path, name_x=seq_name, name_y=seq_name, palette=args.palette, @@ -1110,12 +1155,42 @@ def main(): f"The pairwise identity matrix for {sequences[i][0]} and {sequences[j][0]} is empty. Skipping.\n" ) else: + if args.cooler: + try: + cooler_path = "." + if not args.output_dir: + cooler_path = os.path.join(cooler_path, f"{larger_seq_name}_{smaller_seq_name}") + else: + cooler_path = os.path.join(args.output_dir, f"{larger_seq_name}_{smaller_seq_name}") + os.makedirs(cooler_path,exist_ok=True) + cooler_output = os.path.join( + cooler_path, + f"{larger_seq_name}_{smaller_seq_name}.cooler" + ) + convertMatrixToCool( + matrix = pair_mat, + window_size = win, + id_threshold = args.identity, + x_name = larger_seq_name, + y_name = smaller_seq_name, + self_identity=False, + x_offset=larger_seq_start_pos, + y_offset=smaller_seq_start_pos, + chromsizes=larger_length, + output_cool=cooler_output + ) + print( + f"Saved comparative matrix as a cooler file to {cooler_output}\n" + ) + except Exception as e: + print(f"Error creating pairwise cooler file: {e}") bed = convertMatrixToBed( pair_mat, win, args.identity, - seq_list[i], - seq_list[j], + #check if this is correct + larger_seq_name, + smaller_seq_name, False, larger_seq_start_pos, smaller_seq_start_pos, @@ -1128,16 +1203,30 @@ def main(): xlim_val_grid = max(larger_length, xlim_val_grid) if not args.no_bedpe: # Log saving bed file + bedpe_path = "." if not args.output_dir: - bedfile_output = ( + bedfile_prefix = ( larger_seq_name + "_" + smaller_seq_name + ) + bedpe_path = os.path.join(bedpe_path, bedfile_prefix) + os.makedirs(bedpe_path, exist_ok=True) + bedfile_output = ( + bedpe_path, + bedfile_prefix + "_COMPARE.bedpe" ) else: + bedfile_prefix = ( + larger_seq_name + + "_" + + smaller_seq_name + ) + bedpe_path = os.path.join(args.output_dir, bedfile_prefix) + os.makedirs(bedpe_path,exist_ok=True) bedfile_output = os.path.join( - args.output_dir, + bedpe_path, larger_seq_name + "_" + smaller_seq_name @@ -1153,7 +1242,7 @@ def main(): if (not args.no_plot) and (not args.grid_only): create_plots( sdf=[bed], - directory=args.output_dir if args.output_dir else ".", + directory=bedpe_path, name_x=larger_seq_name, name_y=smaller_seq_name, palette=args.palette, From a6725528b14721da996f6029fd260494f4eacb3a Mon Sep 17 00:00:00 2001 From: Alex Sweeten Date: Tue, 19 Aug 2025 22:39:50 -0700 Subject: [PATCH 3/8] Resize annotation tracks for tri --- src/moddotplot/static_plots.py | 369 ++++++++++++++++++++++++--------- 1 file changed, 269 insertions(+), 100 deletions(-) diff --git a/src/moddotplot/static_plots.py b/src/moddotplot/static_plots.py index 7a92a3a..4f8e4f0 100755 --- a/src/moddotplot/static_plots.py +++ b/src/moddotplot/static_plots.py @@ -31,6 +31,7 @@ import cairosvg import pandas as pd import numpy as np +import glob from PIL import Image import patchworklib as pw import math @@ -110,8 +111,8 @@ def generate_ini_file( "file_type = bed", ] - '''if x_axis: - sections.insert(0, "[x-axis]")''' + if x_axis: + sections.insert(0, "[x-axis]") ini_content = "\n".join(sections) with open(f"{ininame}.ini", "w") as file: @@ -156,55 +157,170 @@ def read_annotation_bed(filepath): return df +def make_svg_background_transparent(svg_path, output_path=None): + """ + Makes the background of an SVG file transparent by removing/modifying background fills. + + Args: + svg_path: Path to input SVG file + output_path: Path to output SVG file (if None, overwrites input) + """ + import xml.etree.ElementTree as ET + import re + + if output_path is None: + output_path = svg_path + + # Parse the SVG + tree = ET.parse(svg_path) + root = tree.getroot() + + # Define SVG namespace + ns = {"svg": "http://www.w3.org/2000/svg"} + + # Remove background rectangles/paths that cover the entire canvas + # Get SVG dimensions for comparison + width = root.get('width', '0') + height = root.get('height', '0') + + # Extract numeric values + width_num = float(re.sub(r'[a-zA-Z%]+', '', width)) if width != '0' else 0 + height_num = float(re.sub(r'[a-zA-Z%]+', '', height)) if height != '0' else 0 + + # Find and modify background elements + elements_to_modify = [] + + # Check all paths, rectangles, and other elements + for elem in root.iter(): + if elem.tag.endswith('path') or elem.tag.endswith('rect') or elem.tag.endswith('polygon'): + # Check if this element has a background-like fill + style = elem.get('style', '') + fill = elem.get('fill', '') + + # Look for background colors (light colors, white, etc.) + background_colors = ['#ffffff', '#f0ffff', 'white', 'lightblue', 'lightgray', 'lightgrey'] + + is_background = False + current_fill = None + + if 'fill:' in style: + # Extract fill from style + fill_match = re.search(r'fill:\s*([^;]+)', style) + if fill_match: + current_fill = fill_match.group(1).strip() + elif fill: + current_fill = fill + + if current_fill and any(bg_color in current_fill.lower() for bg_color in background_colors): + is_background = True + + # For paths, check if it covers a large area (likely background) + if elem.tag.endswith('path'): + d = elem.get('d', '') + # Simple heuristic: if path starts at 0,0 and covers large area, it's likely background + if 'M 0' in d and current_fill: + is_background = True + + # For rectangles, check if it covers the full canvas + if elem.tag.endswith('rect'): + x = float(elem.get('x', 0)) + y = float(elem.get('y', 0)) + w = float(elem.get('width', 0)) + h = float(elem.get('height', 0)) + + # If rectangle covers most/all of the canvas, it's likely background + if x <= 1 and y <= 1 and w >= width_num * 0.9 and h >= height_num * 0.9: + is_background = True + + if is_background: + elements_to_modify.append(elem) + + # Modify the background elements + for elem in elements_to_modify: + style = elem.get('style', '') + + if 'fill:' in style: + # Replace fill in style + new_style = re.sub(r'fill:\s*[^;]+', 'fill: transparent', style) + elem.set('style', new_style) + elif elem.get('fill'): + # Replace fill attribute + elem.set('fill', 'transparent') + + + # Save the modified SVG + tree.write(output_path, encoding='unicode', xml_declaration=True) + +def make_all_svg_backgrounds_transparent(directory): + """ + Makes all SVG files in a directory have transparent backgrounds. + """ + + svg_files = glob.glob(os.path.join(directory, "*.svg")) + + for svg_file in svg_files: + make_svg_background_transparent(svg_file) + + print(f"Processed {len(svg_files)} SVG files") + def run_pygenometracks(inifile, region, output_file, width): output_dir = os.path.dirname(output_file) if output_dir and not os.path.exists(output_dir): os.makedirs(output_dir) # Create directory if it doesn't exist - trp = PlotTracks( - inifile, - width, - fig_height=None, - fontsize=10, - dpi=300, - track_label_width=0.05, - plot_regions=region, - plot_width=width, - ) - - # Extract the chromosome, start, and end from the region - chrom, start, end = region[0] + try: + trp = PlotTracks( + inifile, + width, + fig_height=None, + fontsize=10, + dpi=300, + track_label_width=0.05, + plot_regions=region, + plot_width=width, - - # Call the plot method directly to generate the image - fig = trp.plot(output_file, chrom, start, end) + ) - - return trp + # Extract the chromosome, start, and end from the region + chrom, start, end = region[0] + + # Call the plot method directly to generate the image + fig = trp.plot(output_file, chrom, start, end) + + return trp + + except Exception as e: + if "No valid intervals were found" in str(e): + print(f"No valid intervals found in BED file for region {region[0]}") + print("This is expected when the BED file doesn't overlap with the query region.") + return None + else: + print(f"Error in run_pygenometracks: {e}") + raise e def test_pygenometracks_direct(inifile, chrom, start, end, output_file, width=40): """ Direct test function to call PlotTracks.plot() without any coordinate validation. This bypasses the region checking that happens during PlotTracks initialization. - + Args: inifile: Path to the tracks configuration file chrom: Chromosome name - start: Start coordinate + start: Start coordinate end: End coordinate output_file: Output image file path width: Figure width in cm """ - + output_dir = os.path.dirname(output_file) if output_dir and not os.path.exists(output_dir): os.makedirs(output_dir) - + # Create a dummy region for initialization (this gets overridden in plot()) dummy_region = [(chrom, 1, 1000)] - + trp = PlotTracks( inifile, width, @@ -215,8 +331,7 @@ def test_pygenometracks_direct(inifile, chrom, start, end, output_file, width=40 plot_regions=dummy_region, plot_width=width, ) - - + # Call plot() directly with your desired coordinates fig = trp.plot(output_file, chrom, start, end) @@ -285,42 +400,6 @@ def make_scale(vals: list) -> list: return make_m(scaled) -def overlap_axis(rotated_plot, filename, prefix): - scale_factor = math.sqrt(2) + 0.04 - new_width = int(rotated_plot.width / scale_factor) - new_height = int(rotated_plot.height / scale_factor) - resized_rotated_plot = rotated_plot.resize((new_width, new_height), Image.LANCZOS) - - # Step 3: Overlay the resized rotated heatmap onto the original axes - - # Open the original heatmap with axes - image_with_axes = Image.open(filename) - - # Create a blank image with the same size as the original - final_image = Image.new("RGBA", image_with_axes.size) - - # Calculate the position to center the resized rotated image within the original plot area - x_offset = (final_image.width - resized_rotated_plot.width) // 2 - y_offset = (final_image.height - resized_rotated_plot.height) // 2 - y_offset += 2400 - x_offset += 30 - - # Paste the original image with axes onto the final image - final_image.paste(image_with_axes, (0, 0)) - - # Paste the resized rotated plot onto the final image - final_image.paste(resized_rotated_plot, (x_offset, y_offset), resized_rotated_plot) - width, height = final_image.size - cropped_image = final_image.crop((0, height // 2.6, width, height)) - - # Save or show the final image - cropped_image.save(f"{prefix}_TRI.png") - cropped_image.save(f"{prefix}_TRI.pdf", "PDF", resolution=100.0) - - # Remove temp files - if os.path.exists(filename): - os.remove(filename) - def get_colors(sdf, ncolors, is_freq, custom_breakpoints): assert ncolors > 2 and ncolors < 12 @@ -1110,6 +1189,7 @@ def append_svg(svg1_path, svg2_path, output_path): def get_svg_size(svg_path): """Helper to extract width and height of an SVG in pt units.""" import xml.etree.ElementTree as ET + tree = ET.parse(svg_path) root = tree.getroot() width = root.get("width") @@ -1119,38 +1199,52 @@ def get_svg_size(svg_path): def parse_size(size_str): """Convert '648pt' or '800px' -> float(648).""" - return float(re.sub(r'[a-zA-Z]+', '', size_str)) + return float(re.sub(r"[a-zA-Z]+", "", size_str)) + -def merge_svgs(svg1_path, svg2_path, output_path): +def merge_annotation_tri(svg1_path, svg2_path, output_path, deraster, width): """Merges two SVG files into a single SVG file with proper size.""" - + w1, h1 = get_svg_size(svg1_path) w2, h2 = get_svg_size(svg2_path) # Ensure they are floats w1, h1 = parse_size(w1), parse_size(h1) w2, h2 = parse_size(w2), parse_size(h2) - print(w1,h1,w2,h2) # Determine total size total_width = max(w1, w2) total_height = h1 + h2 - # Create figure fig = sg.SVGFigure(f"{total_width}px", f"{total_height}px") # Load SVGs svg1 = sg.fromfile(svg1_path).getroot() svg2 = sg.fromfile(svg2_path).getroot() + make_svg_background_transparent(svg2_path) # Position - svg1.moveto(0, -300) - svg2.moveto(38, 300) + if deraster: + adjust_svg1 = (0,-5*(h1/6)) + adjust_svg2 = (h1/30, 5*(h1/6)) + svg1.moveto(adjust_svg1[0], adjust_svg1[1]) + svg2.scale(1.065) + svg2.moveto(adjust_svg2[0], adjust_svg2[1]) + else: + adjust_svg1 = (0,-5*(h1/6)) + #adjust_svg2 = (h1/45 + (width-5), 5*(h1/6)) + adjust_svg2 = (10+(width/4.5), 5*(h1/6)) + svg1.moveto(adjust_svg1[0], adjust_svg1[1]) + svg2.moveto(adjust_svg2[0], adjust_svg2[1]) + scaling_factor = width/2 + svg2.scale(1.034 + (scaling_factor/1000)) + # Append and save fig.append([svg1, svg2]) fig.set_size((f"{total_width}px", f"{total_height}px")) fig.save(output_path) + def make_tri_axis(sdf, title_name, palette, palette_orientation, colors, breaks, xlim): if not breaks: breaks = True @@ -1615,9 +1709,11 @@ def create_plots( from_file, ) sdf = df - plot_filename = f"{directory}/{name_x}" + + plot_filename = os.path.join(directory, name_x) + if is_pairwise: - plot_filename = f"{directory}/{name_x}_{name_y}" + plot_filename = os.path.join(directory, f"{name_x}_{name_y}") histy = make_hist( sdf, palette, palette_orientation, custom_colors, custom_breakpoints @@ -1638,25 +1734,42 @@ def create_plots( xlim = max_val region = [(name_x.split(":")[0], min_val, xlim)] if inifile: - bed_track = run_pygenometracks( - inifile=inifile, - region=region, - output_file=f"{iniprefix}_ANNOTATION.{vector_format}", - width=width * 2.05, - ) - name_x.split(":")[0] - bed_track.plot( - f"{iniprefix}_ANNOTATION.{vector_format}", - name_x.split(":")[0], - min_val, - xlim, - ) - bed_track.plot( - f"{iniprefix}_ANNOTATION.png", name_x.split(":")[0], min_val, xlim - ) - print( - f"\nAnnotation track saved to {iniprefix}_ANNOTATION.{vector_format} & {iniprefix}_ANNOTATION.png\n" - ) + try: + # Check if the BED file has valid intervals for this region first + bed_df = read_annotation_bed(annotation) + chrom_name = name_x.split(":")[0] + + # Filter for the chromosome and region of interest + valid_intervals = bed_df[ + (bed_df['chrom'] == chrom_name) & + (bed_df['end'] >= min_val) & + (bed_df['start'] <= xlim) + ] + + if valid_intervals.empty: + print(f"No valid intervals found in {annotation} for region {chrom_name}:{min_val}-{xlim}.\n") + print("Skipping annotation track generation.\n") + else: + bed_track = run_pygenometracks( + inifile=inifile, + region=region, + output_file=f"{iniprefix}_ANNOTATION_TRACK.svg", + width=width * 2.05, + ) + bed_track.plot( + f"{iniprefix}_ANNOTATION_TRACK.svg", + name_x.split(":")[0], + min_val, + xlim, + ) + bed_track.plot( + f"{iniprefix}_ANNOTATION_TRACK.png", name_x.split(":")[0], min_val, xlim + ) + print(f"\nAnnotation track saved to {iniprefix}_ANNOTATION_TRACK\n") + + except Exception as e: + print(f"Error processing annotation file {annotation}: {e}\n") + print("Skipping annotation track generation.\n") if is_pairwise: heatmap = make_dot( @@ -1729,7 +1842,7 @@ def create_plots( # Self-identity plots: Output _TRI, _FULL, and _HIST else: if deraster: - print(f"Derasterization turned off. This may take a while...\n") + print(f"Producing dotplots with derasterization turned off. This may take a while...\n") tri_plot = make_tri( sdf, plot_filename, @@ -1756,7 +1869,6 @@ def create_plots( width, False, ) - ggsave( full_plot, width=width, @@ -1785,6 +1897,24 @@ def create_plots( filename=f"{tri_prefix}.svg", verbose=False, ) + if annotation: + anno_prefix = f"{plot_filename}_PRE_ANNOTATED" + annotated_tri = tri_plot[0] + theme( + axis_title_x = element_blank(), + axis_line_x = element_blank(), + axis_text_x = element_blank(), + axis_ticks_minor_x = element_blank(), + axis_ticks=element_blank(), + ) + ggsave( + annotated_tri, + width=width, + height=width, + dpi=dpi, + format="svg", + filename=f"{anno_prefix}.svg", + verbose=False, + ) # These scaling values were determined thorugh much trial and error. Please don't delete :) if deraster: scaling_values = (46.62 * width, -3.75 * width) @@ -1792,9 +1922,9 @@ def create_plots( f"{tri_prefix}.svg", scaling_values[0], scaling_values[1] ) if annotation: - """merge_svgs(f"{plot_filename}_TRI.{vector_format}","test.svg", "test2.svg") - append_svg(f"{plot_filename}_TRI.{vector_format}", "test.svg", "test2.svg") - """ + rotate_vectorized_tri( + f"{anno_prefix}.svg", scaling_values[0], scaling_values[1] + ) try: cairosvg.svg2png( url=f"{tri_prefix}.svg", write_to=f"{tri_prefix}.png", dpi=dpi @@ -1806,6 +1936,11 @@ def create_plots( rotate_rasterized_tri( f"{tri_prefix}.svg", scaling_values[0], scaling_values[1] ) + if annotation: + if annotation: + rotate_rasterized_tri( + f"{anno_prefix}.svg", scaling_values[0], scaling_values[1] + ) try: cairosvg.svg2png( url=f"{tri_prefix}.svg", write_to=f"{tri_prefix}.png", dpi=dpi @@ -1813,10 +1948,44 @@ def create_plots( except: print(f"Error installing cairosvg. Unable to convert svg file. \n") if annotation: - merge_svgs(f"{tri_prefix}.svg",f"{iniprefix}_ANNOTATION.svg", f"{tri_prefix}_ANNOTATED.svg") - cairosvg.svg2pdf( - url=f"{tri_prefix}_ANNOTATED.svg", write_to=f"{tri_prefix}_ANNOTATED.pdf" - ) + # Only merge if annotation was successfully created + if os.path.exists(f"{iniprefix}_ANNOTATION_TRACK.svg"): + make_svg_background_transparent(f"{iniprefix}_ANNOTATION_TRACK.svg") + merge_annotation_tri( + f"{anno_prefix}.svg", + f"{iniprefix}_ANNOTATION_TRACK.svg", + f"{tri_prefix}_ANNOTATED.svg", + deraster, + width + ) + try: + if vector_format != "svg": + if vector_format == "pdf": + cairosvg.svg2pdf( + url=f"{tri_prefix}_ANNOTATED.svg", + write_to=f"{tri_prefix}_ANNOTATED.pdf", + ) + cairosvg.svg2pdf( + url=f"{iniprefix}_ANNOTATION_TRACK.svg", + write_to=f"{iniprefix}_ANNOTATION_TRACK.pdf", + ) + elif vector_format == "ps": + cairosvg.svg2ps( + url=f"{tri_prefix}_ANNOTATED.svg", + write_to=f"{tri_prefix}_ANNOTATED.ps", + ) + cairosvg.svg2ps( + url=f"{tri_prefix}_ANNOTATION_TRACK.svg", + write_to=f"{tri_prefix}_ANNOTATION_TRACK.ps", + ) + if os.path.exists(f"{iniprefix}_ANNOTATION_TRACK.svg"): + os.remove(f"{iniprefix}_ANNOTATION_TRACK.svg") + if os.path.exists(f"{tri_prefix}_ANNOTATED.svg"): + os.remove(f"{tri_prefix}_ANNOTATED.svg") + except Exception as e: + print(f"Error converting annotated SVG: {e}") + else: + print("Annotation file not created, skipping merge step.") # Convert from svg to selected vector format. Ignore error if user has issues with cairosvg. try: if vector_format != "svg": From 075afbec6e2fdce3746c3313f4da2d2c2ac13431 Mon Sep 17 00:00:00 2001 From: Alex Sweeten Date: Tue, 19 Aug 2025 23:03:57 -0700 Subject: [PATCH 4/8] Adjust coordiantes for derasterized tri plots in annotation --- src/moddotplot/static_plots.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/moddotplot/static_plots.py b/src/moddotplot/static_plots.py index 4f8e4f0..a9264ce 100755 --- a/src/moddotplot/static_plots.py +++ b/src/moddotplot/static_plots.py @@ -1224,14 +1224,14 @@ def merge_annotation_tri(svg1_path, svg2_path, output_path, deraster, width): # Position if deraster: + # Its not perfect for width > 18, but good enough adjust_svg1 = (0,-5*(h1/6)) - adjust_svg2 = (h1/30, 5*(h1/6)) + adjust_svg2 = ((9 - (width/2)), 5*(h1/6)) svg1.moveto(adjust_svg1[0], adjust_svg1[1]) - svg2.scale(1.065) + svg2.scale(1.077 + (width/1000)) svg2.moveto(adjust_svg2[0], adjust_svg2[1]) else: adjust_svg1 = (0,-5*(h1/6)) - #adjust_svg2 = (h1/45 + (width-5), 5*(h1/6)) adjust_svg2 = (10+(width/4.5), 5*(h1/6)) svg1.moveto(adjust_svg1[0], adjust_svg1[1]) svg2.moveto(adjust_svg2[0], adjust_svg2[1]) From bd9f66cae49439f89b55c214c606b285925e9cc8 Mon Sep 17 00:00:00 2001 From: Alex Sweeten Date: Tue, 19 Aug 2025 23:05:18 -0700 Subject: [PATCH 5/8] Format with black --- src/moddotplot/const.py | 2 +- src/moddotplot/estimate_identity.py | 29 +++-- src/moddotplot/moddotplot.py | 83 +++++++------- src/moddotplot/static_plots.py | 168 ++++++++++++++++------------ 4 files changed, 152 insertions(+), 130 deletions(-) diff --git a/src/moddotplot/const.py b/src/moddotplot/const.py index e5332ea..1302336 100755 --- a/src/moddotplot/const.py +++ b/src/moddotplot/const.py @@ -1,4 +1,4 @@ -VERSION = "0.9.6" +VERSION = "0.9.5" COLS = [ "#query_name", "query_start", diff --git a/src/moddotplot/estimate_identity.py b/src/moddotplot/estimate_identity.py index 099992e..792741c 100644 --- a/src/moddotplot/estimate_identity.py +++ b/src/moddotplot/estimate_identity.py @@ -207,9 +207,18 @@ def convertMatrixToBed( ) return bed + def convertMatrixToCool( - matrix, window_size, id_threshold, x_name, y_name, - self_identity, x_offset, y_offset, chromsizes, output_cool + matrix, + window_size, + id_threshold, + x_name, + y_name, + self_identity, + x_offset, + y_offset, + chromsizes, + output_cool, ): """ Convert a matrix into a .cool file. @@ -231,9 +240,13 @@ def convertMatrixToCool( # ---- build bin table ---- bins = [] for i in range(rows): - bins.append((x_name, i * window_size + x_offset, (i+1) * window_size + x_offset)) + bins.append( + (x_name, i * window_size + x_offset, (i + 1) * window_size + x_offset) + ) for j in range(cols): - bins.append((y_name, j * window_size + y_offset, (j+1) * window_size + y_offset)) + bins.append( + (y_name, j * window_size + y_offset, (j + 1) * window_size + y_offset) + ) bins = pd.DataFrame(bins, columns=["chrom", "start", "end"]) @@ -251,19 +264,13 @@ def convertMatrixToCool( pixels = pd.DataFrame(pixels, columns=["bin1_id", "bin2_id", "count"]) # ---- write cooler ---- - cooler.create_cooler( - output_cool, - bins=bins, - pixels=pixels, - ordered=True - ) + cooler.create_cooler(output_cool, bins=bins, pixels=pixels, ordered=True) print(bins) print(pixels) return output_cool - def binomial_distance(containment_value: float, kmer_value: int) -> float: """ Calculate the binomial distance based on containment and kmer values. diff --git a/src/moddotplot/moddotplot.py b/src/moddotplot/moddotplot.py index 7f932b9..8411fcf 100755 --- a/src/moddotplot/moddotplot.py +++ b/src/moddotplot/moddotplot.py @@ -930,7 +930,7 @@ def main(): # Create output directory, if doesn't exist: if (args.output_dir) and not os.path.exists(args.output_dir): - os.makedirs(args.output_dir,exist_ok=True) + os.makedirs(args.output_dir, exist_ok=True) # -----------COMPUTE SELF-IDENTITY PLOTS----------- if not args.compare_only: for i in range(len(sequences)): @@ -982,7 +982,7 @@ def main(): args.identity, args.ambiguous, expectation, - ) + ) bed = convertMatrixToBed( self_mat, win, @@ -1004,22 +1004,19 @@ def main(): cooler_path = os.path.join(cooler_path, seq_name) else: cooler_path = os.path.join(args.output_dir, seq_name) - os.makedirs(cooler_path,exist_ok=True) - cooler_output = os.path.join( - cooler_path, - seq_name + ".cooler" - ) + os.makedirs(cooler_path, exist_ok=True) + cooler_output = os.path.join(cooler_path, seq_name + ".cooler") convertMatrixToCool( - matrix = self_mat, - window_size = win, - id_threshold = args.identity, - x_name = seq_name, - y_name = seq_name, + matrix=self_mat, + window_size=win, + id_threshold=args.identity, + x_name=seq_name, + y_name=seq_name, self_identity=True, x_offset=seq_start_pos, y_offset=seq_start_pos, chromsizes=seq_length, - output_cool=cooler_output + output_cool=cooler_output, ) print( f"Saved self-identity matrix as a cooler file to {cooler_output}\n" @@ -1032,18 +1029,13 @@ def main(): bedpe_path = "." if not args.output_dir: bedpe_path = os.path.join(bedpe_path, seq_name) - os.makedirs(bedpe_path,exist_ok=True) - bedfile_output = os.path.join( - seq_name, - seq_name + ".bedpe" - ) + os.makedirs(bedpe_path, exist_ok=True) + bedfile_output = os.path.join(seq_name, seq_name + ".bedpe") else: bedpe_path = os.path.join(args.output_dir, seq_name) - os.makedirs(bedpe_path,exist_ok=True) - bedfile_output = os.path.join( - bedpe_path, seq_name + ".bedpe" - ) - + os.makedirs(bedpe_path, exist_ok=True) + bedfile_output = os.path.join(bedpe_path, seq_name + ".bedpe") + with open(bedfile_output, "w") as bedfile: for row in bed: bedfile.write("\t".join(map(str, row)) + "\n") @@ -1159,25 +1151,31 @@ def main(): try: cooler_path = "." if not args.output_dir: - cooler_path = os.path.join(cooler_path, f"{larger_seq_name}_{smaller_seq_name}") + cooler_path = os.path.join( + cooler_path, + f"{larger_seq_name}_{smaller_seq_name}", + ) else: - cooler_path = os.path.join(args.output_dir, f"{larger_seq_name}_{smaller_seq_name}") - os.makedirs(cooler_path,exist_ok=True) + cooler_path = os.path.join( + args.output_dir, + f"{larger_seq_name}_{smaller_seq_name}", + ) + os.makedirs(cooler_path, exist_ok=True) cooler_output = os.path.join( cooler_path, - f"{larger_seq_name}_{smaller_seq_name}.cooler" + f"{larger_seq_name}_{smaller_seq_name}.cooler", ) convertMatrixToCool( - matrix = pair_mat, - window_size = win, - id_threshold = args.identity, - x_name = larger_seq_name, - y_name = smaller_seq_name, + matrix=pair_mat, + window_size=win, + id_threshold=args.identity, + x_name=larger_seq_name, + y_name=smaller_seq_name, self_identity=False, x_offset=larger_seq_start_pos, y_offset=smaller_seq_start_pos, chromsizes=larger_length, - output_cool=cooler_output + output_cool=cooler_output, ) print( f"Saved comparative matrix as a cooler file to {cooler_output}\n" @@ -1188,7 +1186,7 @@ def main(): pair_mat, win, args.identity, - #check if this is correct + # check if this is correct larger_seq_name, smaller_seq_name, False, @@ -1206,25 +1204,22 @@ def main(): bedpe_path = "." if not args.output_dir: bedfile_prefix = ( - larger_seq_name - + "_" - + smaller_seq_name + larger_seq_name + "_" + smaller_seq_name ) bedpe_path = os.path.join(bedpe_path, bedfile_prefix) os.makedirs(bedpe_path, exist_ok=True) bedfile_output = ( bedpe_path, - bedfile_prefix - + "_COMPARE.bedpe" + bedfile_prefix + "_COMPARE.bedpe", ) else: bedfile_prefix = ( - larger_seq_name - + "_" - + smaller_seq_name + larger_seq_name + "_" + smaller_seq_name + ) + bedpe_path = os.path.join( + args.output_dir, bedfile_prefix ) - bedpe_path = os.path.join(args.output_dir, bedfile_prefix) - os.makedirs(bedpe_path,exist_ok=True) + os.makedirs(bedpe_path, exist_ok=True) bedfile_output = os.path.join( bedpe_path, larger_seq_name diff --git a/src/moddotplot/static_plots.py b/src/moddotplot/static_plots.py index a9264ce..b73fce8 100755 --- a/src/moddotplot/static_plots.py +++ b/src/moddotplot/static_plots.py @@ -157,110 +157,124 @@ def read_annotation_bed(filepath): return df + def make_svg_background_transparent(svg_path, output_path=None): """ Makes the background of an SVG file transparent by removing/modifying background fills. - + Args: svg_path: Path to input SVG file output_path: Path to output SVG file (if None, overwrites input) """ import xml.etree.ElementTree as ET import re - + if output_path is None: output_path = svg_path - + # Parse the SVG tree = ET.parse(svg_path) root = tree.getroot() - + # Define SVG namespace ns = {"svg": "http://www.w3.org/2000/svg"} - + # Remove background rectangles/paths that cover the entire canvas # Get SVG dimensions for comparison - width = root.get('width', '0') - height = root.get('height', '0') - + width = root.get("width", "0") + height = root.get("height", "0") + # Extract numeric values - width_num = float(re.sub(r'[a-zA-Z%]+', '', width)) if width != '0' else 0 - height_num = float(re.sub(r'[a-zA-Z%]+', '', height)) if height != '0' else 0 - + width_num = float(re.sub(r"[a-zA-Z%]+", "", width)) if width != "0" else 0 + height_num = float(re.sub(r"[a-zA-Z%]+", "", height)) if height != "0" else 0 + # Find and modify background elements elements_to_modify = [] - + # Check all paths, rectangles, and other elements for elem in root.iter(): - if elem.tag.endswith('path') or elem.tag.endswith('rect') or elem.tag.endswith('polygon'): + if ( + elem.tag.endswith("path") + or elem.tag.endswith("rect") + or elem.tag.endswith("polygon") + ): # Check if this element has a background-like fill - style = elem.get('style', '') - fill = elem.get('fill', '') - + style = elem.get("style", "") + fill = elem.get("fill", "") + # Look for background colors (light colors, white, etc.) - background_colors = ['#ffffff', '#f0ffff', 'white', 'lightblue', 'lightgray', 'lightgrey'] - + background_colors = [ + "#ffffff", + "#f0ffff", + "white", + "lightblue", + "lightgray", + "lightgrey", + ] + is_background = False current_fill = None - - if 'fill:' in style: + + if "fill:" in style: # Extract fill from style - fill_match = re.search(r'fill:\s*([^;]+)', style) + fill_match = re.search(r"fill:\s*([^;]+)", style) if fill_match: current_fill = fill_match.group(1).strip() elif fill: current_fill = fill - - if current_fill and any(bg_color in current_fill.lower() for bg_color in background_colors): + + if current_fill and any( + bg_color in current_fill.lower() for bg_color in background_colors + ): is_background = True - + # For paths, check if it covers a large area (likely background) - if elem.tag.endswith('path'): - d = elem.get('d', '') + if elem.tag.endswith("path"): + d = elem.get("d", "") # Simple heuristic: if path starts at 0,0 and covers large area, it's likely background - if 'M 0' in d and current_fill: + if "M 0" in d and current_fill: is_background = True - + # For rectangles, check if it covers the full canvas - if elem.tag.endswith('rect'): - x = float(elem.get('x', 0)) - y = float(elem.get('y', 0)) - w = float(elem.get('width', 0)) - h = float(elem.get('height', 0)) - + if elem.tag.endswith("rect"): + x = float(elem.get("x", 0)) + y = float(elem.get("y", 0)) + w = float(elem.get("width", 0)) + h = float(elem.get("height", 0)) + # If rectangle covers most/all of the canvas, it's likely background if x <= 1 and y <= 1 and w >= width_num * 0.9 and h >= height_num * 0.9: is_background = True - + if is_background: elements_to_modify.append(elem) - + # Modify the background elements for elem in elements_to_modify: - style = elem.get('style', '') - - if 'fill:' in style: + style = elem.get("style", "") + + if "fill:" in style: # Replace fill in style - new_style = re.sub(r'fill:\s*[^;]+', 'fill: transparent', style) - elem.set('style', new_style) - elif elem.get('fill'): + new_style = re.sub(r"fill:\s*[^;]+", "fill: transparent", style) + elem.set("style", new_style) + elif elem.get("fill"): # Replace fill attribute - elem.set('fill', 'transparent') - - + elem.set("fill", "transparent") + # Save the modified SVG - tree.write(output_path, encoding='unicode', xml_declaration=True) + tree.write(output_path, encoding="unicode", xml_declaration=True) + def make_all_svg_backgrounds_transparent(directory): """ Makes all SVG files in a directory have transparent backgrounds. """ - + svg_files = glob.glob(os.path.join(directory, "*.svg")) - + for svg_file in svg_files: make_svg_background_transparent(svg_file) - + print(f"Processed {len(svg_files)} SVG files") @@ -279,7 +293,6 @@ def run_pygenometracks(inifile, region, output_file, width): track_label_width=0.05, plot_regions=region, plot_width=width, - ) # Extract the chromosome, start, and end from the region @@ -289,11 +302,13 @@ def run_pygenometracks(inifile, region, output_file, width): fig = trp.plot(output_file, chrom, start, end) return trp - + except Exception as e: if "No valid intervals were found" in str(e): print(f"No valid intervals found in BED file for region {region[0]}") - print("This is expected when the BED file doesn't overlap with the query region.") + print( + "This is expected when the BED file doesn't overlap with the query region." + ) return None else: print(f"Error in run_pygenometracks: {e}") @@ -400,7 +415,6 @@ def make_scale(vals: list) -> list: return make_m(scaled) - def get_colors(sdf, ncolors, is_freq, custom_breakpoints): assert ncolors > 2 and ncolors < 12 try: @@ -1225,19 +1239,18 @@ def merge_annotation_tri(svg1_path, svg2_path, output_path, deraster, width): # Position if deraster: # Its not perfect for width > 18, but good enough - adjust_svg1 = (0,-5*(h1/6)) - adjust_svg2 = ((9 - (width/2)), 5*(h1/6)) + adjust_svg1 = (0, -5 * (h1 / 6)) + adjust_svg2 = ((9 - (width / 2)), 5 * (h1 / 6)) svg1.moveto(adjust_svg1[0], adjust_svg1[1]) - svg2.scale(1.077 + (width/1000)) + svg2.scale(1.077 + (width / 1000)) svg2.moveto(adjust_svg2[0], adjust_svg2[1]) else: - adjust_svg1 = (0,-5*(h1/6)) - adjust_svg2 = (10+(width/4.5), 5*(h1/6)) + adjust_svg1 = (0, -5 * (h1 / 6)) + adjust_svg2 = (10 + (width / 4.5), 5 * (h1 / 6)) svg1.moveto(adjust_svg1[0], adjust_svg1[1]) svg2.moveto(adjust_svg2[0], adjust_svg2[1]) - scaling_factor = width/2 - svg2.scale(1.034 + (scaling_factor/1000)) - + scaling_factor = width / 2 + svg2.scale(1.034 + (scaling_factor / 1000)) # Append and save fig.append([svg1, svg2]) @@ -1738,16 +1751,18 @@ def create_plots( # Check if the BED file has valid intervals for this region first bed_df = read_annotation_bed(annotation) chrom_name = name_x.split(":")[0] - + # Filter for the chromosome and region of interest valid_intervals = bed_df[ - (bed_df['chrom'] == chrom_name) & - (bed_df['end'] >= min_val) & - (bed_df['start'] <= xlim) + (bed_df["chrom"] == chrom_name) + & (bed_df["end"] >= min_val) + & (bed_df["start"] <= xlim) ] - + if valid_intervals.empty: - print(f"No valid intervals found in {annotation} for region {chrom_name}:{min_val}-{xlim}.\n") + print( + f"No valid intervals found in {annotation} for region {chrom_name}:{min_val}-{xlim}.\n" + ) print("Skipping annotation track generation.\n") else: bed_track = run_pygenometracks( @@ -1763,7 +1778,10 @@ def create_plots( xlim, ) bed_track.plot( - f"{iniprefix}_ANNOTATION_TRACK.png", name_x.split(":")[0], min_val, xlim + f"{iniprefix}_ANNOTATION_TRACK.png", + name_x.split(":")[0], + min_val, + xlim, ) print(f"\nAnnotation track saved to {iniprefix}_ANNOTATION_TRACK\n") @@ -1842,7 +1860,9 @@ def create_plots( # Self-identity plots: Output _TRI, _FULL, and _HIST else: if deraster: - print(f"Producing dotplots with derasterization turned off. This may take a while...\n") + print( + f"Producing dotplots with derasterization turned off. This may take a while...\n" + ) tri_plot = make_tri( sdf, plot_filename, @@ -1900,10 +1920,10 @@ def create_plots( if annotation: anno_prefix = f"{plot_filename}_PRE_ANNOTATED" annotated_tri = tri_plot[0] + theme( - axis_title_x = element_blank(), - axis_line_x = element_blank(), - axis_text_x = element_blank(), - axis_ticks_minor_x = element_blank(), + axis_title_x=element_blank(), + axis_line_x=element_blank(), + axis_text_x=element_blank(), + axis_ticks_minor_x=element_blank(), axis_ticks=element_blank(), ) ggsave( @@ -1956,7 +1976,7 @@ def create_plots( f"{iniprefix}_ANNOTATION_TRACK.svg", f"{tri_prefix}_ANNOTATED.svg", deraster, - width + width, ) try: if vector_format != "svg": From c9976c521f063c5502b0b6c184eff2c6ba25df1f Mon Sep 17 00:00:00 2001 From: Alex Sweeten Date: Tue, 19 Aug 2025 23:06:00 -0700 Subject: [PATCH 6/8] bump version, add cooler as dependency --- pyproject.toml | 1 + src/moddotplot/const.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4eba38c..7f7f39d 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ dependencies = [ "cairosvg", "pygenometracks", "svgutils", + "cooler", ] authors = [ {name = "Alex Sweeten", email = "alex.sweeten@nih.gov"}, diff --git a/src/moddotplot/const.py b/src/moddotplot/const.py index 1302336..e5332ea 100755 --- a/src/moddotplot/const.py +++ b/src/moddotplot/const.py @@ -1,4 +1,4 @@ -VERSION = "0.9.5" +VERSION = "0.9.6" COLS = [ "#query_name", "query_start", From 6f067d09464a6e40e77de00c4b5c7185acc4cffd Mon Sep 17 00:00:00 2001 From: Alex Sweeten Date: Tue, 19 Aug 2025 23:16:08 -0700 Subject: [PATCH 7/8] Delete intermediate annotation file --- src/moddotplot/static_plots.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/moddotplot/static_plots.py b/src/moddotplot/static_plots.py index b73fce8..a76f428 100755 --- a/src/moddotplot/static_plots.py +++ b/src/moddotplot/static_plots.py @@ -1978,6 +1978,8 @@ def create_plots( deraster, width, ) + if os.path.exists(f"{anno_prefix}.svg"): + os.remove(f"{anno_prefix}.svg") try: if vector_format != "svg": if vector_format == "pdf": From 63a9a3b5e0632f71d031498de16bdbb30fec95b7 Mon Sep 17 00:00:00 2001 From: Alex Sweeten Date: Tue, 19 Aug 2025 23:16:25 -0700 Subject: [PATCH 8/8] BIG readme overhaul --- README.md | 284 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 168 insertions(+), 116 deletions(-) diff --git a/README.md b/README.md index c4acb11..c4193bf 100644 --- a/README.md +++ b/README.md @@ -3,19 +3,24 @@ [![PyPI](https://img.shields.io/pypi/v/ModDotPlot?color=blue&label=PyPI)](https://pypi.org/project/ModDotPlot/) [![CI](https://github.com/marbl/ModDotPlot/actions/workflows/black.yml/badge.svg)](https://github.com/marbl/ModDotPlot/actions/workflows/black.yml) +- [](#) - [Cite](#cite) - [About](#about) - [Installation](#installation) - [Usage](#usage) - - [Interactive Mode](#interactive-mode) - [Static Mode](#static-mode) + - [Interactive Mode](#interactive-mode) - [Standard arguments](#standard-arguments) - - [Interactive Mode Commands](#interactive-mode-commands) - [Static Mode Commands](#static-mode-commands) + - [Input/Output \& Formatting Commands](#inputoutput--formatting-commands) + - [Plot Customization Commands](#plot-customization-commands) + - [Sample run - Static Plots](#sample-run---static-plots) + - [Using a config file](#using-a-config-file) + - [Adding custom bed file annotations](#adding-custom-bed-file-annotations) + - [Comparing two sequences](#comparing-two-sequences) + - [Interactive Mode Commands](#interactive-mode-commands) - [Sample run - Interactive Mode](#sample-run---interactive-mode) - [Sample run - Port Forwarding](#sample-run---port-forwarding) - - [Sample run - Static Plots](#sample-run---static-plots) - - [Sample run - comparing two sequences](#sample-run---comparing-two-sequences) - [Questions](#questions) - [Known Issues](#known-issues) @@ -30,10 +35,12 @@ If you use ModDotPlot for your research, please cite our software! ## About -_ModDotPlot_ is a dot plot visualization tool designed for large sequences and whole genomes. _ModDotPlot_ outputs an identity heatmap similar to [StainedGlass](https://mrvollger.github.io/StainedGlass/) by rapidly approximating the Average Nucleotide Identity between pairwise combinations of genomic intervals. This significantly reduces the computational time required to produce these plots, enough to view multiple layers of resolution in real time! +_ModDotPlot_ is a dot plot visualization tool designed for large sequences and whole genomes. _ModDotPlot_ is the spiritual successor to [StainedGlass](https://mrvollger.github.io/StainedGlass/). The core algorithm breaks an input sequence down into intervals of sketched *k*-mers called **mod**imizers, and rapidly approximating the Average Nucleotide Identity between pairwise combinations of these intervals. This significantly reduces the computational time required to produce these plots, enough to view multiple layers of resolution in real time! ![](images/demo.gif) +If you're interested in learning more about ModDotPlot and tandem repeats, you can watch my in-depth [YouTube video tutorial](https://www.youtube.com/watch?v=_7sQaljB_ys&t=2321s&pp=ygUXYWxleCBzd2VldGVuIG1vZGRvdHBsb3Q%3D) from the [BioDiversity Genomics Academy](https://thebgacademy.org). + --- ## Installation @@ -67,7 +74,7 @@ Finally, confirm that the installation was installed correctly and that your ver | | | | (_) | (_| | | |__| | (_) | |_ | | | | (_) | |_ |_| |_|\___/ \__,_| |_____/ \___/ \__| |_| |_|\___/ \__| - v0.9.4 + v0.9.6 usage: moddotplot [-h] {interactive,static} ... @@ -85,15 +92,7 @@ options: ## Usage -_ModDotPlot_ must be run either in `interactive` mode, or `static` mode: - -### Interactive Mode - -``` -moddotplot interactive -``` - -This will launch a [Dash application](https://plotly.com/dash/) on your machine's localhost. Open any web browser and go to `http://127.0.0.1:` to view the interactive plot (this should happen automatically, but depending on your environment you might need to copy and paste this URL into your web browser). Running `Ctrl+C` on the command line will exit the Dash application. The default port number used by Dash is `8050`, but this can be customized using the `--port` command (see [interactive mode commands](#interactive-mode-commands) for further info). +_ModDotPlot_ must be run either in `static` mode, or `interactive` mode: ### Static Mode @@ -101,16 +100,27 @@ This will launch a [Dash application](https://plotly.com/dash/) on your machine' moddotplot static ``` -Running _ModDotPlot_ in static mode skips running Dash and quickly creates plots under the specified output directory `-o`. By default, running _ModDotPlot_ in static mode this will produce the following files: +Running _ModDotPlot_ in static mode quickly create plots under the specified output directory `-o`. By default, running _ModDotPlot_ in static mode this will produce the following files: - A paired-end bed file `.bedpe`, containing intervals alongside their corresponding identity estimates. - A self-identity dotplot for each sequence, as both an upper triangle matrix `_TRI` and full matrix `_FULL` representation. - A histogram of identity values for each sequence. +![](images/moddotplot_output.png) + All plots and histograms are output in a vectorized (default: `.svg`) and rasterized `.png` image. [Plotnine](https://plotnine.readthedocs.io/en/v0.12.4/) is the Python plotting library used, with [CairoSVG](https://cairosvg.org) used for converting between image formats. _ModDotPlot_ supports highly customizable plotting features in static mode. See [static mode commands](#static-mode-commands) for a complete list of features. + +### Interactive Mode + +``` +moddotplot interactive +``` + +Running _ModDotPlot_ in interactive mode will launch a [Dash application](https://plotly.com/dash/) on your machine's localhost. Open any web browser and go to `http://127.0.0.1:` to view the interactive plot (this should happen automatically, but depending on your environment you might need to copy and paste this URL into your web browser). Running `Ctrl+C` on the command line will exit the Dash application. The default port number used by Dash is `8050`, but this can be customized using the `--port` command (see [interactive mode commands](#interactive-mode-commands) for further info). + --- ### Standard arguments @@ -123,7 +133,7 @@ Fasta files to input. Multifasta files are accepted. Interactive mode will only `-b / --bed <.bed file>` -Input bedfile used for dotplot annotation (note: this is not the same as the paired-end bed file produced by ModDotPlot). If selected, this will produce an annotated bedtrack image `_ANNOTATION.svg`. Name in the bedfile must match the name of the fasta sequence header (or input, if `-l` is used instead) in order to produce a correct bed track. +Input bedfile used for dotplot annotation (note: this is not the same as the paired-end bed file produced by ModDotPlot). If selected, this will produce an annotated bedtrack image `_ANNOTATION_TRACK.svg` in static mode, and open an IGV js track in the interactive mode Dash application. The name in the bedfile must match the name of the fasta sequence header in order to produce a correct bed track. `-k / --kmer ` @@ -143,7 +153,7 @@ Each partition takes into account a fraction of its neighboring partitions k-mer `-m / --modimizer ` -Modimizer sketch size. Must be lower than window size `w`. A lower sketch size means less k-mers to compare (and faster runtime), at the expense of lower accuracy. Recommended to be kept > 1000. +Modimizer sketch size. Must be lower than window size `w`. A lower sketch size means less k-mers to compare (and faster runtime), at the expense of lower accuracy. Recommended to be kept >= 1000. `-r / --resolution ` @@ -151,59 +161,33 @@ Dotplot resolution. This corresponds to the number of windows each input sequenc `--compare ` -If set when 2 or more sequences are input into ModDotPlot, this will show an a vs. b style plot, in addition to a self-identity plot. Note that interactive mode currently only supports a maximum of two sequences. If more than two sequences are input, only the first two will be shown. +If set when 2 or more sequences are input into ModDotPlot, this will show an A vs. B style plot, in addition to a self-identity plot. Note that interactive mode currently only supports a maximum of two sequences. If more than two sequences are input, only the first two will be shown. `--compare-only ` -If set when 2 or more sequences are input into ModDotPlot, this will show an a vs. b style plot, without showing self-identity plots. +If set when 2 or more sequences are input into ModDotPlot, this will show an A vs. B style plot, without showing self-identity plots. `--ambiguous ` By default, k-mers that are homopolymers of ambiguous IUPAC codes (eg. NNNNNNNNNNN’s) are excluded from identity estimation. This results in gaps along the central diagonal for these regions. If desired, these can be kept by setting the `—-ambiguous` flag in both interactive and static mode. -`--no-plot ` - -Save matrix to file, but skip rendering of plots. In interactive mode, this must be used alongside the `--save` flag - ---- - -### Interactive Mode Commands - -`--port ` - -Port to display ModDotPlot on. Default is 8050, this can be changed to any accepted port. - -`-w / --window ` - -Minimum window size. By default, interactive mode sets a minimum window size based on the sequence length `n/2000` (eg. a 3Mbp sequence will have a 1500bp window). The maximum window size will always be set to `n/1000` (3000bp under the same example). This means that 2 matrices will be created. - -`-q / --quick ` - -This will automatically run interactive mode with a minimum window size equal to the maximum window size (`n/1000`). This will result in a quick launch, however the resolution of the plot will not improve upon zooming in. - -`-s / --save ` - -Save the matrices produced in interactive mode. By default, a folder called `interactive_matrices` will be saved in `--output_dir`, containing each matrix in compressed NumPy format, as well as metadata for each matrix in a pickle. Modifying the files in `interactive_matrices` will cause errors when attempting to load them in the future. - -`-l / --load ` - -Load previously saved matrices. Used instead of `-f/--fasta` - --- ### Static Mode Commands -`-c / --config <.json file>` - -Run `moddotplot static` with a config file, rather than (sample syntax). Recommended when creating a really customized plot. Used instead of `-f/--fasta`. +#### Input/Output & Formatting Commands `-l / --load <.bedpe file>` Create a plot from a previously computed pairwise bed file. Skips Average Nucleotide Identity computation. Used instead of `-f/--fasta`. Will only accept paired-end bed files produced by ModDotPlot. -`-w / --window ` +`-c / --config <.json file>` -Window size. Unlike interactive mode, only one matrix will be created, so this represents the *only* window size. Default is set to `n/1000` (eg. 3000bp for a 3Mbp sequence). +Run moddotplot static with a config file instead of command line args. Example syntax in `config/config.json`. Recommended when creating a really customized plot. Used instead of -f/--fasta. + +`--cooler ` + +If set, will output a matrix as a cooler file for each input sequence, in addition to a bedpe file. `--no-bedpe ` @@ -213,6 +197,10 @@ Skip output of bed file. Skip output of histogram legend. +`--no-plot ` + +Save .bedpe to file, but skip rendering of plots. + `--width ` Adjust width of self dot plots. Default is 9 inches. @@ -227,7 +215,13 @@ Vectorized image format to output to. Must be one of ["svg", "pdf", "ps"]. Defau `--deraster ` -By default, vectorized ouptuts rasterize the actual plot (not the axis). This is done to save space, as a high-resolution dotplot can be extremely space inefficient and prevent use of image manipulation software. This plot rasterization can be removed using this flag. +By default, vectorized outputs rasterize the actual dotplot (not the axis). This is done to save space, as a high-resolution dotplot can be extremely space inefficient and prevent use of image manipulation software. This plot rasterization can be removed using this flag. + +#### Plot Customization Commands + +`-w / --window ` + +Window size. Unlike interactive mode, only one matrix will be created, so this represents the *only* window size. Default is set to `n/1000` (eg. 3000bp for a 3Mbp sequence). `--palette ` @@ -257,13 +251,37 @@ Change axis limits for x and y axis. Useful when comparing multiple plots, allow By default, histograms are evenly spaced based on the number of colors and the identity threshold. Select this argument to bin based on the frequency of observed identity values. ---- +### Sample run - Static Plots -### Sample run - Interactive Mode +#### Using a config file + +When running _ModDotPlot_ to produce static plots, it is recommended to use a config file. The config file is provided in JSON, and accepts the same syntax as the command line arguments shown above. Here is an sample run using a centromeric sequence of _Arabadopsis thaliana_: ``` -$ moddotplot interactive -f sequences/Chr1_cen.fa +$ cat config/config.json +{ + "identity": 90, + "palette": "Blues_7", + "breakpoints": [ + 90, + 91, + 92, + 93, + 96, + 98, + 99, + 100 + ], + "output_dir": "Arabadopsis", + "fasta": [ + "sequences/Arabadopsis_chr1_centromere.fa" + ] +} +``` + +``` +$ moddotplot static -c config/config.json __ __ _ _____ _ _____ _ _ | \/ | | | | __ \ | | | __ \| | | | | \ / | ___ __| | | | | | ___ | |_ | |__) | | ___ | |_ @@ -271,84 +289,111 @@ $ moddotplot interactive -f sequences/Chr1_cen.fa | | | | (_) | (_| | | |__| | (_) | |_ | | | | (_) | |_ |_| |_|\___/ \__,_| |_____/ \___/ \__| |_| |_|\___/ \__| -Running ModDotPlot in interactive mode +Running ModDotPlot in static mode -Retrieving k-mers from Chr1:14000000-18000000.... +Retrieving k-mers from Chr1:14000001-18000000.... Progress: |████████████████████████████████████████| 100.0% Completed -Chr1:14000000-18000000 k-mers retrieved! +Chr1:14000001-18000000 k-mers retrieved! -Building self-identity matrices for Chr1:14000000-18000000, using a minimum window size of 2000.... +Computing self identity matrix for Chr1:14000001-18000000... -Layer 1 using window length 2000 + Sequence length n: 4000000 -Progress: |████████████████████████████████████████| 100.0% Completed + Window size w: 4000 + Modimizer sketch size: 1000 -Layer 2 using window length 4000 + Plot Resolution r: 1000 Progress: |████████████████████████████████████████| 100.0% Completed -ModDotPlot interactive mode is successfully running on http://127.0.0.1:8050/ +Saved self-identity matrix as a paired-end bed file to Arabadopsis/Chr1:14000001-18000000/Chr1:14000001-18000000.bedpe -Dash is running on http://127.0.0.1:8050/ +Triangle plots, full plots, and histogram for Arabadopsis/Chr1:14000001-18000000/Chr1:14000001-18000000 saved sucessfully. ``` +![](images/Chr1:14000001-18000000_FULL.png) -![](images/chr1_screenshot.png) +Using `samtools faidx` will result in a genomic range being added to a fasta file's header (eg. in the above sequence, the header is Chr1:14000001-18000000). _ModDotPlot_ will parse this syntax to add the appropriate axis. -The plotly plot can be navigated using the zoom (magnifying glass) and pan (hand) icons. The plot can be reset by double-clicking or selecting the home button. The identity threshold can be modified by seelcting the slider. Colors can be readjusted according to the same gradient based on the new identity levels. +#### Adding custom bed file annotations -### Sample run - Port Forwarding +If providing a custom annotation file using `--bed/b`, _ModDotPlot_ will output additional files: -Running interactive mode on an HPC environment can be accomplished through the use of port forwarding. On your remote server, run ModDotPlot as normal: +- An annotation track `_ANNOTATION_TRACK`, containing . Colors for ranges are set using the 9th column of the bedfile. +- The annotation track overlayed with a self-identity dotplot `_ANNOTATED` for each sequence present in the annotation track. ``` -moddotplot interactive -f INPUT_FASTA_FILE(S) --port HPC_PORT_NUMBER +$ moddotplot static -f sequences/HG002_chr13_MATERNAL:1-4000000.fa -b config/hg002v1.1.cenSatv2.0.bed + __ __ _ _____ _ _____ _ _ + | \/ | | | | __ \ | | | __ \| | | | + | \ / | ___ __| | | | | | ___ | |_ | |__) | | ___ | |_ + | |\/| |/ _ \ / _` | | | | |/ _ \| __| | ___/| |/ _ \| __| + | | | | (_) | (_| | | |__| | (_) | |_ | | | | (_) | |_ + |_| |_|\___/ \__,_| |_____/ \___/ \__| |_| |_|\___/ \__| + +Running ModDotPlot in static mode + +... + +Annotation track saved to chr13_MATERNAL:1-4000000/chr13_MATERNAL:1-4000000_ANNOTATION_TRACK + +Triangle plots, full plots, and histogram for chr13_MATERNAL:1-4000000/chr13_MATERNAL:1-4000000 saved sucessfully. + ``` +![](images/chr13_MATERNAL:1-4000000_TRI_ANNOTATED.png) -Then on your local machine, set up a port forwarding tunnel: + +#### Comparing two sequences + +![](images/moddotplot_comparative.png) + +ModDotPlot can produce an a vs. b style dotplot for each pairwise combination of input sequences. Use the `--compare` command line argument to include these plots. When running `--compare` in interactive mode, a dropdown menu will appear, allowing the user to switch between self-identity and pairwise plots. Note that a maximum of two sequences are allowed in interactive mode. If you want to skip the creation of self-identity plots, you can use `--compare-only`: ``` -ssh -N -f -L :127.0.0.1: HPC@LOGIN.CREDENTIALS +moddotplot static -f sequences/*_MATERNAL*.fa --compare-only ``` -You should now be able to view interactive mode using `http://127.0.0.1:`. Note that your own HPC environment may have specific instructions and/or restrictions for setting up port forwarding. +![](images/chr13_MATERNAL:1-4000000_chr14_MATERNAL:1-4000000_COMPARE.png) -VSCode now has automatic port forwarding built into the terminal menu. See [VSCode documentation](https://code.visualstudio.com/docs/editor/port-forwarding) for further details +--- -![](images/portforwarding.png) +### Interactive Mode Commands -### Sample run - Static Plots +`--port ` -When running ModDotPlot to produce static plots, it is recommended to use a config file, especially when creating extremely customized plots. The config file is provided in JSON, and accepts the same syntax as the command line arguments shown above. +Port to display ModDotPlot on. Default is 8050, this can be changed to any accepted port. -``` -$ cat config/config.json +`-w / --window ` -{ - "identity": 90, - "palette": "OrRd_7", - "breakpoints": [ - 90, - 91, - 92, - 93, - 96, - 98, - 99, - 100 - ], - "output_dir": "Chr1_cen_plots", - "fasta": [ - "../sequences/Chr1_cen.fa" - ] -} -``` +Minimum window size. By default, interactive mode sets a minimum window size based on the sequence length `n/2000` (eg. a 3Mbp sequence will have a 1500bp window). The maximum window size will always be set to `n/1000` (3000bp under the same example). This means that 2 matrices will be created. + +`-q / --quick ` + +This will automatically run interactive mode with a minimum window size equal to the maximum window size (`n/1000`). This will result in a quick launch, however the resolution of the plot will not improve upon zooming in. + +`-s / --save ` + +Save the matrices produced in interactive mode. By default, a folder called `interactive_matrices` will be saved in `--output_dir`, containing each matrix in compressed NumPy format, as well as metadata for each matrix in a pickle. Modifying the files in `interactive_matrices` will cause errors when attempting to load them in the future. + +`--no-plot ` + +Save .bedpe to file, but skip rendering of plots. Must be used with `--save`. + +`-l / --load ` + +Load previously saved matrices. Used instead of `-f/--fasta`. + + +--- + +### Sample run - Interactive Mode ``` -$ moddotplot static -c config/config.json +$ moddotplot interactive -f sequences/Chr1_cen.fa + __ __ _ _____ _ _____ _ _ | \/ | | | | __ \ | | | __ \| | | | | \ / | ___ __| | | | | | ___ | |_ | |__) | | ___ | |_ @@ -356,7 +401,7 @@ $ moddotplot static -c config/config.json | | | | (_) | (_| | | |__| | (_) | |_ | | | | (_) | |_ |_| |_|\___/ \__,_| |_____/ \___/ \__| |_| |_|\___/ \__| -Running ModDotPlot in static mode +Running ModDotPlot in interactive mode Retrieving k-mers from Chr1:14000000-18000000.... @@ -364,39 +409,46 @@ Progress: |███████████████████████ Chr1:14000000-18000000 k-mers retrieved! -Computing self identity matrix for chr1:14000000-18000000... +Building self-identity matrices for Chr1:14000000-18000000, using a minimum window size of 2000.... - Sequence length n: 4000000 +Layer 1 using window length 2000 - Window size w: 4000 +Progress: |████████████████████████████████████████| 100.0% Completed - Modimizer sketch value: 1000 - Plot Resolution r: 1000 +Layer 2 using window length 4000 Progress: |████████████████████████████████████████| 100.0% Completed -Saved bed file to Chr1_cen_plots/Chr1:14000000-18000000.bed - -Plots created! Saving to Chr1_cen_plots/Chr1:14000000-18000000... +ModDotPlot interactive mode is successfully running on http://127.0.0.1:8050/ -Chr1_cen_plots/Chr1:14M-18M_TRI.png, Chr1_cen_plots/Chr1:14M-18M_TRI.pdf, Chr1_cen_plots/Chr1:14M-18M_FULL.png, Chr1_cen_plots/Chr1:14M-18M_FULL.png, Chr1_cen_plots/Chr1:14M-18M_HIST.png and Chr1_cen_plots/Chr1:14M-18M_HIST.pdf, saved sucessfully. +Dash is running on http://127.0.0.1:8050/ ``` -![](images/Chr1:14000000-18000000_FULL.png) ---- +![](images/chr1_screenshot.png) +The plotly plot can be navigated using the zoom (magnifying glass) and pan (hand) icons. The plot can be reset by double-clicking or selecting the home button. The identity threshold can be modified by seelcting the slider. Colors can be readjusted according to the same gradient based on the new identity levels. -### Sample run - comparing two sequences +### Sample run - Port Forwarding -ModDotPlot can produce an a vs. b style dotplot for each pairwise combination of input sequences. Use the `--compare` command line argument to include these plots. When running `--compare` in interactive mode, a dropdown menu will appear, allowing the user to switch between self-identity and pairwise plots. Note that a maximum of two sequences are allowed in interactive mode. If you want to skip the creation of self-identity plots, you can use `--compare-only`: +Running interactive mode on an HPC environment can be accomplished through the use of port forwarding. On your remote server, run ModDotPlot as normal: ``` -moddotplot interactive -f sequences/chr15_segment.fa sequences/chr21_segment.fa --compare-only +moddotplot interactive -f INPUT_FASTA_FILE(S) --port HPC_PORT_NUMBER +``` + +Then on your local machine, set up a port forwarding tunnel: + +``` +ssh -N -f -L :127.0.0.1: HPC@LOGIN.CREDENTIALS ``` -![](images/chr14:2000000-5000000_chr21:2000000-5000000.png) +You should now be able to view interactive mode using `http://127.0.0.1:`. Note that your own HPC environment may have specific instructions and/or restrictions for setting up port forwarding. + +VSCode now has automatic port forwarding built into the terminal menu. See [VSCode documentation](https://code.visualstudio.com/docs/editor/port-forwarding) for further details + +![](images/portforwarding.png) ---