# Goals
**[Script]** Plot genomic tracks using [gggenomes](https://thackl.github.io/gggenomes/articles/gggenomes.html)

**[Overall]** This is a quick and efficient way to compare features (genes, transcripts, exons, or CDSs) across multiple references, simply by providing a list of genes or transcripts, or a specified region

**[To Do]**
1. Add plot for multiple features (ex. CDS + UTRs)
2. Add sequence comparison with `seqkit` (clustalo section) + `ggmsa`
    - Ex: ZXDC has the same length across all three human refs, but the sequence is different in RefSeq T2T
3. Try **[pyGenomeViz](https://github.com/moshi4/pyGenomeViz)**: a Python package similar to `gggenomes` (GenBank/GFF input, Streamlit app, can align/BLAST)

# Packages

In [2]:
#####################
### Data Cleaning ###
#####################
library("data.table")
library("janitor")
library("rlang")

###########
### Viz ###
###########
library("tidyverse")
library("paletteer")
library("gggenomes")

####################
### Session Info ###
####################
library("sessioninfo")

## Options

In [None]:
# Session-wide options:
#   warn = 1            -> print each warning as it occurs
#   repr.matrix.max.*   -> let the notebook display up to 200 columns / rows
options(
    warn = 1,
    repr.matrix.max.cols = 200,
    repr.matrix.max.rows = 200
)

# Default ggplot2 theme applied to the figures created in this notebook.
theme_set(theme_bw(base_size = 18, base_family = "sans"))

## Functions

In [None]:
# Load the project helper script that sits next to this notebook.
# NOTE(review): get_paths(), concat_files() and plot_track(), used below, are
# presumably defined in this file -- confirm.
source(file.path(".", "References.R"))

# Parameters

## Inputs

In [None]:
# Species label; also used as a directory name below.
# Format: Genus_species or species1_and_species2
species <- "Homo_sapiens"

# Chromosome names as strings: "1" .. "22", "X", "Y".
primary_chr <- c(as.character(1:22), "X", "Y")

# Directory for this species under ../outputs.
output_path <- file.path("../outputs", species)

# Reference labels (this order is used in all plots) and one color per label.
ref_names <- c("Ensembl GRCh38", "RefSeq GRCh38", "RefSeq T2T")
ref_colors <- c(
    "#008080",  # teal
    "#DAA520",  # goldenrod
    "#6A5ACD"   # slateblue (purplish)
)

## Outputs

In [None]:
# Directory passed to plot_track() as `plot_path`; `species` is set in the
# Inputs cell.
track_path <- file.path("..", "results", "plots", species, "specific_tracks")

# Create the directory (and any missing parents); warnings such as
# "already exists" are suppressed.
dir.create(track_path, showWarnings = FALSE, recursive = TRUE)

# Plot track by gene name
- **Note:** If a gene name is missing from a reference, check its synonyms

In [None]:
# Locate the gene-level annotation table of every reference and combine them.
# NOTE(review): get_paths(), concat_files() and plot_track() are not defined in
# this notebook; presumably they come from ./References.R -- confirm there.
gene_ann_paths <- get_paths(ref_names, file_id = "gene.sorted.tsv", ref_path = output_path)
df_gene <- concat_files(gene_ann_paths, ref_names, ref_colors, to_chr = primary_chr)

# Plot the selected genes, looked up by gene name, for all references.
# w / h / v are sizing arguments of plot_track(); see its definition for units.
fig_gene_name <- plot_track(
    df_gene,
    refs_to_plot = ref_names,
    feats_to_plot = c("ADARB2", "AFF2", "AFF3"),
    feat_type = "gene_name",
    plot_path = track_path,
    color = "basetheme::deepblue",
    show_track_info = FALSE,
    w = 6, h = 8, v = 1.25
)

# Show the saved figure inline. The `src` value is quoted because an unquoted
# HTML attribute value ends at the first space, which would truncate any path
# that contains one.
IRdisplay::display_html(paste0('<img src="', fig_gene_name$path, '">'))

# Plot track by genomic region

In [None]:
# Plot every gene falling in a genomic window ("chr:start-end") for a single
# reference. `df_gene` is the combined gene table built in the
# "Plot track by gene name" cell.
# w / h / v are sizing arguments of plot_track(); see its definition for units.
fig_region <- plot_track(
    df_gene,
    refs_to_plot = c("RefSeq T2T"),
    feats_to_plot = c("19:47600000-47750000"),
    feat_type = "region",
    plot_path = track_path,
    color = "basetheme::deepblue",
    show_track_info = FALSE,
    w = 6, h = 2.5, v = 1.25
)

# Show the saved figure inline. The `src` value is quoted because an unquoted
# HTML attribute value ends at the first space, which would truncate any path
# that contains one.
IRdisplay::display_html(paste0('<img src="', fig_region$path, '">'))

# Plot track for specific feature

## Transcripts by gene name

In [None]:
# Locate the transcript-level annotation table of every reference and combine
# them.
tx_ann_paths <- get_paths(ref_names, file_id = "transcript.sorted.tsv", ref_path = output_path)
df_tx <- concat_files(tx_ann_paths, ref_names, ref_colors, to_chr = primary_chr)

# Plot the transcripts of one gene for a single reference.
# NOTE(review): `feats_to_plot` is a gene symbol while `feat_type` is
# "transcript_id"; presumably plot_track() selects by gene and draws one track
# per transcript -- confirm against its definition.
fig_transcripts <- plot_track(
    df_tx,
    refs_to_plot = c("RefSeq T2T"),
    feats_to_plot = c("APOE"),
    feat_type = "transcript_id",
    plot_path = track_path,
    color = "basetheme::deepblue",
    show_track_info = FALSE,
    w = 6, h = 5, v = 1.25
)

# Show the saved figure inline. The `src` value is quoted because an unquoted
# HTML attribute value ends at the first space, which would truncate any path
# that contains one.
IRdisplay::display_html(paste0('<img src="', fig_transcripts$path, '">'))

## Exons by transcript name

In [None]:
# Locate the exon-level annotation table of every reference and combine them.
exon_ann_paths <- get_paths(ref_names, file_id = "exon.sorted.tsv", ref_path = output_path)
df_exon <- concat_files(exon_ann_paths, ref_names, ref_colors, to_chr = primary_chr)

# Plot the exons of the listed transcripts across all references.
# w / h / v are sizing arguments of plot_track(); see its definition for units.
fig_exons <- plot_track(
    df_exon,
    refs_to_plot = ref_names,
    feats_to_plot = c("ENST00000252486", "NM_000041.4"),
    feat_type = "exon",
    plot_path = track_path,
    color = "basetheme::deepblue",
    show_track_info = FALSE,
    w = 6.5, h = 3, v = 1.25
)

# Show the saved figure inline. The `src` value is quoted because an unquoted
# HTML attribute value ends at the first space, which would truncate any path
# that contains one.
IRdisplay::display_html(paste0('<img src="', fig_exons$path, '">'))

## CDS by transcript name

In [None]:
# Locate the CDS-level annotation table of every reference and combine them.
cds_ann_paths <- get_paths(ref_names, file_id = "cds.sorted.tsv", ref_path = output_path)
df_cds <- concat_files(cds_ann_paths, ref_names, ref_colors, to_chr = primary_chr)

# Plot the coding sequences of the listed transcripts across all references.
# w / h / v are sizing arguments of plot_track(); see its definition for units.
fig_cds <- plot_track(
    df_cds,
    refs_to_plot = ref_names,
    feats_to_plot = c("ENST00000252486", "NM_000041.4"),
    feat_type = "cds",
    plot_path = track_path,
    color = "basetheme::deepblue",
    show_track_info = FALSE,
    w = 6.5, h = 3, v = 1.25
)

# Show the saved figure inline. The `src` value is quoted because an unquoted
# HTML attribute value ends at the first space, which would truncate any path
# that contains one.
IRdisplay::display_html(paste0('<img src="', fig_cds$path, '">'))

# Session info

In [3]:
# Capture platform and attached-package details for reproducibility.
# The argument is spelled `pkgs` in full: the previous `pkg =` only resolved
# through R's partial argument matching.
si <- session_info(pkgs = "attached", to_file = FALSE)

# Compact views: platform summary, then package name + loaded version.
as_tibble(si$platform) %>% select(version, os, system, date)
as_tibble(si$packages) %>% select(package, loadedversion)

version,os,system,date
<chr>,<chr>,<chr>,<chr>
R version 4.4.0 (2024-04-24),Ubuntu 22.04.4 LTS,"x86_64, linux-gnu",2025-08-11


package,loadedversion
<chr>,<chr>
data.table,1.15.0
dplyr,1.1.4
forcats,1.0.0
gggenomes,1.0.1
ggplot2,3.5.1
janitor,2.2.0
lubridate,1.9.3
paletteer,1.6.0
purrr,1.0.2
readr,2.1.5
