# pyLocusZoom - Getting Started

This notebook demonstrates the key features of pyLocusZoom for creating regional association plots.

In [None]:
# Install pyLocusZoom if needed.
# %pip (rather than !pip) installs into the environment of the running kernel.
# No version is pinned here, so whichever release pip resolves is installed.
%pip install pylocuszoom

In [None]:
import numpy as np
import pandas as pd
from pylocuszoom import LocusZoomPlotter

## 1. Create Sample Data

First, let's create some synthetic GWAS results, along with gene and exon annotations, for demonstration.

In [None]:
# Generate synthetic GWAS data: 500 SNPs at sorted random positions in the
# 1,000,000-2,000,000 window. Seeding the global generator keeps the cell
# reproducible on every run.
np.random.seed(42)
n_snps = 500
positions = np.sort(np.random.randint(1_000_000, 2_000_000, n_snps))

# Create a peak around position 1,500,000.
# SNPs within 100 kb of the centre get a p-value that decays with distance
# (1e-8 exactly at the centre); every other SNP gets uniform background noise.
peak_center = 1_500_000
peak_dist = np.abs(positions - peak_center)
in_peak = peak_dist < 100_000

p_values = np.empty(n_snps)
p_values[in_peak] = 10 ** -(8 * np.exp(-peak_dist[in_peak] / 30_000))
# One uniform draw per background SNP, consumed in position order so the
# values match what a SNP-by-SNP loop over `positions` would generate.
p_values[~in_peak] = np.random.uniform(0.01, 1, size=int((~in_peak).sum()))

# Column names used for the association table: 'ps' (position),
# 'p_wald' (p-value) and 'rs' (SNP identifier).
gwas_df = pd.DataFrame({
    'ps': positions,
    'p_wald': p_values,
    'rs': [f'rs{i}' for i in range(n_snps)],
})

print(f"GWAS data: {len(gwas_df)} SNPs")
gwas_df.head()

In [None]:
# Gene annotations, one record per gene: (chr, start, end, gene_name, strand)
gene_records = [
    ('1', 1_100_000, 1_200_000, 'GENE1', '+'),
    ('1', 1_400_000, 1_520_000, 'GENE2', '-'),
    ('1', 1_550_000, 1_650_000, 'GENE3', '+'),
    ('1', 1_800_000, 1_900_000, 'GENE4', '-'),
]
genes_df = pd.DataFrame(
    gene_records,
    columns=['chr', 'start', 'end', 'gene_name', 'strand'],
)

# Exon annotations, one record per exon: (chr, start, end, gene_name).
# GENE1-GENE3 get two exons each; GENE4 has no exon rows.
exon_records = [
    ('1', 1_100_000, 1_120_000, 'GENE1'),
    ('1', 1_150_000, 1_170_000, 'GENE1'),
    ('1', 1_400_000, 1_420_000, 'GENE2'),
    ('1', 1_450_000, 1_470_000, 'GENE2'),
    ('1', 1_550_000, 1_580_000, 'GENE3'),
    ('1', 1_600_000, 1_630_000, 'GENE3'),
]
exons_df = pd.DataFrame(
    exon_records,
    columns=['chr', 'start', 'end', 'gene_name'],
)

print("Genes:")
genes_df

## 2. Basic Plot (Matplotlib)

Create a basic regional association plot using the default matplotlib backend.

In [None]:
# Build a plotter without naming a backend, so the default one
# (matplotlib, per this section) is used
plotter = LocusZoomPlotter(species="canine", log_level=None)

# Region to draw: chromosome 1, positions 1,000,000-2,000,000
region = dict(chrom=1, start=1_000_000, end=2_000_000)

# Create plot: association data plus the gene and exon annotation frames,
# with the lead position set at 1,500,000
fig = plotter.plot(
    gwas_df,
    **region,
    lead_pos=1_500_000,
    genes_df=genes_df,
    exons_df=exons_df,
    snp_labels=True,
    label_top_n=3,
    show_recombination=False,  # Disable for demo
)

fig.show()

## 3. Interactive Plot (Plotly)

Create an interactive plot with hover tooltips using the plotly backend.

In [None]:
# Same species settings as the static example, but rendered through plotly
plotter_interactive = LocusZoomPlotter(species="canine", backend="plotly", log_level=None)

# Keyword arguments for the interactive figure: region, lead position,
# and recombination overlay switched off
interactive_options = {
    "chrom": 1,
    "start": 1_000_000,
    "end": 2_000_000,
    "lead_pos": 1_500_000,
    "show_recombination": False,
}

# Create interactive plot
fig = plotter_interactive.plot(gwas_df, **interactive_options)

# Render in the notebook; hovering over a point shows its SNP info
fig.show()

## 4. Stacked Plots

Compare multiple GWAS results (e.g., different phenotypes) in a stacked layout.

In [None]:
# Create a second GWAS with a different peak, centred on 1,700,000.
# All p-values are computed in one vectorised step rather than assigned row
# by row with .loc, which is slow and relies on the frame's index labels
# matching the loop counter.
peak_center2 = 1_700_000
peak_dist2 = np.abs(gwas_df['ps'].to_numpy() - peak_center2)
in_peak2 = peak_dist2 < 80_000

p_values2 = np.empty(len(gwas_df))
# Within 80 kb of the centre: p-value decays with distance (1e-6 at the centre)
p_values2[in_peak2] = 10 ** -(6 * np.exp(-peak_dist2[in_peak2] / 25_000))
# Elsewhere: one uniform background draw per SNP, consumed in row order so the
# values match what a row-by-row loop would generate from the same RNG state.
p_values2[~in_peak2] = np.random.uniform(0.05, 1, size=int((~in_peak2).sum()))

# New frame with the same 'ps' and 'rs' columns; gwas_df itself is not modified
gwas_df2 = gwas_df.assign(p_wald=p_values2)

# Create stacked plot: one labelled entry per GWAS result
plotter = LocusZoomPlotter(species="canine", log_level=None)

# Label -> association frame, in the order the frames are passed to the plotter
panels = {"Phenotype A": gwas_df, "Phenotype B": gwas_df2}

fig = plotter.plot_stacked(
    list(panels.values()),
    panel_labels=list(panels.keys()),
    chrom=1,
    start=1_000_000,
    end=2_000_000,
    genes_df=genes_df,
    label_top_n=2,
    show_recombination=False,
)

fig.show()

## 5. eQTL Overlay

Add expression QTL data as a separate panel to compare GWAS and eQTL signals.

In [None]:
# Synthetic eQTL data, one record per variant: (pos, p_value, gene)
eqtl_records = [
    (1_480_000, 1e-5, 'GENE2'),
    (1_500_000, 1e-7, 'GENE2'),
    (1_520_000, 1e-6, 'GENE2'),
    (1_550_000, 1e-4, 'GENE3'),
    (1_600_000, 0.01, 'GENE3'),
]
eqtl_df = pd.DataFrame(eqtl_records, columns=['pos', 'p_value', 'gene'])

# Stacked figure with a single GWAS entry plus the eQTL data.
# NOTE(review): eqtl_gene presumably selects which gene's eQTL rows are
# drawn - confirm against the pyLocusZoom documentation.
eqtl_plot_options = dict(
    chrom=1,
    start=1_000_000,
    end=2_000_000,
    genes_df=genes_df,
    eqtl_df=eqtl_df,
    eqtl_gene="GENE2",
    show_recombination=False,
)

fig = plotter.plot_stacked([gwas_df], **eqtl_plot_options)

fig.show()

## 6. Saving Plots

Save plots to static files (PNG and PDF with matplotlib) or to an interactive HTML file (plotly).

In [None]:
# Matplotlib: write the figure to disk as PNG and PDF
plotter = LocusZoomPlotter(species="canine", log_level=None)
fig = plotter.plot(
    gwas_df,
    chrom=1,
    start=1_000_000,
    end=2_000_000,
    show_recombination=False,
)

# Output file -> extra savefig options; every file uses a tight bounding box,
# and only the PNG sets an explicit dpi
save_targets = {
    "regional_plot.png": {"dpi": 150},
    "regional_plot.pdf": {},
}
for out_path, extra_options in save_targets.items():
    fig.savefig(out_path, bbox_inches='tight', **extra_options)
print("Saved: regional_plot.png, regional_plot.pdf")

# Plotly: write the interactive figure to an HTML file
plotter_plotly = LocusZoomPlotter(
    species="canine",
    backend="plotly",
    log_level=None,
)
fig = plotter_plotly.plot(
    gwas_df,
    chrom=1,
    start=1_000_000,
    end=2_000_000,
    show_recombination=False,
)

html_path = "regional_plot_interactive.html"
fig.write_html(html_path)
print("Saved: regional_plot_interactive.html")

## 7. Species Options

pyLocusZoom supports canine (*Canis lupus familiaris*), feline (*Felis catus*), and custom species.

In [None]:
# Canine, described by this notebook as the default species with built-in
# recombination maps
plotter_canine = LocusZoomPlotter(
    species="canine",
)

# Canine again, this time selecting the CanFam4 genome build
plotter_canfam4 = LocusZoomPlotter(
    species="canine",
    genome_build="canfam4",
)

# Feline: LD and gene tracks; recombination data must come from the user
plotter_feline = LocusZoomPlotter(
    species="feline",
)

# Custom species (species=None): all reference data is supplied by the user
plotter_custom = LocusZoomPlotter(
    species=None,
)

print("Species configurations created successfully!")

## Summary

pyLocusZoom provides:

- **Multiple backends**: matplotlib (static), plotly (interactive), bokeh (dashboards)
- **Stacked plots**: Compare multiple GWAS/phenotypes
- **eQTL overlay**: Expression QTL data integration
- **Species support**: Canine, feline, or custom species
- **PySpark support**: Handle large-scale genomics data

For more details, see the [documentation](https://github.com/michael-denyer/pyLocusZoom).