# PrettyPlot Examples

This notebook demonstrates the various plotting functions available in the PrettyPlot library with multiple examples for each function.

## Setup

In [None]:
import prettyplot as pp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Set publication style globally
pp.set_publication_style()

# For inline display in Jupyter
%matplotlib inline

---

## 1. Bar Plots

The `barplot()` function creates publication-ready bar plots with optional grouping, error bars, and hatch patterns.

### Example 1.1: Simple Bar Plot

In [None]:
# Four categories with a single value each
category_labels = ['A', 'B', 'C', 'D']
category_values = [23, 45, 38, 52]
data = pd.DataFrame({'category': category_labels, 'value': category_values})

# Collect the plot settings once, then hand them to barplot()
simple_bar_kwargs = dict(
    x='category',
    y='value',
    title='Simple Bar Plot',
    xlabel='Category',
    ylabel='Value',
    palette='pastel_categorical',
)
fig, ax = pp.barplot(data=data, **simple_bar_kwargs)

plt.show()

### Example 1.2: Bar Plot with Multiple Measurements (Error Bars)

In [None]:
# Three groups with ten simulated measurements each (seeded for reproducibility)
np.random.seed(42)
group_params = [
    # (group name, mean, standard deviation)
    ('Control', 100, 15),
    ('Treatment A', 120, 12),
    ('Treatment B', 135, 18),
]
group_names = [name for name, mean, sd in group_params]

# Samples are drawn in list order, so the random stream follows group order
measurements = np.concatenate(
    [np.random.normal(mean, sd, 10) for name, mean, sd in group_params]
)

data_with_error = pd.DataFrame({
    'category': np.repeat(group_names, 10),
    'measurement': measurements,
})

# One bar per group; errorbar='se' requests standard-error bars
fig, ax = pp.barplot(
    data=data_with_error,
    x='category', y='measurement',
    title='Bar Plot with Error Bars',
    xlabel='Treatment Group', ylabel='Measurement (a.u.)',
    errorbar='se',
    capsize=0.1,
    palette='pastel_categorical',
)

plt.show()

### Example 1.3: Grouped Bar Plot with Hue

In [None]:
# Simulated time course: 3 days x 2 groups x 10 replicates (seeded)
np.random.seed(123)

# (mean, sd) per day, Control first and Treated second; samples are drawn in
# exactly this order so the random stream lines up with the labels below
response_params = [
    (50, 10), (55, 10),   # Day 1
    (52, 10), (70, 12),   # Day 2
    (54, 10), (85, 15),   # Day 3
]
time_labels = np.repeat(['Day 1', 'Day 2', 'Day 3'], 20)
group_labels = np.tile(np.repeat(['Control', 'Treated'], 10), 3)
responses = np.concatenate(
    [np.random.normal(mean, sd, 10) for mean, sd in response_params]
)

grouped_data = pd.DataFrame({
    'time': time_labels,
    'group': group_labels,
    'response': responses,
})

# Bars grouped by time point and coloured by group via `hue`
group_colors = {'Control': '#8E8EC1', 'Treated': '#75B375'}
fig, ax = pp.barplot(
    data=grouped_data,
    x='time', y='response', hue='group',
    title='Time Course Experiment',
    xlabel='Time Point', ylabel='Response',
    errorbar='se',
    capsize=0.1,
    palette=group_colors,
)

plt.show()

### Example 1.4: Split Bar Plot with Hatch Patterns

In [None]:
# Simulated efficacy: 3 conditions x 3 treatments x 10 replicates (seeded)
np.random.seed(456)
condition_names = ['Low', 'Medium', 'High']
treatment_names = ['Placebo', 'Drug A', 'Drug B']

# (mean, sd) for every condition/treatment combination. Rows follow
# condition_names, columns follow treatment_names, and the samples are
# drawn in that same order.
efficacy_params = [
    (30, 5), (45, 6), (50, 7),     # Low:    Placebo, Drug A, Drug B
    (35, 5), (60, 8), (68, 9),     # Medium: Placebo, Drug A, Drug B
    (38, 5), (75, 10), (85, 12),   # High:   Placebo, Drug A, Drug B
]

split_data = pd.DataFrame({
    'condition': np.repeat(condition_names, 30),
    'treatment': np.tile(np.repeat(treatment_names, 10), 3),
    'efficacy': np.concatenate(
        [np.random.normal(mean, sd, 10) for mean, sd in efficacy_params]
    ),
})

# Colour is keyed on condition, hatch pattern on treatment
treatment_hatches = {'Placebo': '', 'Drug A': '///', 'Drug B': '\\\\\\'}
condition_colors = {'Low': '#FFB6B9', 'Medium': '#FEC8D8', 'High': '#957DAD'}

fig, ax = pp.barplot(
    data=split_data,
    x='condition', y='efficacy',
    hue='condition',
    split='treatment',
    title='Treatment Efficacy by Condition',
    xlabel='Condition', ylabel='Efficacy Score',
    errorbar='se',
    capsize=0.1,
    hatch_mapping=treatment_hatches,
    palette=condition_colors,
    figsize=(8, 5),
)

plt.show()

### Example 1.5: Custom Ordering and Alpha

In [None]:
# Fifteen simulated scores per stage, deliberately listed out of order (seeded)
np.random.seed(789)
stage_params = [
    # (stage, mean, standard deviation)
    ('Stage III', 65, 10),
    ('Stage I', 40, 8),
    ('Stage IV', 85, 12),
    ('Stage II', 52, 9),
]
stage_labels = np.repeat([stage for stage, mean, sd in stage_params], 15)
# Samples are drawn stage by stage in the order listed above
stage_scores = np.concatenate(
    [np.random.normal(mean, sd, 15) for stage, mean, sd in stage_params]
)

custom_data = pd.DataFrame({'stage': stage_labels, 'score': stage_scores})

# `order` puts the bars in clinical sequence regardless of the data order
stage_order = ['Stage I', 'Stage II', 'Stage III', 'Stage IV']
fig, ax = pp.barplot(
    data=custom_data,
    x='stage', y='score',
    order=stage_order,
    title='Disease Progression by Stage',
    xlabel='Disease Stage', ylabel='Disease Score',
    errorbar='sd',    # standard deviation
    capsize=0.15,
    alpha=0.3,        # NOTE(review): described as "more opaque" -- presumably vs. the library default; confirm
    linewidth=2.5,
    palette='pastel_categorical',
)

plt.show()

---

## 2. Circle Heatmaps

The `circle_heatmap()` function creates scatter-based heatmaps with distinctive double-layer markers, supporting both categorical and continuous color mapping.

### Example 2.1: Basic Circle Heatmap with Categorical Colors

In [None]:
# Enrichment table: every pathway (with its category) measured in every condition
pathway_categories = [
    ('Pathway A', 'Metabolism'),
    ('Pathway B', 'Signaling'),
    ('Pathway C', 'Transport'),
]
condition_names = ['Ctrl', 'Treatment 1', 'Treatment 2']

heatmap_data = pd.DataFrame({
    'pathway': [
        pathway
        for pathway, category in pathway_categories
        for condition in condition_names
    ],
    'condition': condition_names * len(pathway_categories),
    'neg_log10_pval': [
        1.5, 3.2, 4.5,   # Pathway A
        2.1, 5.3, 6.8,   # Pathway B
        0.8, 2.5, 3.9,   # Pathway C
    ],
    'category': [
        category
        for pathway, category in pathway_categories
        for condition in condition_names
    ],
})

# Marker size comes from `neg_log10_pval`, marker colour from `category`
category_colors = {
    'Metabolism': '#E8A0BF',
    'Signaling': '#BA90C6',
    'Transport': '#C0DBEA',
}
ax, cbar = pp.circle_heatmap(
    data=heatmap_data,
    x='condition', y='pathway',
    size='neg_log10_pval',
    hue='category',
    title='Pathway Enrichment Analysis',
    xlabel='Condition', ylabel='Pathway',
    size_legend_title='-log10(p-value)',
    palette=category_colors,
    figsize=(8, 5),
)

plt.show()

### Example 2.2: Circle Heatmap with Continuous Color Scale

In [None]:
# Simulated expression table: one row per gene/sample pair (seeded)
genes = ['GENE1', 'GENE2', 'GENE3', 'GENE4', 'GENE5']
samples = ['Sample A', 'Sample B', 'Sample C', 'Sample D']

np.random.seed(42)
# Rows are generated gene by gene, sample by sample; within each row log2fc is
# drawn before abs_effect, so the random stream is consumed in a fixed order
expression_data = [
    {
        'gene': gene,
        'sample': sample,
        'log2fc': np.random.uniform(-3, 3),
        'abs_effect': np.random.uniform(0.5, 5),
    }
    for gene in genes
    for sample in samples
]
expression_df = pd.DataFrame(expression_data)

# Numeric `hue` plus `hue_norm` gives a continuous colour scale
ax, cbar = pp.circle_heatmap(
    data=expression_df,
    x='sample', y='gene',
    size='abs_effect',
    hue='log2fc',
    hue_norm=(-3, 3),    # enables continuous color mapping
    palette='RdBu_r',    # red-blue diverging colormap
    title='Gene Expression Heatmap',
    xlabel='Sample', ylabel='Gene',
    size_legend_title='Effect Size',
    hue_legend_title='log2(Fold Change)',
    sizes=(100, 1000),
    alpha=0.2,
    figsize=(8, 6),
)

plt.show()

### Example 2.3: Circle Heatmap with Grid and Custom Styling

In [None]:
# Simulated pairwise table for four variables, leaving out the diagonal (seeded)
variables = ['Var1', 'Var2', 'Var3', 'Var4']
np.random.seed(100)

# One row per ordered pair with i != j; within a row the correlation is drawn
# before the significance value
corr_data = [
    {
        'variable1': var1,
        'variable2': var2,
        'correlation': np.random.uniform(-1, 1),
        'significance': np.random.uniform(0, 5),
    }
    for i, var1 in enumerate(variables)
    for j, var2 in enumerate(variables)
    if i != j
]
corr_df = pd.DataFrame(corr_data)

# Dashed, faint grid lines behind the markers
grid_style = {'alpha': 0.3, 'linestyle': '--', 'linewidth': 0.5}

ax, cbar = pp.circle_heatmap(
    data=corr_df,
    x='variable1', y='variable2',
    size='significance',
    hue='correlation',
    hue_norm=(-1, 1),
    palette='coolwarm',
    title='Correlation Matrix',
    xlabel='Variable', ylabel='Variable',
    size_legend_title='-log10(p)',
    hue_legend_title='Correlation',
    grid=True,
    grid_kws=grid_style,
    invert_y=False,   # keep the y-axis un-inverted
    alpha=0.15,
    linewidth=2.5,
    figsize=(7, 7),
)

plt.show()

### Example 2.4: Large Circle Heatmap with Custom Ordering

In [None]:
# Simulated expression for six tissues across five treatments (seeded)
tissues = ['Brain', 'Liver', 'Heart', 'Kidney', 'Lung', 'Muscle']
treatments = ['T0', 'T1', 'T2', 'T3', 'T4']

# Tissue -> tissue type; anything not listed falls back to 'Muscle'
tissue_type_lookup = {
    'Brain': 'Neural',
    'Liver': 'Organ',
    'Heart': 'Organ',
    'Kidney': 'Organ',
    'Lung': 'Organ',
}

np.random.seed(200)
# One row per tissue/treatment pair, generated tissue by tissue
large_data = [
    {
        'tissue': tissue,
        'treatment': treatment,
        'expression': np.random.uniform(1, 10),
        'tissue_type': tissue_type_lookup.get(tissue, 'Muscle'),
    }
    for tissue in tissues
    for treatment in treatments
]
large_df = pd.DataFrame(large_data)

# `y_order` sets the row order explicitly instead of using the data order
tissue_order = ['Brain', 'Heart', 'Lung', 'Liver', 'Kidney', 'Muscle']
tissue_type_colors = {'Neural': '#FF6B6B', 'Organ': '#4ECDC4', 'Muscle': '#FFE66D'}

ax, cbar = pp.circle_heatmap(
    data=large_df,
    x='treatment', y='tissue',
    size='expression',
    hue='tissue_type',
    y_order=tissue_order,
    title='Tissue-Specific Expression',
    xlabel='Time Point', ylabel='Tissue',
    size_legend_title='Expression',
    palette=tissue_type_colors,
    sizes=(50, 800),
    alpha=0.25,
    figsize=(8, 8),
)

plt.show()

### Example 2.5: Circle Heatmap with Viridis Palette

In [None]:
# Simulated intensities for six proteins at six time points (seeded)
np.random.seed(300)
proteins = ['Protein' + str(i) for i in range(1, 7)]
timepoints = ['0h', '2h', '4h', '8h', '12h', '24h']

# One row per protein/time pair; intensity is drawn before fold_change
intensity_data = [
    {
        'protein': protein,
        'time': time,
        'intensity': np.random.uniform(5, 15),
        'fold_change': np.random.uniform(0, 10),
    }
    for protein in proteins
    for time in timepoints
]
intensity_df = pd.DataFrame(intensity_data)

# Colorbar tweaks are passed through `cbar_kws`
colorbar_options = {'shrink': 0.5, 'aspect': 10}

ax, cbar = pp.circle_heatmap(
    data=intensity_df,
    x='time', y='protein',
    size='fold_change',
    hue='intensity',
    hue_norm=(5, 15),
    palette='viridis',
    title='Protein Dynamics Over Time',
    xlabel='Time', ylabel='Protein',
    size_legend_title='Fold Change',
    hue_legend_title='Intensity',
    alpha=0.2,
    linewidth=2,
    figsize=(10, 7),
    cbar_kws=colorbar_options,
)

plt.show()

---

## 3. Venn Diagrams

The `venn_diagram()` function creates 2-way or 3-way Venn diagrams with optional statistical analysis.

### Example 3.1: Simple 2-Way Venn Diagram

In [None]:
# Two overlapping integer sets: 1-50 and 30-80
set1 = {*range(1, 51)}
set2 = {*range(30, 81)}

# Take two colours from the 'pastel_categorical' palette for the diagram
two_colors = pp.get_palette('pastel_categorical', n_colors=2)
fig, ax, stats = pp.venn_diagram(
    sets=[set1, set2],
    labels=['Group A', 'Group B'],
    colors=two_colors,
    alpha=0.4,
)

# Report the statistics returned alongside the figure, one item per line
print(
    "\nOverlap Statistics:",
    f"Set sizes: {stats['set_sizes']}",
    f"Overlap: {stats['overlap']}",
    sep="\n",
)

plt.show()

### Example 3.2: 2-Way Venn with Statistical Testing

In [None]:
# Two gene sets of exactly 100 genes each that share GENE51-GENE100 (50 genes).
# range() excludes its stop value, so range(51, 151) yields GENE51..GENE150;
# starting at 50 would produce 101 genes rather than the intended 100.
genes_disease = {f'GENE{i}' for i in range(1, 101)}    # 100 disease genes (GENE1-GENE100)
genes_pathway = {f'GENE{i}' for i in range(51, 151)}   # 100 pathway genes (GENE51-GENE150)

# Passing a dict supplies the labels (keys) together with the sets (values);
# `universe_size` is what enables the statistical test reported below
fig, ax, stats = pp.venn_diagram(
    sets={'Disease Genes': genes_disease, 'Pathway Genes': genes_pathway},
    universe_size=20000,  # Total number of genes in genome
    colors=['#FFB3BA', '#BAE1FF'],
    alpha=0.5,
    figsize=(10, 7)
)

# Summarise the enrichment statistics returned with the figure
print("\nStatistical Analysis:")
print(f"Overlap: {stats['overlap']}")
print(f"Expected overlap: {stats['expected_overlap']:.2f}")
print(f"Fold enrichment: {stats['fold_enrichment']:.2f}x")
print(f"P-value: {stats['p_value']:.2e}")
print(f"Significant: {stats['significant']}")

plt.show()

### Example 3.3: Simple 3-Way Venn Diagram

In [None]:
# Three integer sets: 1-60, 40-100 and 70-130
setA = {*range(1, 61)}
setB = {*range(40, 101)}
setC = {*range(70, 131)}

# Take three colours from the 'pastel_categorical' palette for the diagram
three_colors = pp.get_palette('pastel_categorical', n_colors=3)
fig, ax, stats = pp.venn_diagram(
    sets=[setA, setB, setC],
    labels=['Set A', 'Set B', 'Set C'],
    colors=three_colors,
    alpha=0.35,
    figsize=(11, 7),
)

# Report the statistics returned alongside the figure, one item per line
print(
    "\n3-Way Overlap Statistics:",
    f"Set sizes: {stats['set_sizes']}",
    f"Triple overlap: {stats['triple_overlap']}",
    f"Unique counts: {stats['unique_counts']}",
    f"Pairwise overlaps: {stats['pairwise_overlaps']}",
    sep="\n",
)

plt.show()

### Example 3.4: 3-Way Venn with Statistical Testing

In [None]:
# Biological example: target genes of three transcription factors.
# Each set holds exactly 200 genes. range() excludes its stop value, so the
# starts are 101 and 151 (starting at 100 / 150 would give 201 genes each).
# All three sets share GENE151-GENE200 (50 genes).
tf1_targets = {f'GENE{i}' for i in range(1, 201)}     # 200 genes (GENE1-GENE200)
tf2_targets = {f'GENE{i}' for i in range(101, 301)}   # 200 genes (GENE101-GENE300)
tf3_targets = {f'GENE{i}' for i in range(151, 351)}   # 200 genes (GENE151-GENE350)

# Dict keys become the set labels; `universe_size` enables the statistics
fig, ax, stats = pp.venn_diagram(
    sets={'TF1': tf1_targets, 'TF2': tf2_targets, 'TF3': tf3_targets},
    universe_size=20000,
    colors=['#FFAAA5', '#FFD3B6', '#A8E6CF'],
    alpha=0.4,
    figsize=(12, 8)
)

# Summarise the triple-overlap statistics returned with the figure
print("\nTranscription Factor Overlap Analysis:")
print(f"Triple overlap: {stats['triple_overlap']}")
print(f"Expected triple overlap: {stats['expected_triple_overlap']:.2f}")
print(f"Fold enrichment: {stats['fold_enrichment']:.2f}x")
print(f"P-value: {stats['p_value']:.2e}")

plt.show()

### Example 3.5: Weighted vs Unweighted Venn Diagrams

In [None]:
# Three sets with very different sizes. range() excludes its stop value, so
# these bounds give exactly 20, 50 and 150 elements (starting the last two at
# 15 and 50 would give 51 and 151, contradicting the labels on the figure).
small_set = set(range(1, 21))      # 20 elements (1-20)
medium_set = set(range(16, 66))    # 50 elements (16-65)
large_set = set(range(51, 201))    # 150 elements (51-200)

# Arguments shared by both panels. The labels are derived from the actual set
# sizes so the numbers shown can never drift from the data.
venn_kwargs = dict(
    sets=[small_set, medium_set, large_set],
    labels=[
        f'Small ({len(small_set)})',
        f'Medium ({len(medium_set)})',
        f'Large ({len(large_set)})',
    ],
    colors=['#FF6B6B', '#4ECDC4', '#45B7D1'],
    alpha=0.35,
)

# Create side-by-side comparison
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Left panel: unweighted layout, drawn into the first subplot
fig, ax1, stats1 = pp.venn_diagram(weighted=False, ax=ax1, **venn_kwargs)
ax1.set_title('Unweighted (Equal Areas)', fontsize=14, fontweight='bold')

# Right panel: weighted layout, drawn into the second subplot
fig, ax2, stats2 = pp.venn_diagram(weighted=True, ax=ax2, **venn_kwargs)
ax2.set_title('Weighted (Proportional Areas)', fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

print("\nNote: Unweighted layouts show all regions with equal area,")
print("while weighted layouts scale regions proportional to set sizes.")

---

## Summary

This notebook demonstrated:

### Bar Plots
- Simple bar plots with one value per category
- Bar plots with error bars (standard error/deviation)
- Grouped bar plots with hue parameter
- Split bar plots with hatch patterns
- Custom ordering and transparency settings

### Circle Heatmaps
- Basic categorical color mapping
- Continuous color scales with colorbars
- Grid styling and customization
- Custom ordering and large datasets
- Various color palettes (RdBu_r, coolwarm, viridis)

### Venn Diagrams
- 2-way and 3-way Venn diagrams
- Statistical significance testing
- Weighted vs unweighted layouts
- Custom colors and transparency
- Using dictionaries for labeled sets

For more information, see the [PrettyPlot documentation](https://github.com/jorgebotas/prettyplot).