# Visualization Demo

This notebook demonstrates the plotting and table helpers provided by `scripts.visualization`:
- Time series plots
- Scatter plots with regression
- Histograms and distributions
- Box plots
- Correlation heatmaps
- Styled tables

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
from pathlib import Path
from datetime import datetime, timedelta

# Put the directory two levels above the working directory at the front of
# sys.path so the `scripts` package can be imported. Any earlier copy of the
# entry is dropped first, so re-running this cell keeps exactly one entry
# instead of stacking duplicates.
project_root = str(Path.cwd().parent.parent)
if project_root in sys.path:
    sys.path.remove(project_root)
sys.path.insert(0, project_root)

from scripts.visualization import (
    # Plots
    time_series_plot, scatter_plot, histogram, box_plot, correlation_heatmap,
    # Tables
    summary_table, styled_dataframe, export_table, pivot_table,
    display_full_table, paginated_table, display_scrollable_table,
    # Themes
    set_theme, get_palette, list_themes, preview_palette, save_figure
)

%matplotlib inline

# Activate the 'light' theme; 'dark' and 'publication' are the alternatives to try.
set_theme('light')

# Print each category reported by list_themes() together with its entries.
print("Available themes and palettes:")
theme_catalog = list_themes()
for category, items in theme_catalog.items():
    print(f"  {category}: {items}")

In [None]:
# Build one year of synthetic daily water-quality readings for the demo.
np.random.seed(42)  # fixed seed so every run draws the same sample
n_samples = 365

dates = pd.date_range(start='2024-01-01', periods=n_samples, freq='D')

# Sine cycle with a 365-sample period and amplitude 10, shared by the
# temperature and dissolved-oxygen columns.
day_index = np.arange(n_samples)
seasonal = 10 * np.sin(2 * np.pi * day_index / 365)

# Draw the random components in column order: each call advances the seeded
# global RNG, so reordering these lines would change the generated values.
site_labels = np.random.choice(['Upper Basin', 'Mid Basin', 'Lower Basin'], n_samples)
temperature = 15 + seasonal + np.random.normal(0, 2, n_samples)
ph = 7.5 + np.random.normal(0, 0.3, n_samples)
dissolved_oxygen = 8 - 0.2 * seasonal + np.random.normal(0, 0.5, n_samples)
turbidity = np.random.exponential(5, n_samples)
# Discharge follows the same cycle shifted by a phase offset of 1 radian.
discharge = 100 + 50 * np.sin(2 * np.pi * day_index / 365 + 1) + np.random.normal(0, 20, n_samples)

df = pd.DataFrame({
    'date': dates,
    'site': site_labels,
    'temperature': temperature,
    'ph': ph,
    'dissolved_oxygen': dissolved_oxygen,
    'turbidity': turbidity,
    'discharge': discharge,
})

df.head()

## Time Series Plots

In [None]:
# Plot the temperature column against date with no extra options.
basic_series_options = {
    'date_col': 'date',
    'value_col': 'temperature',
    'title': 'Water Temperature Over Time',
    'ylabel': 'Temperature (°C)',
}
fig = time_series_plot(df, **basic_series_options)

In [None]:
# Same temperature series, now passing rolling_window=30 for the moving
# average named in the title.
fig = time_series_plot(
    df,
    rolling_window=30,
    date_col='date', value_col='temperature',
    title='Water Temperature with 30-Day Moving Average',
    ylabel='Temperature (°C)',
)

In [None]:
# Discharge over time, split by the 'site' column via group_col.
discharge_by_site_options = dict(
    date_col='date',
    value_col='discharge',
    group_col='site',
    title='Discharge by Site',
    ylabel='Discharge (cfs)',
)
fig = time_series_plot(df, **discharge_by_site_options)

## Scatter Plots

In [None]:
# Scatter of dissolved oxygen against temperature with the regression
# option switched on.
temp_do_labels = {
    'title': 'Temperature vs Dissolved Oxygen',
    'xlabel': 'Temperature (°C)',
    'ylabel': 'Dissolved Oxygen (mg/L)',
}
fig = scatter_plot(
    df,
    x_col='temperature',
    y_col='dissolved_oxygen',
    show_regression=True,
    **temp_do_labels,
)

In [None]:
# Same temperature/DO scatter, with points colored by the 'site' column.
fig = scatter_plot(
    df,
    x_col='temperature', y_col='dissolved_oxygen', color_col='site',
    title='Temperature vs DO by Site',
    xlabel='Temperature (°C)', ylabel='Dissolved Oxygen (mg/L)',
)

## Histograms and Distributions

In [None]:
# Histogram of the 'ph' column with both the stats and KDE options enabled.
ph_hist_options = {
    'column': 'ph',
    'title': 'pH Distribution',
    'xlabel': 'pH',
    'show_stats': True,
    'show_kde': True,
}
fig = histogram(df, **ph_hist_options)

In [None]:
# Turbidity is drawn from an exponential distribution (skewed), so this
# histogram uses 50 bins and enables the stats option.
fig = histogram(
    df,
    column='turbidity',
    bins=50,
    show_stats=True,
    title='Turbidity Distribution',
    xlabel='Turbidity (NTU)',
)

In [None]:
# Temperature histogram grouped by the 'site' column.
grouped_hist_options = dict(
    column='temperature',
    group_col='site',
    title='Temperature Distribution by Site',
    xlabel='Temperature (°C)',
)
fig = histogram(df, **grouped_hist_options)

## Box Plots

In [None]:
# One dissolved-oxygen box per site, with the show_points option enabled.
fig = box_plot(
    df,
    value_col='dissolved_oxygen', group_col='site',
    title='Dissolved Oxygen by Site', ylabel='DO (mg/L)',
    show_points=True,
)

In [None]:
# Monthly comparison
# month_name() returns plain strings, which sort alphabetically (April,
# August, ...). Storing them as an ordered categorical pins calendar order for
# any grouping or sorting applied to this column.
# NOTE(review): box_plot's own group ordering is not visible here — confirm it
# respects categorical order.
calendar_months = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
df['month'] = pd.Categorical(
    df['date'].dt.month_name(),
    categories=calendar_months,
    ordered=True,
)

fig = box_plot(
    df,
    value_col='temperature',
    group_col='month',
    title='Temperature by Month',
    ylabel='Temperature (°C)'
)

## Correlation Analysis

In [None]:
# Correlation heatmap restricted to the five numeric measurement columns.
measurement_columns = ['temperature', 'ph', 'dissolved_oxygen', 'turbidity', 'discharge']
fig = correlation_heatmap(
    df,
    columns=measurement_columns,
    title='Water Quality Parameter Correlations',
)

## Tables and Summaries

In [None]:
# Summary table over the five numeric measurement columns.
summary_columns = ['temperature', 'ph', 'dissolved_oxygen', 'turbidity', 'discharge']
summary = summary_table(df, columns=summary_columns)
summary

In [None]:
# Summary of three measurement columns, grouped by the 'site' column.
site_summary_options = {
    'columns': ['temperature', 'ph', 'dissolved_oxygen'],
    'group_by': 'site',
}
site_summary = summary_table(df, **site_summary_options)
site_summary

In [None]:
# Per-site means of four measurement columns, rendered as a styled table.
# NOTE: despite its name, monthly_avg is grouped by 'site', not by month; the
# name is kept because the export cell refers to it.
averaged_columns = ['temperature', 'ph', 'dissolved_oxygen', 'discharge']
monthly_avg = df.groupby('site')[averaged_columns].mean().reset_index()

# Display format (precision plus unit suffix) for each averaged column.
display_formats = {
    'temperature': '{:.1f}°C',
    'ph': '{:.2f}',
    'dissolved_oxygen': '{:.1f} mg/L',
    'discharge': '{:.0f} cfs',
}

styled = styled_dataframe(
    monthly_avg,
    highlight_max=['temperature', 'discharge'],
    highlight_min=['ph'],
    gradient_columns=['dissolved_oxygen'],
    format_dict=display_formats,
    caption='Average Water Quality by Site',
)
styled

In [None]:
# Mean temperature per site (rows) and month number (columns), with margins
# requested; the result is shown rounded to one decimal place.
df['month_num'] = df['date'].dt.month

pivot_options = dict(
    values='temperature',
    index='site',
    columns='month_num',
    aggfunc='mean',
    margins=True,
)
pivot = pivot_table(df, **pivot_options)
pivot.round(1)

In [None]:
# Export the per-site averages in three formats and list each path that
# export_table reports back.
export_formats = ['csv', 'html', 'markdown']
paths = export_table(monthly_avg, name='site_averages', formats=export_formats)

print("Exported to:")
for fmt, path in paths.items():
    print(f"  {fmt}: {path}")

## Full Table Display Options

For large DataFrames, you can display every row without truncation, page through the data, or render a scrollable table.

In [None]:
# Show a table without truncation. Only the first 50 rows are passed in to
# keep the demo output short.
print("Full table display (showing first 50 rows for demo):")
demo_rows = df.head(50)
display_full_table(demo_rows)

In [None]:
# Paginated view for exploring large datasets: request page 1 with 10 rows
# per page.
print("Paginated view (page 1):")
paginated_table(
    df,
    page_size=10,
    page=1,
)

In [None]:
# Scrollable HTML table (sticky headers), suited to dashboards and reports.
# The first 100 rows are shown with the height set to 300px.
scroll_rows = df.head(100)
display_scrollable_table(
    scroll_rows,
    height='300px',
    caption='Water Quality Measurements (Scrollable)',
)

## Saving Figures

In [None]:
# Save a figure to file
output_dir = Path.cwd().parent.parent / 'data' / 'outputs'
# Ensure the target directory exists before plotting. Whether time_series_plot
# creates missing directories is not visible here, and saving into a
# nonexistent directory would fail.
output_dir.mkdir(parents=True, exist_ok=True)
# Build the path once so the save target and the printed message cannot drift.
output_path = output_dir / 'discharge_timeseries.png'

fig = time_series_plot(
    df,
    date_col='date',
    value_col='discharge',
    title='Annual Discharge Pattern',
    ylabel='Discharge (cfs)',
    rolling_window=7,
    save_path=output_path
)
print(f"Saved to {output_path}")