# Data Visualization Demo

This notebook demonstrates comprehensive visualization capabilities for the Data Analysis Platform.

## Features:
- Statistical visualizations with matplotlib and seaborn
- Interactive plots with plotly
- Business dashboard components
- Custom styling and themes
- Export capabilities for presentations

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
# Silence library warnings so the demo output stays readable.
# NOTE(review): this hides every warning category, including deprecation
# notices raised by the plotting libraries - consider narrowing the filter.
warnings.filterwarnings('ignore')

# Set visualization themes
# matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*';
# older releases only ship the un-versioned name. Use whichever one this
# installation provides so the cell does not fail on an unknown style name.
SEABORN_STYLE = next(
    (name for name in ('seaborn-v0_8', 'seaborn') if name in plt.style.available),
    'default',
)
plt.style.use(SEABORN_STYLE)
sns.set_palette("husl")

# Custom color palettes (five hex colors each), referenced by the plotting cells
BUSINESS_COLORS = ['#2E86AB', '#A23B72', '#F18F01', '#C73E1D', '#592941']
TECH_COLORS = ['#4ECDC4', '#44A08D', '#093637', '#F38BA8', '#FFB3C6']
ACADEMIC_COLORS = ['#264653', '#2A9D8F', '#E9C46A', '#F4A261', '#E76F51']

print("📊 Visualization libraries loaded successfully!")

## 1. Generate Sample Dataset

In [None]:
# Build the synthetic demo dataset (seeded, one row per day)
np.random.seed(42)
n_samples = 1000

# The draws below share NumPy's global random state, so their order
# determines the generated values - keep it unchanged.
revenue_base = np.random.normal(50000, 15000, n_samples)
customer_counts = np.random.poisson(100, n_samples)
conversion_rates = np.random.beta(2, 8, n_samples)
regions = np.random.choice(['North', 'South', 'East', 'West'], n_samples)
categories = np.random.choice(['Electronics', 'Clothing', 'Home', 'Sports'], n_samples)
marketing_spend = np.random.exponential(5000, n_samples)
satisfaction_raw = np.random.normal(4.2, 0.8, n_samples)

data = {
    'date': pd.date_range('2023-01-01', periods=n_samples, freq='D'),
    # Revenue is lifted by 30% of marketing spend to introduce a correlation
    'revenue': revenue_base + marketing_spend * 0.3,
    'customers': customer_counts,
    'conversion_rate': conversion_rates,
    'region': regions,
    'product_category': categories,
    'marketing_spend': marketing_spend,
    # Scores are clamped to the 1-5 range
    'satisfaction_score': np.clip(satisfaction_raw, 1, 5),
}

# Calendar helper columns are derived from the date column
df = pd.DataFrame(data).assign(
    month=lambda frame: frame['date'].dt.month,
    quarter=lambda frame: frame['date'].dt.quarter,
)

print(f"📈 Generated dataset with {len(df)} records")
print(f"📅 Date range: {df['date'].min()} to {df['date'].max()}")
df.head()

## 2. Statistical Visualizations

In [None]:
# Distribution Analysis: a 2x2 grid, one panel per metric
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('Statistical Distribution Analysis', fontsize=16, fontweight='bold')
(ax_revenue, ax_customers), (ax_conversion, ax_satisfaction) = axes

# Top-left: revenue histogram with a KDE overlay
sns.histplot(data=df, x='revenue', kde=True, ax=ax_revenue, color=BUSINESS_COLORS[0])
ax_revenue.set(title='Revenue Distribution', xlabel='Revenue ($)')

# Top-right: customer counts per region as box plots
sns.boxplot(data=df, x='region', y='customers', ax=ax_customers, palette=BUSINESS_COLORS)
ax_customers.set(title='Customer Distribution by Region')
ax_customers.tick_params(axis='x', rotation=45)

# Bottom-left: conversion rate per product category as violin plots
sns.violinplot(data=df, x='product_category', y='conversion_rate', ax=ax_conversion, palette=TECH_COLORS)
ax_conversion.set(title='Conversion Rate by Product Category')
ax_conversion.tick_params(axis='x', rotation=45)

# Bottom-right: satisfaction score histogram (20 bins)
sns.histplot(data=df, x='satisfaction_score', bins=20, ax=ax_satisfaction, color=ACADEMIC_COLORS[0])
ax_satisfaction.set(title='Customer Satisfaction Distribution', xlabel='Satisfaction Score (1-5)')

plt.tight_layout()
plt.show()

In [None]:
# Correlation Analysis: pairwise correlations of the numeric metrics
numeric_cols = ['revenue', 'customers', 'conversion_rate', 'marketing_spend', 'satisfaction_score']
correlation_matrix = df[numeric_cols].corr()

# Heatmap options: annotated cells with three decimals, diverging colormap centered on 0
heatmap_style = {
    'annot': True,
    'cmap': 'RdYlBu_r',
    'center': 0,
    'square': True,
    'fmt': '.3f',
    'cbar_kws': {'shrink': 0.8},
}

plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, **heatmap_style)
plt.title('Feature Correlation Matrix', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

print("🔍 Strong correlations found:")
# The correlation matrix is symmetric, so unstacking all of it lists every
# pair twice (a-b and b-a). Keep only the cells strictly above the diagonal:
# this drops the self-correlations and leaves each pair exactly once.
upper_triangle = np.triu(np.ones(correlation_matrix.shape, dtype=bool), k=1)
correlations = (
    correlation_matrix.abs()
    .where(upper_triangle)   # masked cells become NaN
    .stack()
    .dropna()                # explicit, so masked cells are removed whatever stack() does with NaN
    .sort_values(ascending=False)
)
print(correlations.head(3))

## 3. Time Series Analysis

In [None]:
# Monthly trends: totals for revenue/customers, averages for the rate metrics
month_period = df['date'].dt.to_period('M')
monthly_data = (
    df.groupby(month_period)
    .agg(
        revenue=('revenue', 'sum'),
        customers=('customers', 'sum'),
        conversion_rate=('conversion_rate', 'mean'),
        satisfaction_score=('satisfaction_score', 'mean'),
    )
    .reset_index()
)
# Convert the monthly periods back to timestamps for plotting
monthly_data['date'] = monthly_data['date'].dt.to_timestamp()

fig, axes = plt.subplots(2, 2, figsize=(16, 10))
fig.suptitle('Business Metrics Trends Over Time', fontsize=16, fontweight='bold')

# One entry per panel, in row-major order: (column, panel title, y-axis label, marker)
trend_panels = [
    ('revenue', 'Monthly Revenue Trend', 'Total Revenue ($)', 'o'),
    ('customers', 'Monthly Customer Acquisition', 'Total Customers', 's'),
    ('conversion_rate', 'Average Conversion Rate', 'Conversion Rate', '^'),
    ('satisfaction_score', 'Average Customer Satisfaction', 'Satisfaction Score', 'd'),
]

# axes.flat walks the grid row by row, so panel i is drawn with BUSINESS_COLORS[i]
for ax, line_color, (column, panel_title, y_label, marker) in zip(axes.flat, BUSINESS_COLORS, trend_panels):
    ax.plot(monthly_data['date'], monthly_data[column],
            color=line_color, linewidth=2, marker=marker)
    ax.set_title(panel_title)
    ax.set_ylabel(y_label)
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 4. Interactive Plotly Visualizations

In [None]:
# Interactive scatter: spend vs revenue, bubble size = customers, color = region
scatter_layout = {'width': 800, 'height': 600, 'title_font_size': 16}

fig = px.scatter(
    df,
    x='marketing_spend',
    y='revenue',
    size='customers',
    color='region',
    hover_data=['satisfaction_score', 'conversion_rate'],
    title='Revenue vs Marketing Spend (Interactive)',
    color_discrete_sequence=BUSINESS_COLORS,
)
fig.update_layout(**scatter_layout)
fig.show()

# Short usage hints printed under the figure
usage_hints = (
    "🎯 Interactive features:",
    "- Hover for detailed metrics",
    "- Click legend to filter regions",
    "- Zoom and pan for exploration",
)
for hint in usage_hints:
    print(hint)

In [None]:
# Interactive time series: one subplot per monthly metric
# Each entry: (subplot title, monthly_data column, trace name), in row-major order
panel_specs = [
    ('Revenue Over Time', 'revenue', 'Revenue'),
    ('Customer Acquisition', 'customers', 'Customers'),
    ('Conversion Rate', 'conversion_rate', 'Conversion Rate'),
    ('Satisfaction Score', 'satisfaction_score', 'Satisfaction'),
]

fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=tuple(spec[0] for spec in panel_specs),
    vertical_spacing=0.08
)

# Add traces: position 0..3 maps onto the 2x2 grid; plotly rows/cols are 1-based
for position, (_, column, trace_name) in enumerate(panel_specs):
    grid_row, grid_col = divmod(position, 2)
    fig.add_trace(
        go.Scatter(x=monthly_data['date'], y=monthly_data[column],
                   mode='lines+markers', name=trace_name,
                   line=dict(color=BUSINESS_COLORS[position], width=3)),
        row=grid_row + 1, col=grid_col + 1
    )

fig.update_layout(
    height=600,
    showlegend=False,
    title_text="Business Metrics Dashboard (Interactive)",
    title_x=0.5
)

fig.show()

## 5. Business Dashboard Components

In [None]:
# KPI Dashboard
def create_kpi_card(value, title, change_pct=None, format_type='currency'):
    """Build a one-line KPI string such as ``"Total Revenue: $1,234 ↗ 5.2%"``.

    Args:
        value: Numeric KPI value.
        title: Label placed in front of the value.
        change_pct: Optional change in percent points. Positive values get an
            up arrow, negative values a down arrow and zero a flat arrow; the
            magnitude is shown with one decimal. ``None`` omits the suffix.
        format_type: ``'currency'`` (whole dollars with thousands separators),
            ``'percentage'`` (fraction rendered as a percent with two
            decimals); any other value renders a whole number with thousands
            separators.

    Returns:
        The formatted string ``"<title>: <value><change>"``.
    """
    if format_type == 'currency':
        # Put the minus sign in front of the dollar sign ("-$5", not "$-5")
        sign = "-" if value < 0 else ""
        formatted_value = f"{sign}${abs(value):,.0f}"
    elif format_type == 'percentage':
        formatted_value = f"{value:.2%}"
    else:
        formatted_value = f"{value:,.0f}"

    change_text = ""
    if change_pct is not None:
        # An unchanged metric must not be reported as a decline
        if change_pct > 0:
            arrow = "↗"
        elif change_pct < 0:
            arrow = "↘"
        else:
            arrow = "→"
        change_text = f" {arrow} {abs(change_pct):.1f}%"

    return f"{title}: {formatted_value}{change_text}"

# Calculate KPIs over the whole dataset
total_revenue = df['revenue'].sum()
total_customers = df['customers'].sum()
avg_conversion = df['conversion_rate'].mean()
avg_satisfaction = df['satisfaction_score'].mean()
total_marketing = df['marketing_spend'].sum()

# The change percentages passed below are fixed illustrative literals,
# not values computed from the data.
print("📊 KEY PERFORMANCE INDICATORS")
print("=" * 40)
print(create_kpi_card(total_revenue, "Total Revenue", 5.2))
print(create_kpi_card(total_customers, "Total Customers", 12.3, 'number'))
print(create_kpi_card(avg_conversion, "Avg Conversion Rate", -2.1, 'percentage'))
# The 'number' format of create_kpi_card prints whole units, which would
# collapse the 1-5 satisfaction score to a single digit; keep two decimals.
print(f"Avg Satisfaction: {avg_satisfaction:.2f} ↗ 1.8%")
print(create_kpi_card(total_marketing, "Marketing Spend", 8.7))
# Reported as total revenue divided by total marketing spend
print(f"ROI: {(total_revenue / total_marketing):.2f}x")

In [None]:
# Regional Performance Dashboard
# Aggregate at full precision. Rounding is applied only to the table that is
# printed: rounding before the min-max scaling below would discard most of
# the spread in small-valued metrics (conversion_rate lies between 0 and 1)
# and produce 0/0 when all rounded values coincide.
regional_stats = df.groupby('region').agg({
    'revenue': ['sum', 'mean'],
    'customers': 'sum',
    'conversion_rate': 'mean',
    'satisfaction_score': 'mean',
    'marketing_spend': 'sum'
})

# Flatten column names, e.g. ('revenue', 'mean') -> 'revenue_mean'
regional_stats.columns = ['_'.join(col).strip() for col in regional_stats.columns]

# Display copy: two decimals, region as a regular column
regional_summary = regional_stats.round(2).reset_index()

# Two panels: bar chart on the left, normalized heatmap on the right
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Revenue by region (bar chart; seaborn's barplot shows the per-region mean by default)
sns.barplot(data=df, x='region', y='revenue', ax=axes[0], palette=BUSINESS_COLORS)
axes[0].set_title('Average Revenue by Region')
axes[0].set_ylabel('Revenue ($)')

# Performance heatmap inputs: unrounded per-region means, indexed by region
performance_metrics = regional_stats[['revenue_mean', 'conversion_rate_mean', 'satisfaction_score_mean']]

# Min-max normalize each metric across regions so they share a 0-1 scale
performance_normalized = (performance_metrics - performance_metrics.min()) / (performance_metrics.max() - performance_metrics.min())

sns.heatmap(performance_normalized.T,
            annot=True,
            cmap='RdYlGn',
            ax=axes[1],
            fmt='.2f',
            cbar_kws={'label': 'Performance Score (Normalized)'})
axes[1].set_title('Regional Performance Heatmap')
axes[1].set_xlabel('Region')

plt.tight_layout()
plt.show()

print("\n🏆 Regional Performance Summary:")
print(regional_summary)

## 6. Advanced Analytics Visualizations

In [None]:
# Customer Segmentation Analysis
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Prepare features for clustering
features = ['revenue', 'customers', 'conversion_rate', 'satisfaction_score']
X = df[features].copy()

# Standardize features so each one contributes on a comparable scale
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Perform clustering. n_init is set explicitly because scikit-learn's default
# for it differs between releases, which would change the resulting clusters.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
df['segment'] = kmeans.fit_predict(X_scaled)

# Create segment labels. KMeans numbers its clusters in no meaningful order,
# so a fixed id -> name table would attach the business labels arbitrarily.
# Rank the clusters by mean revenue instead: the highest-revenue cluster is
# 'High Value', the lowest 'At Risk'.
segment_names = ['High Value', 'Growth Potential', 'Standard', 'At Risk']
clusters_by_revenue = (
    df.groupby('segment')['revenue'].mean().sort_values(ascending=False).index
)
segment_labels = dict(zip(clusters_by_revenue, segment_names))
df['segment_label'] = df['segment'].map(segment_labels)

# Visualize segments (marker size encodes conversion rate)
fig = px.scatter_3d(df,
                   x='revenue',
                   y='customers',
                   z='satisfaction_score',
                   color='segment_label',
                   size='conversion_rate',
                   title='Customer Segmentation (3D Analysis)',
                   color_discrete_sequence=BUSINESS_COLORS)

fig.update_layout(scene=dict(
    xaxis_title='Revenue',
    yaxis_title='Customers',
    zaxis_title='Satisfaction Score'
))

fig.show()

# Segment analysis: per-segment means plus the row count of each segment
segment_analysis = df.groupby('segment_label').agg({
    'revenue': ['mean', 'count'],
    'customers': 'mean',
    'conversion_rate': 'mean',
    'satisfaction_score': 'mean'
}).round(2)

print("\n🎯 Customer Segment Analysis:")
print(segment_analysis)

## 7. Export and Presentation Tools

In [None]:
# Create executive summary visualization
def create_executive_dashboard():
    """Create a comprehensive executive dashboard.

    Builds a 3x2 plotly subplot figure from the notebook-level ``df`` and
    ``monthly_data`` frames, colored with ``BUSINESS_COLORS``:

    * row 1: monthly revenue line, total revenue per region
    * row 2: revenue vs satisfaction scatter, revenue-to-marketing-spend
      ratio per region
    * row 3: total revenue per product category, revenue gauge in millions

    Returns:
        The assembled plotly figure (not shown; call ``.show()`` on it).
    """
    fig = make_subplots(
        rows=3, cols=2,
        subplot_titles=('Revenue Trend', 'Regional Performance',
                       'Customer Satisfaction', 'Marketing ROI',
                       'Product Category Performance', 'Key Metrics'),
        specs=[[{"secondary_y": False}, {"type": "bar"}],
               [{"type": "scatter"}, {"type": "bar"}],
               [{"type": "bar"}, {"type": "indicator"}]],
        vertical_spacing=0.08
    )

    # Revenue trend
    fig.add_trace(
        go.Scatter(x=monthly_data['date'], y=monthly_data['revenue'],
                  mode='lines+markers', name='Revenue',
                  line=dict(color=BUSINESS_COLORS[0], width=3)),
        row=1, col=1
    )

    # Per-region totals, computed once and shared by the two regional panels.
    # A vectorized sum replaces groupby.apply with a Python lambda.
    regional_totals = df.groupby('region')[['revenue', 'marketing_spend']].sum()

    # Regional performance
    regional_rev = regional_totals['revenue']
    fig.add_trace(
        go.Bar(x=regional_rev.index, y=regional_rev.values,
              marker_color=BUSINESS_COLORS[1]),
        row=1, col=2
    )

    # Customer satisfaction scatter
    fig.add_trace(
        go.Scatter(x=df['revenue'], y=df['satisfaction_score'],
                  mode='markers',
                  marker=dict(color=BUSINESS_COLORS[2], size=5, opacity=0.6)),
        row=2, col=1
    )

    # Marketing ROI by region: total revenue divided by total marketing spend
    roi_by_region = regional_totals['revenue'] / regional_totals['marketing_spend']
    fig.add_trace(
        go.Bar(x=roi_by_region.index, y=roi_by_region.values,
              marker_color=BUSINESS_COLORS[3]),
        row=2, col=2
    )

    # Product category performance
    category_rev = df.groupby('product_category')['revenue'].sum()
    fig.add_trace(
        go.Bar(x=category_rev.index, y=category_rev.values,
              marker_color=BUSINESS_COLORS[4]),
        row=3, col=1
    )

    # Key metric indicator. The total is computed here so the function does
    # not depend on a variable defined by a different notebook cell.
    revenue_millions = df['revenue'].sum() / 1000000
    fig.add_trace(
        go.Indicator(
            mode="number+gauge+delta",
            value=revenue_millions,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={"text": "Revenue (M$)"},
            # Gauge axis upper bound (100) and delta reference (45) are fixed literals
            gauge={'axis': {'range': [None, 100]},
                  'bar': {'color': BUSINESS_COLORS[0]},
                  'bgcolor': "white",
                  'borderwidth': 2,
                  'bordercolor': "gray"},
            delta={'reference': 45}
        ),
        row=3, col=2
    )

    fig.update_layout(
        height=900,
        showlegend=False,
        title_text="Executive Dashboard - Data Analysis Platform",
        title_x=0.5,
        title_font_size=20
    )

    return fig

# Build the executive dashboard once and render it
exec_dashboard = create_executive_dashboard()
exec_dashboard.show()

# Summary of the panels, printed line by line under the figure
dashboard_report = (
    "\n📋 Executive Dashboard Created!",
    "Features:",
    "- Revenue trend analysis",
    "- Regional performance comparison",
    "- Customer satisfaction correlation",
    "- Marketing ROI by region",
    "- Product category breakdown",
    "- Key performance indicator gauge",
)
for report_line in dashboard_report:
    print(report_line)

In [None]:
# Export capabilities demonstration
import os

# Exported files go to an "exports" folder one level above the working directory
export_dir = '../exports'
os.makedirs(name=export_dir, exist_ok=True)  # no error if the folder already exists

# Export sample static plots
def export_visualizations(output_dir=None):
    """Export key visualizations for presentations.

    Writes two static PNG charts (monthly revenue trend, total revenue by
    region) and the interactive executive dashboard as an HTML file. Reads
    the notebook-level ``monthly_data``, ``df``, ``exec_dashboard`` and
    ``BUSINESS_COLORS``.

    Args:
        output_dir: Folder to write into. Defaults to the notebook-level
            ``export_dir``. The folder is created when it does not exist.

    Returns:
        dict with the keys ``'static_charts'`` (list of PNG file names),
        ``'interactive'`` (HTML file name) and ``'export_path'`` (the folder
        the files were written to).
    """
    target_dir = export_dir if output_dir is None else output_dir
    # Make the function usable on its own, without a prior setup step
    os.makedirs(target_dir, exist_ok=True)

    # 1. Revenue trend chart
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        ax.plot(monthly_data['date'], monthly_data['revenue'],
                color=BUSINESS_COLORS[0], linewidth=3, marker='o', markersize=6)
        ax.set_title('Monthly Revenue Trend', fontsize=16, fontweight='bold')
        ax.set_ylabel('Revenue ($)', fontsize=12)
        ax.tick_params(axis='x', rotation=45)
        ax.grid(True, alpha=0.3)
        fig.tight_layout()
        fig.savefig(os.path.join(target_dir, 'revenue_trend.png'), dpi=300, bbox_inches='tight')
    finally:
        # Close the figure even if saving fails, so it does not stay open
        plt.close(fig)

    # 2. Regional performance chart
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        regional_data = df.groupby('region')['revenue'].sum()
        bars = ax.bar(regional_data.index, regional_data.values, color=BUSINESS_COLORS)
        ax.set_title('Revenue by Region', fontsize=16, fontweight='bold')
        ax.set_ylabel('Total Revenue ($)', fontsize=12)

        # Add value labels on bars, centered just above each bar's top edge
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2., height,
                    f'${height:,.0f}', ha='center', va='bottom')

        fig.tight_layout()
        fig.savefig(os.path.join(target_dir, 'regional_performance.png'), dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)

    # 3. Export interactive dashboard as HTML
    exec_dashboard.write_html(os.path.join(target_dir, 'executive_dashboard.html'))

    return {
        'static_charts': ['revenue_trend.png', 'regional_performance.png'],
        'interactive': 'executive_dashboard.html',
        'export_path': target_dir
    }

# Run the export and report what was written
exports = export_visualizations()
static_chart_names = exports['static_charts']

print("\n💾 Visualization Export Complete!")
print(f"📁 Export directory: {exports['export_path']}")
print("📊 Static charts:")
for chart in static_chart_names:
    print(f"  - {chart}")
print(f"🌐 Interactive dashboard: {exports['interactive']}")
print("\n✅ Files ready for presentations and web deployment!")

## Summary

This visualization demo showcases the comprehensive capabilities of the Data Analysis Platform:

### 📊 Visualization Types Covered:
1. **Statistical Analysis**: Distributions, correlations, box plots, violin plots
2. **Time Series**: Monthly aggregation, trend analysis, multi-metric dashboards
3. **Interactive Plots**: Plotly-based exploration tools with hover details
4. **Business Dashboards**: KPI cards, performance heatmaps, ROI analysis
5. **Advanced Analytics**: Customer segmentation, 3D visualizations
6. **Executive Reporting**: Multi-panel dashboards with key metrics

### 🎨 Styling Features:
- Custom color palettes for different use cases
- Professional themes and formatting
- Responsive layouts for various screen sizes
- Export capabilities for presentations

### 🔧 Technical Integration:
- Seamless integration with matplotlib, seaborn, and plotly
- Automated export functionality
- Interactive features for data exploration
- Web-ready HTML outputs

The platform provides everything needed for comprehensive data visualization from exploratory analysis to executive reporting.