# Interactive Data Visualization with SQL and Plotly

This notebook demonstrates how to fetch data from a SQL database and generate interactive Plotly visualizations using the `plot_executor` module.

In [None]:
import sys
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import HTML

# Make the project's `tools` package importable: put the directory two levels
# above the current working directory on the module search path. The membership
# check keeps a re-run of this cell from appending the same entry again.
project_root = os.path.dirname(os.path.dirname(os.path.abspath('.')))
if project_root not in sys.path:
    sys.path.append(project_root)
from tools.plot_executor import fetch_data_from_db, execute_plot_code, display_plot

## Fetch Data from SQL Database

First, let's fetch some data from the SQL database using the `fetch_data_from_db` function.

In [None]:
# Sample query against the customer table - adapt it to your own schema
query = """
SELECT TOP 100 * 
FROM [master].[dbo].[customer_information]
"""

# Run the query; the result is checked against None below before it is used
df = fetch_data_from_db(query)

# Handle the failure case first, otherwise preview the data and report its size
if df is None:
    print("Failed to fetch data from database")
else:
    display(df.head())
    row_count = len(df)
    column_count = len(df.columns)
    print(f"Fetched {row_count} rows with {column_count} columns")

## Example 1: Customer Segmentation by Income Category

In [None]:
# Plotly snippet handed to execute_plot_code together with the DataFrame.
# Judging from the call at the bottom of this cell, the snippet sees the data as
# `df` and is expected to leave its figure in `fig` - TODO confirm against
# tools.plot_executor. The snippet imports everything it uses itself, so it does
# not rely on names the executor may or may not inject.
income_viz_code = """
import plotly.express as px
import plotly.graph_objects as go

# Check if the required columns exist
if 'income_category' in df.columns and 'income' in df.columns:
    # Total income per income_category, largest first
    segment_revenue = df.groupby('income_category')['income'].sum().reset_index()
    segment_revenue = segment_revenue.sort_values('income', ascending=False)

    # Create interactive bar chart
    fig = px.bar(
        segment_revenue,
        x='income_category',
        y='income',
        color='income_category',
        labels={'income_category': 'Customer Segment', 'income': 'Total Revenue'},
        title='Revenue by Customer Segment',
        template='plotly_white',
        color_discrete_sequence=px.colors.qualitative.G10
    )

    # Enhance with hover information
    fig.update_traces(
        hovertemplate='<b>%{x}</b><br>Revenue: $%{y:,.2f}<extra></extra>',
        marker_line_width=1,
        marker_line_color='rgb(8,48,107)'
    )

    # Customize layout
    fig.update_layout(
        xaxis_title='Customer Segment',
        yaxis_title='Total Revenue ($)',
        yaxis_tickformat='$,.0f',
        xaxis={'categoryorder': 'total descending'}
    )
else:
    # Fallback if the expected columns don't exist: chart whatever is available
    numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
    categorical_cols = df.select_dtypes(include=['object']).columns

    if len(categorical_cols) > 0 and len(numeric_cols) > 0:
        # Use first categorical column and first numeric column
        cat_col = categorical_cols[0]
        num_col = numeric_cols[0]

        # Group data and create visualization
        grouped_data = df.groupby(cat_col)[num_col].sum().reset_index()
        grouped_data = grouped_data.sort_values(num_col, ascending=False)

        fig = px.bar(
            grouped_data,
            x=cat_col,
            y=num_col,
            color=cat_col,
            title=f'{num_col} by {cat_col}',
            template='plotly_white'
        )
    else:
        # Create a basic histogram of a numeric column
        if len(numeric_cols) > 0:
            fig = px.histogram(
                df,
                x=numeric_cols[0],
                title=f'Distribution of {numeric_cols[0]}',
                template='plotly_white'
            )
        else:
            # Nothing to plot: show an empty figure carrying a message.
            # (Needs plotly.graph_objects, imported at the top of the snippet.)
            fig = go.Figure()
            fig.add_annotation(
                text="No suitable columns found for visualization",
                xref="paper", yref="paper",
                x=0.5, y=0.5, showarrow=False
            )
"""

# Execute the code and display the plot (skipped when the fetch cell failed)
if df is not None:
    html_plot = execute_plot_code(income_viz_code, df)
    display_plot(html_plot)
else:
    print("Cannot generate visualization - no data available")

## Example 2: Interactive Multi-Metric Dashboard

In [None]:
# Plotly snippet for a 2x2 dashboard, handed to execute_plot_code together with
# the DataFrame. Judging from the call at the bottom of this cell, the snippet
# sees the data as `df` and is expected to leave its figure in `fig` - TODO
# confirm against tools.plot_executor. The snippet imports everything it uses
# (pandas, plotly.express, json) instead of relying on injected names.
dashboard_code = """
import json

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots


def add_placeholder(fig, message, row, col):
    # Center a short note inside the grid cell at (row, col). Paper coordinates
    # are used so the note sits in the middle of the cell even when the cell has
    # no data, and also works for the pie cell, which has no x/y axes at all.
    # Everything the helper needs is passed in as an argument, so it does not
    # depend on how the executor scopes the snippet's top-level names.
    cell = fig.get_subplot(row, col)
    if hasattr(cell, 'xaxis'):
        # Cartesian cell: its extent is given by the two axis domains
        x_span, y_span = cell.xaxis.domain, cell.yaxis.domain
    else:
        # Domain-type cell (pie): the extent is stored on the subplot itself
        x_span, y_span = cell.x, cell.y
    fig.add_annotation(
        text=message,
        xref="paper", yref="paper",
        x=(x_span[0] + x_span[1]) / 2,
        y=(y_span[0] + y_span[1]) / 2,
        xanchor="center", yanchor="middle",
        showarrow=False
    )


# Create a subplot with 2x2 grid; each cell is typed after the trace it holds
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=(
        'Customer Age Distribution',
        'Credit Score vs Income',
        'Customer Balance by Age Group',
        'Product Holdings Distribution'
    ),
    specs=[
        [{'type': 'histogram'}, {'type': 'scatter'}],
        [{'type': 'bar'}, {'type': 'pie'}]
    ]
)

# 1. Age Distribution - Histogram
if 'age' in df.columns:
    fig.add_trace(
        go.Histogram(
            x=df['age'],
            nbinsx=30,
            marker_color='rgba(73, 160, 181, 0.7)',
            name='Age Distribution'
        ),
        row=1, col=1
    )
else:
    add_placeholder(fig, "Age data not available", row=1, col=1)

# 2. Credit Score vs Income - Scatter plot
if 'credit_score' in df.columns and 'income' in df.columns:
    fig.add_trace(
        go.Scatter(
            x=df['credit_score'],
            y=df['income'],
            mode='markers',
            marker=dict(
                color='rgba(152, 78, 163, 0.7)',
                size=8,
                opacity=0.6
            ),
            name='Credit Score vs Income',
            hovertemplate='Credit Score: %{x}<br>Income: $%{y:,.2f}<extra></extra>'
        ),
        row=1, col=2
    )
else:
    add_placeholder(fig, "Credit Score and Income data not available", row=1, col=2)

# 3. Balance by Age Group - Bar chart
if 'age' in df.columns and 'balance' in df.columns:
    # Half-open bins [18, 30), [30, 40), ... [60, 100); ages outside 18-99 fall
    # into no bin and are therefore left out of the averages
    bins = [18, 30, 40, 50, 60, 100]
    labels = ['18-29', '30-39', '40-49', '50-59', '60+']
    df_with_age_groups = df.copy()
    df_with_age_groups['age_group'] = pd.cut(df_with_age_groups['age'], bins=bins, labels=labels, right=False)

    # Mean balance per age group. observed=False is spelled out so that every
    # age group stays on the axis regardless of the pandas default for
    # categorical group keys.
    balance_by_age = (
        df_with_age_groups
        .groupby('age_group', observed=False)['balance']
        .mean()
        .reset_index()
    )

    fig.add_trace(
        go.Bar(
            x=balance_by_age['age_group'],
            y=balance_by_age['balance'],
            marker_color='rgba(60, 179, 113, 0.7)',
            name='Avg Balance by Age',
            hovertemplate='Age Group: %{x}<br>Avg Balance: $%{y:,.2f}<extra></extra>'
        ),
        row=2, col=1
    )
else:
    add_placeholder(fig, "Age and Balance data not available", row=2, col=1)

# 4. Product Holdings - Pie chart
if 'product_holding' in df.columns:
    # Number of products per customer. product_holding is assumed to hold either
    # a list or a JSON array encoded as a string - TODO confirm against the table.
    # A plain loop is used so each row gets its own fallback: one unparsable
    # value no longer downgrades the whole column to the rough estimate.
    product_count = []
    for holding in df['product_holding']:
        if isinstance(holding, list):
            product_count.append(len(holding))
        elif isinstance(holding, str):
            try:
                product_count.append(len(json.loads(holding)))
            except (ValueError, TypeError):
                # Not valid JSON, or JSON without a length (e.g. a bare number):
                # count commas as a rough estimate of the number of items
                product_count.append(holding.count(',') + 1)
        else:
            # Missing / unsupported value: treat as no products
            product_count.append(0)

    # Count customers by number of products
    product_counts = pd.Series(product_count, dtype='int64').value_counts().reset_index()
    product_counts.columns = ['num_products', 'count']

    fig.add_trace(
        go.Pie(
            labels=product_counts['num_products'].astype(str) + ' Products',
            values=product_counts['count'],
            name='Product Holdings',
            marker_colors=px.colors.qualitative.Pastel,
            textinfo='percent',
            hoverinfo='label+percent+value',
            hole=0.4
        ),
        row=2, col=2
    )
else:
    # Use an alternative if product_holding doesn't exist
    categorical_cols = df.select_dtypes(include=['object']).columns
    if len(categorical_cols) > 0:
        # Use first categorical column for pie chart
        cat_col = categorical_cols[0]
        count_data = df[cat_col].value_counts().reset_index()
        count_data.columns = [cat_col, 'count']

        fig.add_trace(
            go.Pie(
                labels=count_data[cat_col],
                values=count_data['count'],
                name=cat_col,
                marker_colors=px.colors.qualitative.Pastel,
                textinfo='percent',
                hoverinfo='label+percent+value',
                hole=0.4
            ),
            row=2, col=2
        )
    else:
        add_placeholder(fig, "Product data not available", row=2, col=2)

# Update layout
fig.update_layout(
    title_text='Customer Financial Dashboard',
    showlegend=True,
    height=800,
    width=1000,
    template='plotly_white',
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.2,
        xanchor="center",
        x=0.5
    )
)

# Update axes (the pie cell at row 2, col 2 has no axes to label)
fig.update_xaxes(title_text='Age', row=1, col=1)
fig.update_yaxes(title_text='Count', row=1, col=1)

fig.update_xaxes(title_text='Credit Score', row=1, col=2)
fig.update_yaxes(title_text='Income ($)', tickprefix='$', row=1, col=2)

fig.update_xaxes(title_text='Age Group', row=2, col=1)
fig.update_yaxes(title_text='Average Balance ($)', tickprefix='$', row=2, col=1)
"""

# Execute the code and display the plot (skipped when the fetch cell failed)
if df is not None:
    html_plot = execute_plot_code(dashboard_code, df)
    display_plot(html_plot)
else:
    print("Cannot generate visualization - no data available")

## Create a Custom Visualization

Now, try creating your own visualization by writing Plotly code in the cell below:

In [None]:
# Write your Plotly visualization code here.
# Judging from the call at the bottom of this cell, the snippet sees the data as
# `df` and is expected to leave its figure in `fig` - TODO confirm against
# tools.plot_executor. Import inside the snippet whatever it uses.
custom_code = """
# Your Plotly code goes here
# For example:

import plotly.express as px
import plotly.graph_objects as go

# Select a numeric column from the dataframe
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
if len(numeric_cols) > 0:
    fig = px.histogram(
        df,
        x=numeric_cols[0],
        title=f'Distribution of {numeric_cols[0]}',
        template='plotly_white',
        marginal='box'  # Add a box plot on the margin
    )

    fig.update_layout(
        xaxis_title=numeric_cols[0],
        yaxis_title='Count',
        height=600,
        width=900
    )
else:
    # Nothing numeric to plot: show an empty figure carrying a message.
    # (Needs plotly.graph_objects, imported at the top of the snippet.)
    fig = go.Figure()
    fig.add_annotation(
        text="No numeric columns found for visualization",
        xref="paper", yref="paper",
        x=0.5, y=0.5, showarrow=False
    )
"""

# Execute your code (skipped when the fetch cell failed)
if df is not None:
    html_plot = execute_plot_code(custom_code, df)
    display_plot(html_plot)
else:
    print("Cannot generate visualization - no data available")

## Alternative SQL Query

Let's try a different SQL query to get transaction data instead:

In [None]:
# Most recent 500 rows of the transaction history table
transaction_query = """
SELECT TOP 500 * 
FROM [master].[dbo].[transaction_history]
ORDER BY transaction_date DESC
"""

# Run the query; the result is checked against None below before it is used
transaction_df = fetch_data_from_db(transaction_query)

# Handle the failure case first, otherwise preview the data and normalize dates
if transaction_df is None:
    print("Failed to fetch transaction data from database")
else:
    display(transaction_df.head())
    row_count = len(transaction_df)
    column_count = len(transaction_df.columns)
    print(f"Fetched {row_count} rows with {column_count} columns")

    # Convert transaction_date to datetime when the column is present
    has_date_column = 'transaction_date' in transaction_df.columns
    if has_date_column:
        parsed_dates = pd.to_datetime(transaction_df['transaction_date'])
        transaction_df['transaction_date'] = parsed_dates

In [None]:
# Plotly snippet for the transaction time series, handed to execute_plot_code
# together with the transaction DataFrame. Judging from the call at the bottom of
# this cell, the snippet sees the data as `df` and is expected to leave its
# figure in `fig` - TODO confirm against tools.plot_executor. The snippet imports
# everything it uses (including pandas) instead of relying on injected names.
transaction_viz_code = """
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# The chart needs all three columns; work out which ones (if any) are absent
required_columns = ['transaction_date', 'category', 'amount']
missing_columns = sorted(set(required_columns) - set(df.columns))

if not missing_columns:
    # Parse the dates on a new frame so the DataFrame passed in is not modified
    transactions = df.assign(transaction_date=pd.to_datetime(df['transaction_date']))

    # Total amount per calendar day and category
    daily_totals = (
        transactions
        .groupby([pd.Grouper(key='transaction_date', freq='D'), 'category'])['amount']
        .sum()
        .reset_index()
    )

    # Create a time series plot of transactions by category
    fig = px.line(
        daily_totals,
        x='transaction_date',
        y='amount',
        color='category',
        title='Transaction Amounts Over Time by Category',
        labels={'transaction_date': 'Date', 'amount': 'Total Amount', 'category': 'Category'}
    )

    # Add range selector
    fig.update_xaxes(
        rangeslider_visible=True,
        rangeselector=dict(
            buttons=list([
                dict(count=7, label="1w", step="day", stepmode="backward"),
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=3, label="3m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(step="all")
            ])
        )
    )

    # Format y-axis with currency symbol
    fig.update_yaxes(tickprefix='$')

    # Improve hover info
    fig.update_traces(
        hovertemplate='<b>%{x}</b><br>Amount: $%{y:,.2f}<extra></extra>'
    )
else:
    # Create an empty figure naming the columns that are missing
    fig = go.Figure()
    fig.add_annotation(
        text="Required column(s) not found in the data: " + ", ".join(missing_columns),
        xref="paper", yref="paper",
        x=0.5, y=0.5, showarrow=False
    )

# Update layout (applies to both the chart and the placeholder figure)
fig.update_layout(
    height=600,
    width=1000,
    template='plotly_white',
    hovermode='closest'
)
"""

# Execute and display transaction visualization (skipped when the fetch failed)
if transaction_df is not None:
    html_plot = execute_plot_code(transaction_viz_code, transaction_df)
    display_plot(html_plot)
else:
    print("Cannot generate transaction visualization - no data available")