# =============================================================================
# Cell 1: Setup and Imports
# =============================================================================

In [1]:
import pandas as pd
import numpy as np
import dash
from dash import dcc, html, Input, Output, callback, dash_table
import dash_bootstrap_components as dbc
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import KMeans
from sklearn.metrics import mean_absolute_error, r2_score
import warnings
warnings.filterwarnings('ignore')

print("✅ All libraries imported successfully!")
print("📊 Ready to build e-commerce analytics dashboard")

✅ All libraries imported successfully!
📊 Ready to build e-commerce analytics dashboard


# =============================================================================
# Cell 2: Data Loading and Generation
# =============================================================================

In [2]:
def create_sample_data():
    """Generate realistic e-commerce product data for analysis"""
    
    print("Creating sample e-commerce dataset...")
    
    np.random.seed(42)
    n_products = 2000
    
    categories = ['electronics', 'jewelery', "men's clothing", "women's clothing"]
    
    # Product title templates for each category
    title_templates = {
        'electronics': ['Smartphone', 'Laptop', 'Headphones', 'Camera', 'Tablet', 'Speaker', 'Monitor'],
        'jewelery': ['Necklace', 'Ring', 'Earrings', 'Bracelet', 'Watch', 'Pendant'],
        "men's clothing": ['T-Shirt', 'Jeans', 'Jacket', 'Shirt', 'Pants', 'Sweater'],
        "women's clothing": ['Dress', 'Top', 'Skirt', 'Blouse', 'Cardigan', 'Leggings']
    }
    
    data = []
    for i in range(n_products):
        category = np.random.choice(categories)
        product_type = np.random.choice(title_templates[category])
        
        # Different price ranges for different categories
        if category == 'electronics':
            price = np.random.lognormal(6, 1.5)  # $100-$5000+ range
        elif category == 'jewelery':
            price = np.random.lognormal(4, 1)    # $20-$500 range
        else:
            price = np.random.lognormal(3, 0.8)  # $10-$200 range
            
        # Rating influenced by price and category
        base_rating = 3.2 + min(price / 1000, 1.5)  # Higher price = slight rating boost
        rating = np.clip(np.random.normal(base_rating, 0.4), 1.0, 5.0)
        
        # Review count influenced by price and rating
        popularity_factor = (rating - 2) / 3  # 0-1 scale
        review_count = int(np.random.exponential(80) * (1 + popularity_factor))
        
        data.append({
            'id': i + 1,
            'title': f"{product_type} - Premium {category.title()} #{i+1:04d}",
            'price': round(price, 2),
            'description': f"High-quality {category} product with excellent features and reliable performance",
            'category': category,
            'rating_score': round(rating, 1),
            'rating_count': max(5, review_count)  # Minimum 5 reviews
        })
    
    return pd.DataFrame(data)

# Generate the dataset
df = create_sample_data()

print(f"📦 Dataset created: {len(df)} products")
print(f"🏷️ Categories: {list(df['category'].unique())}")
print(f"💰 Price range: ${df['price'].min():.2f} - ${df['price'].max():.2f}")
print(f"⭐ Rating range: {df['rating_score'].min():.1f} - {df['rating_score'].max():.1f}")

# Display sample data
df.head()

Creating sample e-commerce dataset...
📦 Dataset created: 2000 products
🏷️ Categories: ["men's clothing", 'jewelery', "women's clothing", 'electronics']
💰 Price range: $1.04 - $42085.10
⭐ Rating range: 2.1 - 5.0


Unnamed: 0,id,title,price,description,category,rating_score,rating_count
0,1,Shirt - Premium Men'S Clothing #0001,8.25,High-quality men's clothing product with excel...,men's clothing,3.3,19
1,2,Jacket - Premium Men'S Clothing #0002,25.11,High-quality men's clothing product with excel...,men's clothing,3.6,151
2,3,Watch - Premium Jewelery #0003,30.54,High-quality jewelery product with excellent f...,jewelery,3.0,21
3,4,Top - Premium Women'S Clothing #0004,2.48,High-quality women's clothing product with exc...,women's clothing,3.6,42
4,5,Jeans - Premium Men'S Clothing #0005,16.43,High-quality men's clothing product with excel...,men's clothing,3.2,106


# =============================================================================
# Cell 3: Business Metrics Calculation
# =============================================================================

In [3]:
def calculate_business_metrics(df):
    """Add key business performance indicators"""
    
    print("Calculating business intelligence metrics...")
    
    # Value score: rating quality per dollar
    df['value_score'] = df['rating_score'] / (df['price'] / 100)
    
    # Popularity score based on review volume
    df['popularity_score'] = np.log1p(df['rating_count'])
    
    # Revenue potential estimate
    df['revenue_potential'] = df['price'] * df['rating_count'] * 0.1
    
    # Performance categorization
    def categorize_performance(row):
        high_rating = row['rating_score'] >= 4.5
        high_price = row['price'] >= df['price'].quantile(0.75)
        high_value = row['value_score'] >= df['value_score'].quantile(0.75)
        low_rating = row['rating_score'] < 3.5
        
        if high_rating and high_price:
            return 'Premium Star'
        elif high_rating and high_value:
            return 'Value Champion'
        elif low_rating and high_price:
            return 'Overpriced'
        else:
            return 'Budget Basic'
    
    df['performance_category'] = df.apply(categorize_performance, axis=1)
    
    # Category-specific rankings
    df['price_rank_in_category'] = df.groupby('category')['price'].rank(pct=True)
    df['rating_rank_in_category'] = df.groupby('category')['rating_score'].rank(pct=True)
    
    return df

# Apply business metrics
df = calculate_business_metrics(df)

# Show performance distribution
performance_dist = df['performance_category'].value_counts()
print("\n📊 Performance Category Distribution:")
for category, count in performance_dist.items():
    percentage = (count / len(df)) * 100
    print(f"   {category}: {count} products ({percentage:.1f}%)")

Calculating business intelligence metrics...

📊 Performance Category Distribution:
   Budget Basic: 1728 products (86.4%)
   Overpriced: 151 products (7.5%)
   Premium Star: 120 products (6.0%)
   Value Champion: 1 products (0.1%)


# =============================================================================
# Cell 4: Machine Learning - Customer Segmentation
# =============================================================================

In [4]:
def perform_customer_segmentation(df):
    """Use ML to identify customer segments based on product preferences"""
    
    print("Running customer segmentation analysis...")
    
    # Select features for clustering
    clustering_features = ['price', 'rating_score', 'value_score', 'popularity_score']
    X = df[clustering_features].copy()
    
    # Handle any missing values
    X = X.fillna(X.mean())
    
    # Scale features for clustering
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Perform K-means clustering
    n_clusters = 5
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    df['cluster'] = kmeans.fit_predict(X_scaled)
    
    # Create meaningful segment names based on cluster characteristics
    segment_names = {}
    for cluster in range(n_clusters):
        cluster_data = df[df['cluster'] == cluster]
        
        avg_price = cluster_data['price'].mean()
        avg_rating = cluster_data['rating_score'].mean()
        avg_value = cluster_data['value_score'].mean()
        
        # Determine segment name based on characteristics
        price_q75 = df['price'].quantile(0.75)
        price_q25 = df['price'].quantile(0.25)
        rating_q75 = df['rating_score'].quantile(0.75)
        value_q75 = df['value_score'].quantile(0.75)
        
        if avg_price > price_q75:
            name = "Premium Quality" if avg_rating > rating_q75 else "Expensive"
        elif avg_price < price_q25:
            name = "Value Champions" if avg_rating > rating_q75 else "Budget Basic"
        elif avg_value > value_q75:
            name = "Best Value"
        else:
            name = "Mainstream"
        
        segment_names[cluster] = name
    
    df['segment_name'] = df['cluster'].map(segment_names)
    
    # Add additional ML-derived features
    df['value_efficiency'] = df['rating_score'] / (df['price'] / 100)
    df['engagement_ratio'] = df['rating_count'] / df['rating_count'].max()
    
    return df, kmeans, scaler, segment_names

# Apply ML segmentation
df, ml_model, scaler, segments = perform_customer_segmentation(df)

# Display segmentation results
print("\n🤖 Customer Segments Identified:")
segment_summary = df.groupby('segment_name').agg({
    'price': 'mean',
    'rating_score': 'mean',
    'value_score': 'mean',
    'cluster': 'count'
}).round(2)

segment_summary.columns = ['Avg_Price', 'Avg_Rating', 'Avg_Value', 'Product_Count']
print(segment_summary)

Running customer segmentation analysis...

🤖 Customer Segments Identified:
                 Avg_Price  Avg_Rating  Avg_Value  Product_Count
segment_name                                                    
Budget Basic          5.81        3.34      65.73            151
Mainstream           94.78        3.23      11.51           1606
Premium Quality    2679.96        4.46       2.24            243


# =============================================================================
# Cell 5: Dashboard Creation Functions
# =============================================================================

In [5]:
def create_portfolio_scatter(data):
    """Create interactive portfolio performance scatter plot"""
    
    fig = go.Figure()
    
    # Color by customer segment
    for segment in data['segment_name'].unique():
        segment_data = data[data['segment_name'] == segment]
        
        fig.add_trace(go.Scatter(
            x=segment_data['price'],
            y=segment_data['rating_score'],
            mode='markers',
            name=segment,
            marker=dict(
                size=np.sqrt(segment_data['rating_count'])/3,  # Size by popularity
                opacity=0.7,
                line=dict(width=1, color='white')
            ),
            text=segment_data['title'],
            customdata=segment_data[['value_score', 'rating_count', 'performance_category']],
            hovertemplate=
            '<b>%{text}</b><br>' +
            'Price: $%{x:.2f}<br>' +
            'Rating: %{y:.1f}★<br>' +
            'Value Score: %{customdata[0]:.2f}<br>' +
            'Reviews: %{customdata[1]:,}<br>' +
            'Segment: ' + segment + '<br>' +
            'Category: %{customdata[2]}' +
            '<extra></extra>'
        ))
    
    # Add quadrant lines
    fig.add_vline(x=data['price'].median(), line_dash="dash", line_color="gray", opacity=0.5)
    fig.add_hline(y=data['rating_score'].median(), line_dash="dash", line_color="gray", opacity=0.5)
    
    fig.update_layout(
        title="Portfolio Performance Matrix - Price vs Rating by Customer Segment",
        xaxis_title="Price ($)",
        yaxis_title="Rating Score",
        height=600,
        template="plotly_white",
        showlegend=True
    )
    
    return fig

def create_segment_pie_chart(data):
    """Create customer segment distribution pie chart"""
    
    segment_counts = data['segment_name'].value_counts()
    
    fig = go.Figure(data=[go.Pie(
        labels=segment_counts.index,
        values=segment_counts.values,
        textinfo='label+percent',
        hovertemplate='<b>%{label}</b><br>Products: %{value}<br>Percentage: %{percent}<extra></extra>'
    )])
    
    fig.update_layout(
        title="Customer Segment Distribution",
        height=400,
        template="plotly_white"
    )
    
    return fig

def create_category_performance_bar(data):
    """Create category performance comparison"""
    
    category_stats = data.groupby('category').agg({
        'price': 'mean',
        'rating_score': 'mean',
        'value_score': 'mean',
        'rating_count': 'sum'
    }).round(2)
    
    fig = go.Figure()
    
    fig.add_trace(go.Bar(
        name='Avg Price',
        x=category_stats.index,
        y=category_stats['price'],
        yaxis='y',
        offsetgroup=1
    ))
    
    fig.add_trace(go.Bar(
        name='Avg Rating',
        x=category_stats.index,
        y=category_stats['rating_score'],
        yaxis='y2',
        offsetgroup=2
    ))
    
    fig.update_layout(
        title="Category Performance Comparison",
        xaxis_title="Category",
        yaxis=dict(title="Average Price ($)", side="left"),
        yaxis2=dict(title="Average Rating", side="right", overlaying="y"),
        height=400,
        template="plotly_white"
    )
    
    return fig

print("📊 Visualization functions created successfully")

📊 Visualization functions created successfully


# =============================================================================
# Cell 6: Dashboard Layout and App Creation
# =============================================================================

In [6]:
def create_dashboard_app(df):
    """Create the main Dash application"""
    
    # Initialize Dash app
    app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
    app.title = "E-commerce Portfolio Analytics"
    
    # Calculate key metrics
    metrics = {
        'total_products': len(df),
        'categories': df['category'].nunique(),
        'segments': df['segment_name'].nunique(),
        'avg_price': df['price'].mean(),
        'avg_rating': df['rating_score'].mean(),
        'value_champions': len(df[df['performance_category'] == 'Value Champion']),
        'overpriced': len(df[df['performance_category'] == 'Overpriced'])
    }
    
    # App layout
    app.layout = dbc.Container([
        # Header
        dbc.Row([
            dbc.Col([
                html.Div([
                    html.H1("E-commerce Portfolio Analytics", className="display-4 text-center mb-3"),
                    html.P("AI-Powered Business Intelligence Dashboard", 
                           className="lead text-center text-muted mb-4"),
                    html.Hr()
                ])
            ])
        ]),
        
        # Key Metrics Cards
        dbc.Row([
            dbc.Col([
                dbc.Card([
                    dbc.CardBody([
                        html.H4(f"{metrics['total_products']:,}", className="text-primary"),
                        html.P("Total Products", className="mb-0"),
                        html.Small("Portfolio size", className="text-muted")
                    ])
                ], className="text-center shadow-sm")
            ], width=2),
            
            dbc.Col([
                dbc.Card([
                    dbc.CardBody([
                        html.H4(f"{metrics['segments']}", className="text-info"),
                        html.P("Customer Segments", className="mb-0"),
                        html.Small("AI-identified", className="text-muted")
                    ])
                ], className="text-center shadow-sm")
            ], width=2),
            
            dbc.Col([
                dbc.Card([
                    dbc.CardBody([
                        html.H4(f"${metrics['avg_price']:.0f}", className="text-success"),
                        html.P("Average Price", className="mb-0"),
                        html.Small("Across portfolio", className="text-muted")
                    ])
                ], className="text-center shadow-sm")
            ], width=2),
            
            dbc.Col([
                dbc.Card([
                    dbc.CardBody([
                        html.H4(f"{metrics['avg_rating']:.1f}★", className="text-warning"),
                        html.P("Average Rating", className="mb-0"),
                        html.Small("Customer satisfaction", className="text-muted")
                    ])
                ], className="text-center shadow-sm")
            ], width=3),
            
            dbc.Col([
                dbc.Card([
                    dbc.CardBody([
                        html.H4(f"{metrics['value_champions']}", className="text-success"),
                        html.P("Value Champions", className="mb-0"),
                        html.Small("High-opportunity products", className="text-muted")
                    ])
                ], className="text-center shadow-sm")
            ], width=3)
        ], className="mb-4"),
        
        # Filters Panel
        dbc.Row([
            dbc.Col([
                dbc.Card([
                    dbc.CardHeader([
                        html.H5("Dashboard Filters", className="mb-0")
                    ]),
                    dbc.CardBody([
                        dbc.Row([
                            dbc.Col([
                                html.Label("Product Categories:", className="fw-bold"),
                                dcc.Dropdown(
                                    id='category-filter',
                                    options=[{'label': cat.title(), 'value': cat} 
                                           for cat in sorted(df['category'].unique())],
                                    value=list(df['category'].unique()),
                                    multi=True,
                                    placeholder="Select categories..."
                                )
                            ], width=3),
                            
                            dbc.Col([
                                html.Label("Price Range ($):", className="fw-bold"),
                                dcc.RangeSlider(
                                    id='price-slider',
                                    min=int(df['price'].min()),
                                    max=int(df['price'].max()),
                                    value=[int(df['price'].min()), int(df['price'].max())],
                                    marks={
                                        int(df['price'].min()): f"${df['price'].min():.0f}",
                                        int(df['price'].quantile(0.5)): f"${df['price'].quantile(0.5):.0f}",
                                        int(df['price'].max()): f"${df['price'].max():.0f}"
                                    },
                                    tooltip={"placement": "bottom", "always_visible": True}
                                )
                            ], width=3),
                            
                            dbc.Col([
                                html.Label("Customer Segments:", className="fw-bold"),
                                dcc.Dropdown(
                                    id='segment-filter',
                                    options=[{'label': seg, 'value': seg} 
                                           for seg in sorted(df['segment_name'].unique())],
                                    value=list(df['segment_name'].unique()),
                                    multi=True,
                                    placeholder="Select segments..."
                                )
                            ], width=3),
                            
                            dbc.Col([
                                html.Label("Performance Category:", className="fw-bold"),
                                dcc.Dropdown(
                                    id='performance-filter',
                                    options=[{'label': perf, 'value': perf} 
                                           for perf in sorted(df['performance_category'].unique())],
                                    value=list(df['performance_category'].unique()),
                                    multi=True,
                                    placeholder="Select performance..."
                                )
                            ], width=3)
                        ])
                    ])
                ], className="shadow-sm")
            ])
        ], className="mb-4"),
        
        # Main Content Area
        html.Div(id="dashboard-content")
        
    ], fluid=True)
    
    return app

# Create the dashboard app
app = create_dashboard_app(df)
print("✅ Dashboard application created successfully")


✅ Dashboard application created successfully


# =============================================================================
# Cell 7: Dashboard Callback and Content Generation
# =============================================================================

In [7]:
def create_dashboard_content(filtered_data):
    """Generate the main dashboard visualizations"""
    
    if len(filtered_data) == 0:
        return dbc.Alert([
            html.H4("No Data Found"),
            html.P("No products match your current filter selection."),
            html.P("Try adjusting your filters to see results.")
        ], color="warning", className="text-center m-4")
    
    # Create visualizations
    portfolio_fig = create_portfolio_scatter(filtered_data)
    segment_fig = create_segment_pie_chart(filtered_data)
    category_fig = create_category_performance_bar(filtered_data)
    
    # Prepare data table
    table_columns = ['title', 'category', 'price', 'rating_score', 'rating_count', 
                    'segment_name', 'performance_category', 'value_score']
    table_data = filtered_data[table_columns].head(25).round(2)
    
    data_table = dash_table.DataTable(
        data=table_data.to_dict('records'),
        columns=[
            {'name': 'Product', 'id': 'title'},
            {'name': 'Category', 'id': 'category'},
            {'name': 'Price ($)', 'id': 'price', 'type': 'numeric', 'format': {'specifier': '.2f'}},
            {'name': 'Rating', 'id': 'rating_score', 'type': 'numeric', 'format': {'specifier': '.1f'}},
            {'name': 'Reviews', 'id': 'rating_count', 'type': 'numeric'},
            {'name': 'Customer Segment', 'id': 'segment_name'},
            {'name': 'Performance', 'id': 'performance_category'},
            {'name': 'Value Score', 'id': 'value_score', 'type': 'numeric', 'format': {'specifier': '.2f'}}
        ],
        style_cell={
            'textAlign': 'left', 
            'padding': '12px',
            'fontFamily': 'Arial, sans-serif'
        },
        style_header={
            'backgroundColor': '#007bff',
            'color': 'white',
            'fontWeight': 'bold'
        },
        style_data_conditional=[
            {
                'if': {'filter_query': '{performance_category} = Value Champion'},
                'backgroundColor': '#d4edda',
                'color': 'black'
            },
            {
                'if': {'filter_query': '{performance_category} = Overpriced'},
                'backgroundColor': '#f8d7da',
                'color': 'black'
            }
        ],
        page_size=20,
        sort_action="native",
        filter_action="native"
    )
    
    # Summary statistics
    summary_stats = {
        'filtered_count': len(filtered_data),
        'avg_price': filtered_data['price'].mean(),
        'avg_rating': filtered_data['rating_score'].mean(),
        'avg_value': filtered_data['value_score'].mean(),
        'total_reviews': filtered_data['rating_count'].sum()
    }
    
    return dbc.Container([
        # Summary Row
        dbc.Row([
            dbc.Col([
                dbc.Alert([
                    html.H5(f"Showing {summary_stats['filtered_count']:,} products"),
                    html.P([
                        f"Average Price: ${summary_stats['avg_price']:.2f} | ",
                        f"Average Rating: {summary_stats['avg_rating']:.1f}★ | ",
                        f"Total Reviews: {summary_stats['total_reviews']:,}"
                    ], className="mb-0")
                ], color="info")
            ])
        ], className="mb-3"),
        
        # Main Visualizations
        dbc.Row([
            dbc.Col([
                dbc.Card([
                    dbc.CardHeader("Portfolio Performance Analysis"),
                    dbc.CardBody([dcc.Graph(figure=portfolio_fig)])
                ], className="shadow-sm")
            ], width=8),
            
            dbc.Col([
                dbc.Card([
                    dbc.CardHeader("Customer Segments"),
                    dbc.CardBody([dcc.Graph(figure=segment_fig)])
                ], className="shadow-sm")
            ], width=4)
        ], className="mb-4"),
        
        # Category Analysis
        dbc.Row([
            dbc.Col([
                dbc.Card([
                    dbc.CardHeader("Category Performance Comparison"),
                    dbc.CardBody([dcc.Graph(figure=category_fig)])
                ], className="shadow-sm")
            ])
        ], className="mb-4"),
        
        # Data Table
        dbc.Row([
            dbc.Col([
                dbc.Card([
                    dbc.CardHeader(f"Product Details ({len(filtered_data)} products)"),
                    dbc.CardBody([data_table])
                ], className="shadow-sm")
            ])
        ])
    ])

# Add the interactive callback
@app.callback(
    Output('dashboard-content', 'children'),
    [Input('category-filter', 'value'),
     Input('price-slider', 'value'),
     Input('segment-filter', 'value'),
     Input('performance-filter', 'value')]
)
def update_dashboard(categories, price_range, segments, performance):
    """Update dashboard based on filter selections"""
    
    # Start with full dataset
    filtered_data = df.copy()
    
    # Apply filters
    if categories:
        filtered_data = filtered_data[filtered_data['category'].isin(categories)]
    
    if price_range:
        filtered_data = filtered_data[
            (filtered_data['price'] >= price_range[0]) & 
            (filtered_data['price'] <= price_range[1])
        ]
    
    if segments:
        filtered_data = filtered_data[filtered_data['segment_name'].isin(segments)]
    
    if performance:
        filtered_data = filtered_data[filtered_data['performance_category'].isin(performance)]
    
    return create_dashboard_content(filtered_data)

print("✅ Dashboard callbacks configured successfully")

✅ Dashboard callbacks configured successfully


# =============================================================================
# Cell 8: Launch Dashboard
# =============================================================================

In [9]:
def launch_dashboard(port=8050, debug=True):
    """Launch the interactive dashboard"""
    
    print("🚀 Launching E-commerce Analytics Dashboard")
    print("=" * 50)
    print(f"📊 Dataset: {len(df)} products across {df['category'].nunique()} categories")
    print(f"🤖 ML Segments: {df['segment_name'].nunique()} customer segments identified")
    print(f"📈 Performance Categories: {df['performance_category'].nunique()} categories")
    print("")
    print(f"🌐 Dashboard URL: http://localhost:{port}")
    print("⏹️  Press Ctrl+C to stop the server")
    print("=" * 50)
    
    try:
        app.run(debug=debug, host='0.0.0.0', port=port)
    except Exception as e:
        print(f"❌ Error launching dashboard: {e}")
        print(f"💡 Try a different port: launch_dashboard(port=8051)")

# Launch the dashboard
# Uncomment the line below to run the dashboard
# launch_dashboard()

print("🎯 Dashboard ready to launch!")
print("📝 Run: launch_dashboard() to start the application")

🎯 Dashboard ready to launch!
📝 Run: launch_dashboard() to start the application


In [10]:
launch_dashboard()

🚀 Launching E-commerce Analytics Dashboard
📊 Dataset: 2000 products across 4 categories
🤖 ML Segments: 3 customer segments identified
📈 Performance Categories: 4 categories

🌐 Dashboard URL: http://localhost:8050
⏹️  Press Ctrl+C to stop the server
