In [None]:
!pip install dash dash_bootstrap_components

Collecting dash
  Downloading dash-3.2.0-py3-none-any.whl.metadata (10 kB)


In [7]:
import pandas as pd
import numpy as np
import pickle
import plotly.graph_objs as go
from dash import Dash, html, dcc, Input, Output, State, ALL
import dash_bootstrap_components as dbc
from sklearn.preprocessing import LabelEncoder
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

In [8]:
# ==================== LOAD MODELS ====================
def load_model(filepath):
    """Read a pickle file and return the object stored in it.

    Args:
        filepath: Path of the pickle file to open (opened in binary mode).

    Returns:
        The object reconstructed by ``pickle.load``.

    Raises:
        OSError: If the file cannot be opened.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # that come from a trusted source.
    with open(filepath, mode='rb') as model_file:
        unpickled = pickle.load(model_file)
    return unpickled

CHURN_MODEL_PATH = '/content/drive/MyDrive/VITProjects/PredictiveAnalytics/customer_churn_model.pkl'
DEMAND_MODEL_PATH = '/content/drive/MyDrive/VITProjects/PredictiveAnalytics/sku_demand_forecast_model.pkl'

# Start from the "nothing loaded" state so every name below is defined even
# when a load fails (later cells and the callbacks read these globals).
churn_model = None
demand_model = None
churn_model_base = None
demand_model_base = None
churn_encoders = {}

# Each artifact is loaded in its own try block: a missing or corrupt file for
# one model must not discard the other model, which the dashboard can still use.
try:
    churn_model = load_model(CHURN_MODEL_PATH)
    # The pickle may hold either the bare estimator or a dict bundle with the
    # estimator under 'model' and, optionally, fitted encoders under 'encoders'.
    if isinstance(churn_model, dict):
        churn_model_base = churn_model['model']
        churn_encoders = churn_model.get('encoders', {})
    else:
        churn_model_base = churn_model
except Exception as e:
    print(f"‚úó Error loading models: {e}")
    churn_model_base = None
    churn_encoders = {}

try:
    demand_model = load_model(DEMAND_MODEL_PATH)
    demand_model_base = demand_model['model'] if isinstance(demand_model, dict) else demand_model
except Exception as e:
    print(f"‚úó Error loading models: {e}")
    demand_model_base = None

# Report success only when both estimators are actually available.
if churn_model_base is not None and demand_model_base is not None:
    print("‚úì Models loaded successfully!")

‚úì Models loaded successfully!


In [9]:
# Sanity check of what was unpickled: the estimator's class, then its repr.
print(f"Type: {type(churn_model_base)}")
print(f"Content preview: {churn_model_base}")

Type: <class 'sklearn.multioutput.MultiOutputClassifier'>
Content preview: MultiOutputClassifier(estimator=LGBMClassifier(learning_rate=0.05,
                                               n_estimators=300,
                                               random_state=42))


In [6]:
# Mount Google Drive at /content/drive; the model pickle paths used by the
# loading cell live under /content/drive/MyDrive.
# NOTE(review): this cell was executed as In [6], i.e. before the model-loading
# cell (In [8]), although it appears after it in the notebook. On a fresh
# "Restart & Run All" it would run too late - move it above the loading cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [10]:
# ==================== FEATURE ENGINEERING ====================

# Allowed values for each categorical input. The lists feed the dropdown
# options of the churn form and are what the fallback label encoders are
# fitted on.
CATEGORICAL_MAPPINGS = dict(
    country=['Canada', 'USA', 'Pakistan', 'India', 'UK', 'Germany', 'France', 'Australia'],
    gender=['Male', 'Female'],
    preferred_category=['Sports', 'Electronics', 'Clothing', 'Beauty', 'Home'],
    category=['Sports', 'Electronics', 'Clothing', 'Beauty', 'Home'],
    subscription_status=['active', 'cancelled', 'paused'],
)

def create_label_encoders():
    """Build one fitted ``LabelEncoder`` per categorical feature.

    Returns:
        dict: feature name -> ``LabelEncoder`` fitted on that feature's list
        of values in ``CATEGORICAL_MAPPINGS``.
    """
    # NOTE(review): these encoders only know the hard-coded value lists;
    # presumably their integer codes must match the encoding used when the
    # model was trained - verify against the training pipeline.
    return {
        feature_name: LabelEncoder().fit(allowed_values)
        for feature_name, allowed_values in CATEGORICAL_MAPPINGS.items()
    }

# Use locally fitted encoders when the model pickle supplied none.
churn_encoders = churn_encoders or create_label_encoders()

In [11]:
# ==================== FEATURE DEFINITIONS ====================

# One row per churn-model input, in the order the form renders them and the
# input DataFrame is built: (feature name, widget type, default value).
_CHURN_FEATURE_ROWS = [
    ('age', 'number', 35),
    ('country', 'dropdown', 'USA'),
    ('cancellations_count', 'number', 0),
    ('unit_price', 'number', 50.0),
    ('quantity', 'number', 3),
    ('purchase_frequency', 'number', 5),
    ('preferred_category', 'dropdown', 'Sports'),
    ('category', 'dropdown', 'Sports'),
    ('gender', 'dropdown', 'Female'),
    ('days_since_signup', 'number', 365),
    ('customer_tenure', 'number', 12),
    ('total_spent', 'number', 500.0),
    ('avg_order_value', 'number', 100.0),
    ('cancellation_rate', 'number', 0.1),
    ('preferred_category_match', 'number', 1),
]

# Expand the rows into the spec dicts the callbacks consume. Dropdown features
# take their options from the CATEGORICAL_MAPPINGS entry of the same name;
# numeric features have no options.
CHURN_FEATURES = [
    {
        'name': feature_name,
        'type': widget_type,
        'default': default_value,
        'options': CATEGORICAL_MAPPINGS[feature_name] if widget_type == 'dropdown' else None,
    }
    for feature_name, widget_type, default_value in _CHURN_FEATURE_ROWS
]

# CORRECTED DEMAND FEATURES - Based on actual model training
# (feature name, default value) for every demand-model input, in the order the
# form renders them and the input DataFrame is built. All are numeric inputs.
_DEMAND_FEATURE_DEFAULTS = [
    # Base features
    ('city_id', 0),
    ('store_id', 0),
    ('management_group_id', 0),
    ('first_category_id', 5),
    ('second_category_id', 6),
    ('third_category_id', 65),
    ('product_id', 38),
    ('stock_hour6_22_cnt', 5),
    ('discount', 1.0),
    ('holiday_flag', 0),
    ('activity_flag', 0),
    ('precpt', 1.7),
    ('avg_temperature', 15.5),
    ('avg_humidity', 75.0),
    ('avg_wind_level', 1.8),
    # Date features
    ('year', 2024),
    ('month', 3),
    ('day', 28),
    ('day_of_week', 4),
    ('day_of_year', 88),
    ('week_of_year', 13),
    ('quarter', 1),
    ('is_weekend', 0),
    ('is_month_start', 0),
    ('is_month_end', 0),
    # Lag features (previous sales)
    ('sale_lag_1', 0.1),
    ('sale_lag_7', 0.1),
    ('sale_lag_14', 0.1),
    ('sale_lag_30', 0.1),
    # Rolling statistics (7 days)
    ('sale_rolling_mean_7', 0.1),
    ('sale_rolling_std_7', 0.05),
    ('sale_rolling_max_7', 0.2),
    ('sale_rolling_min_7', 0.0),
    # Rolling statistics (14 days)
    ('sale_rolling_mean_14', 0.1),
    ('sale_rolling_std_14', 0.05),
    ('sale_rolling_max_14', 0.2),
    ('sale_rolling_min_14', 0.0),
    # Rolling statistics (30 days)
    ('sale_rolling_mean_30', 0.1),
    ('sale_rolling_std_30', 0.05),
    ('sale_rolling_max_30', 0.2),
    ('sale_rolling_min_30', 0.0),
    # Product statistics
    ('product_mean_sale', 0.15),
    ('product_std_sale', 0.05),
    ('product_max_sale', 0.3),
    # Store statistics
    ('store_mean_sale', 0.15),
    ('store_std_sale', 0.05),
    # Interaction feature
    ('store_product_interaction', 0.02),
]

# Expand into the same spec-dict shape used for the churn features.
DEMAND_FEATURES = [
    {'name': feature_name, 'type': 'number', 'default': default_value, 'options': None}
    for feature_name, default_value in _DEMAND_FEATURE_DEFAULTS
]

def encode_features(input_dict, encoders):
    """Return a copy of ``input_dict`` with categorical values label-encoded.

    Args:
        input_dict: Mapping of feature name -> raw value. It is not modified.
        encoders: Mapping of feature name -> object exposing
            ``transform(list_of_values)``.

    Returns:
        dict: Shallow copy of ``input_dict``. For every feature that has an
        encoder, a ``str`` value is replaced by its integer code and a ``list``
        value by the code of its first element. Values of any other type are
        left as they are. If encoding raises, a warning is printed and the
        value becomes 0.
    """
    result = input_dict.copy()

    for name in encoders:
        if name not in result:
            continue

        raw_value = result[name]
        if isinstance(raw_value, str):
            batch = [raw_value]
        elif isinstance(raw_value, list):
            batch = raw_value
        else:
            # Neither a string nor a list: nothing to encode.
            continue

        try:
            result[name] = int(encoders[name].transform(batch)[0])
        except Exception as e:
            # Best effort: e.g. unseen category or empty list falls back to 0.
            print(f"Warning: Could not encode {name}: {e}")
            result[name] = 0

    return result

In [12]:
# ==================== VISUALIZATION FUNCTIONS ====================

def create_feature_importance_plot(importance_df, title="Feature Importance"):
    """Draw a horizontal bar chart of the first 15 rows of an importance table.

    Args:
        importance_df: DataFrame with a ``feature`` column and a score column
            named ``importance`` (used when present) or ``impact``. ``None``
            or an empty frame yields a placeholder figure.
        title: Chart title.

    Returns:
        plotly ``Figure``: the bar chart, or an otherwise empty figure
        carrying a "not available" annotation.
    """
    if importance_df is None or len(importance_df) == 0:
        placeholder = go.Figure()
        return placeholder.add_annotation(
            text="Feature importance not available",
            xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False
        )

    leading_rows = importance_df.head(15)
    score_column = 'importance' if 'importance' in leading_rows.columns else 'impact'
    scores = leading_rows[score_column]

    # The same scores drive both bar length and bar colour.
    bars = go.Bar(
        x=scores,
        y=leading_rows['feature'],
        orientation='h',
        marker=dict(color=scores, colorscale='Viridis', showscale=True)
    )

    fig = go.Figure(bars)
    fig.update_layout(
        title=title,
        xaxis_title='Importance Score',
        yaxis_title='Features',
        height=500,
        template='plotly_white',
        margin=dict(l=200)  # left margin leaves room for the feature labels
    )
    return fig

def create_contribution_plot(contrib_df, title="Feature Contributions"):
    """Draw a horizontal bar chart of the first 10 rows of a contribution table.

    Args:
        contrib_df: DataFrame with ``feature``, ``contribution`` and
            ``deviation`` columns. ``None`` or an empty frame yields a
            placeholder figure.
        title: Chart title.

    Returns:
        plotly ``Figure``: bars sized by ``contribution`` and coloured red
        where ``deviation`` is negative, green otherwise; or an otherwise
        empty figure carrying a "not available" annotation.
    """
    if contrib_df is None or len(contrib_df) == 0:
        placeholder = go.Figure()
        return placeholder.add_annotation(
            text="Contribution data not available",
            xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False
        )

    leading_rows = contrib_df.head(10)
    contribution_scores = leading_rows['contribution']

    bar_colors = []
    for deviation in leading_rows['deviation']:
        bar_colors.append('red' if deviation < 0 else 'green')

    bar_labels = ["{:.3f}".format(score) for score in contribution_scores]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=contribution_scores,
        y=leading_rows['feature'],
        orientation='h',
        marker=dict(color=bar_colors),
        text=bar_labels,
        textposition='auto'
    ))
    fig.update_layout(
        title=title,
        xaxis_title='Contribution Score',
        yaxis_title='Features',
        height=400,
        template='plotly_white',
        showlegend=False,
        margin=dict(l=200)  # left margin leaves room for the feature labels
    )
    return fig

def get_feature_importance_xgb(model, feature_names):
    """Return the model's feature importances as a table sorted high to low.

    The model's own ``feature_importances_`` is used when it has one. Only
    when it does not (e.g. a multi-output wrapper) is the first entry of
    ``estimators_`` consulted. Checking ``estimators_`` first would make an
    ensemble that exposes both attributes report the importances of a single
    sub-estimator instead of the whole model.

    Args:
        model: Fitted estimator, or a wrapper holding fitted estimators in
            ``estimators_``.
        feature_names: Names to pair with the importance values, in order.

    Returns:
        pandas.DataFrame with columns ``feature`` and ``importance`` sorted by
        descending importance, or ``None`` when no importances can be found or
        extraction fails (the failure is printed).
    """
    try:
        importance = getattr(model, 'feature_importances_', None)

        if importance is None and hasattr(model, 'estimators_'):
            # Wrapper without importances of its own: fall back to the first
            # wrapped estimator (for a multi-output model this is the
            # estimator of the first target only).
            importance = getattr(model.estimators_[0], 'feature_importances_', None)

        if importance is None:
            return None

        # Force the lengths to agree so the DataFrame can be built.
        # NOTE(review): a length mismatch means feature_names does not describe
        # the training columns, so padded/truncated rows may be mislabelled.
        if len(importance) != len(feature_names):
            if len(importance) < len(feature_names):
                importance = list(importance) + [0] * (len(feature_names) - len(importance))
            else:
                importance = importance[:len(feature_names)]

        df = pd.DataFrame({
            'feature': feature_names,
            'importance': importance
        }).sort_values('importance', ascending=False)

        return df
    except Exception as e:
        print(f"Feature importance extraction failed: {e}")
        return None

def explain_prediction(model, X_input, feature_names, is_classifier=True, feature_means=None):
    """Run the model on one input row and attach a simple deviation-based ranking.

    The "contribution" of a feature is NOT derived from the model. It is the
    absolute distance of the input value from a per-feature baseline,
    normalised so that all contributions sum to 1. With the default baseline
    of 0.5 for every feature, the ranking mostly reflects feature magnitude;
    pass real training means through ``feature_means`` for a more meaningful
    ordering.

    Args:
        model: Object with ``predict`` and, for classifiers, optionally
            ``predict_proba``.
        X_input: DataFrame (or data convertible to one with ``feature_names``
            as columns). Only the first row is used for contributions.
        feature_names: Names of the input columns, in column order.
        is_classifier: When true and the model has ``predict_proba``,
            probabilities are collected as well.
        feature_means: Optional sequence of baseline values, one per feature,
            in ``feature_names`` order. Defaults to 0.5 for every feature.

    Returns:
        dict with some of these keys:
            ``prediction``: result of ``model.predict``.
            ``probability``: ``predict_proba`` output. When that output is a
                list of arrays, column 1 of each array is stacked into one
                (n_samples, n_outputs) array; if stacking fails the raw list
                is kept.
            ``feature_contributions``: DataFrame with columns feature, value,
                mean, contribution, deviation, sorted by contribution
                (descending); ``None`` when it could not be computed.
            ``top_features``: first five rows of ``feature_contributions``.
            ``error``: message of the exception when prediction failed. In
                that case no contribution keys are present.
    """
    explanations = {}

    try:
        # Ensure input is a DataFrame
        if not isinstance(X_input, pd.DataFrame):
            X_input = pd.DataFrame(X_input, columns=feature_names)

        print(f"Input DataFrame:\n{X_input}")
        print(f"Input dtypes:\n{X_input.dtypes}")

        if is_classifier:
            if hasattr(model, 'predict_proba'):
                pred_proba = model.predict_proba(X_input)
                pred_class = model.predict(X_input)
                explanations['prediction'] = pred_class

                # A list of arrays is what a multi-output classifier returns:
                # one probability array per target.
                if isinstance(pred_proba, list):
                    try:
                        # Keep only column 1 of each target's array.
                        stacked = np.column_stack([p[:, 1] if p.ndim > 1 else p for p in pred_proba])
                        explanations['probability'] = stacked
                        print(f"‚úì Multi-output probabilities stacked: {stacked.shape}")
                    except Exception as e:
                        print(f"Could not stack multioutput proba: {e}")
                        explanations['probability'] = pred_proba
                else:
                    explanations['probability'] = pred_proba
                    print(f"‚úì Prediction: {pred_class}, Proba shape: {pred_proba.shape}")
            else:
                pred_class = model.predict(X_input)
                explanations['prediction'] = pred_class
        else:
            pred = model.predict(X_input)
            explanations['prediction'] = pred
            print(f"‚úì Prediction: {pred}")
    except Exception as e:
        print(f"Prediction failed: {e}")
        import traceback
        traceback.print_exc()
        explanations['error'] = str(e)
        return explanations

    # Feature contribution estimation (heuristic, see docstring)
    try:
        input_values = X_input.iloc[0].values

        if feature_means is None:
            # Historical default: a flat 0.5 baseline for every feature.
            feature_mean = np.full(len(input_values), 0.5)
        else:
            feature_mean = np.asarray(feature_means, dtype=float)
            if feature_mean.shape != (len(input_values),):
                raise ValueError(
                    f"feature_means has shape {feature_mean.shape}, "
                    f"expected ({len(input_values)},)"
                )

        deviations = np.abs(input_values - feature_mean)

        if deviations.sum() > 0:
            contributions = deviations / deviations.sum()
        else:
            # Every value sits exactly on its baseline: spread evenly.
            contributions = np.ones(len(deviations)) / len(deviations)

        explanations['feature_contributions'] = pd.DataFrame({
            'feature': feature_names,
            'value': input_values,
            'mean': feature_mean,
            'contribution': contributions,
            'deviation': input_values - feature_mean
        }).sort_values('contribution', ascending=False)

        explanations['top_features'] = explanations['feature_contributions'].head(5)
    except Exception as e:
        print(f"Contribution calculation failed: {e}")
        explanations['feature_contributions'] = None

    return explanations

In [13]:
# ==================== DASH APP ====================

app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
# The prediction inputs and button are created by a callback, so callbacks
# refer to component ids that are absent from the initial layout.
app.config.suppress_callback_exceptions = True

# Page title.
_title_row = dbc.Row([
    dbc.Col([
        html.H1(
            "üîç Explainable AI Dashboard",
            className="text-center mb-4 mt-4",
            style={'color': '#2c3e50'}
        )
    ])
])

# Which of the two models to work with.
_model_selector = dcc.Dropdown(
    id='model-selector',
    options=[
        {'label': 'üéØ Customer Churn Prediction', 'value': 'churn'},
        {'label': 'üìä Demand Forecasting', 'value': 'demand'}
    ],
    value='churn',
    clearable=False,
    className="mb-3"
)
_model_selector_row = dbc.Row([
    dbc.Col([html.Label("Select Model:", className="fw-bold"), _model_selector], width=6)
])

# Overview (feature importance) versus single-input prediction.
_analysis_type = dcc.RadioItems(
    id='analysis-type',
    options=[
        {'label': ' Model Overview (Feature Importance)', 'value': 'overview'},
        {'label': ' Predict & Explain New Input', 'value': 'predict'}
    ],
    value='overview',
    className="mb-3"
)
_analysis_type_row = dbc.Row([
    dbc.Col([html.Label("Analysis Type:", className="fw-bold"), _analysis_type], width=12)
])

# 'input-section' and 'results-section' are empty here; callbacks fill them.
app.layout = dbc.Container(
    [
        _title_row,
        html.Hr(),
        _model_selector_row,
        _analysis_type_row,
        html.Hr(),
        html.Div(id='input-section', className="mb-4"),
        dbc.Row([dbc.Col([html.Div(id='results-section')])]),
    ],
    fluid=True,
    style={'maxWidth': '1600px'}
)

In [14]:
# ==================== CALLBACKS ====================

@app.callback(
    Output('input-section', 'children'),
    [Input('model-selector', 'value'),
     Input('analysis-type', 'value')]
)
def show_input_section(model_type, analysis_type):
    """Render the input form for the selected model, in 'predict' mode only.

    Args:
        model_type: Value of the model dropdown; 'churn' selects the churn
            features, anything else the demand features.
        analysis_type: Value of the analysis radio buttons.

    Returns:
        An empty ``html.Div`` unless ``analysis_type`` is 'predict'; otherwise
        a card with one widget per feature and the predict button. Widgets get
        the pattern-matching id ``{'type': 'dynamic-input', 'index': i}``,
        where ``i`` is the feature's position in the feature list.
    """
    if analysis_type != 'predict':
        return html.Div()

    features = CHURN_FEATURES if model_type == 'churn' else DEMAND_FEATURES
    # The demand form has many more fields, so its columns are narrower.
    column_width = 2 if model_type == 'demand' else 3

    def build_widget(position, spec):
        """Number box for numeric features, dropdown for everything else."""
        widget_id = {'type': 'dynamic-input', 'index': position}
        if spec['type'] == 'number':
            return dbc.Input(
                id=widget_id,
                type='number',
                value=spec['default'],
                step=0.01,
                className="mb-2"
            )
        return dcc.Dropdown(
            id=widget_id,
            options=[{'label': choice, 'value': choice} for choice in spec['options']],
            value=spec['default'],
            clearable=False,
            className="mb-2"
        )

    columns = [
        dbc.Col(
            [
                html.Label(
                    spec['name'].replace('_', ' ').title(),
                    className="small fw-bold",
                    style={'fontSize': '11px'}
                ),
                build_widget(position, spec),
            ],
            width=column_width
        )
        for position, spec in enumerate(features)
    ]

    predict_button = dbc.Button(
        "üîÆ Predict & Explain",
        id='predict-button',
        color="primary",
        className="mt-3 w-100"
    )

    card_body = dbc.CardBody([
        html.H5("Enter Input Values:", className="mb-3"),
        html.P(f"Model requires {len(features)} features",
               className="text-muted small"),
        dbc.Row(columns, style={'maxHeight': '600px', 'overflowY': 'auto'}),
        dbc.Row([dbc.Col([predict_button], width=3)])
    ])
    return dbc.Card([card_body], className="mb-4")


@app.callback(
    Output('results-section', 'children'),
    [Input('model-selector', 'value'),
     Input('analysis-type', 'value')],
    prevent_initial_call=False
)
def update_overview(model_type, analysis_type):
    """Fill the results area with the model overview, or a hint in predict mode.

    Args:
        model_type: Value of the model dropdown; 'churn' selects the churn
            model and features, anything else the demand ones.
        analysis_type: Value of the analysis radio buttons.

    Returns:
        An info alert when not in 'overview' mode, a danger/warning alert when
        the model is missing or exposes no importances, otherwise a one-row
        list holding a summary card and the feature-importance chart.
    """
    if analysis_type != 'overview':
        return dbc.Alert("üëÜ Configure inputs and click 'Predict & Explain'", color="info")

    if model_type == 'churn':
        model, features = churn_model_base, CHURN_FEATURES
    else:
        model, features = demand_model_base, DEMAND_FEATURES

    if model is None:
        return dbc.Alert("‚ö†Ô∏è Model not loaded", color="danger")

    feature_names = [spec['name'] for spec in features]
    importance_df = get_feature_importance_xgb(model, feature_names)
    if importance_df is None:
        return dbc.Alert("‚ö†Ô∏è Could not extract feature importance", color="warning")

    chart_title = f"{model_type.title()} Model - Feature Importance"
    fig = create_feature_importance_plot(importance_df, chart_title)

    # importance_df is already sorted, so its head is the top five.
    strongest = importance_df.head(5)
    ranking_items = [
        html.Li(f"{name}: {score:.4f}")
        for name, score in zip(strongest['feature'], strongest['importance'])
    ]

    summary_card = dbc.Card([
        dbc.CardBody([
            html.H4("üìä Model Overview", className="mb-3"),
            html.P(f"Model: {type(model).__name__}", className="mb-2"),
            html.P(f"Features: {len(feature_names)}", className="mb-2"),
            html.Hr(),
            html.H5("Top 5 Important Features:", className="mb-2"),
            html.Ol(ranking_items)
        ])
    ])

    return [
        dbc.Row([
            dbc.Col([summary_card], width=4),
            dbc.Col([dcc.Graph(figure=fig)], width=8)
        ])
    ]


def _coerce_numeric_input(value, feature_name):
    """Convert a raw widget value to int/float; return it unchanged if it is not numeric.

    Integers stay integers, except for 'sale' features, which are always
    floats. Floats stay floats. This includes values such as 1e-05, whose text
    form contains no '.' and which must not be truncated to 0 by ``int()``.
    Numeric strings follow the same rules. Anything unparseable is returned as
    is, so the later ``pd.to_numeric(..., errors='coerce')`` step handles it.
    """
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        return value

    wants_float = 'sale' in feature_name
    if isinstance(value, int) and not wants_float:
        return int(value)
    if isinstance(value, str) and not wants_float and '.' not in value and number.is_integer():
        return int(number)
    return number


def _format_probabilities(probs, pred_value):
    """Render the probabilities returned by ``explain_prediction`` as display text.

    ``explain_prediction`` stacks multi-output probabilities into an array of
    shape (n_samples, n_outputs) holding column 1 of each target's
    ``predict_proba`` result. That case is recognised by the prediction being
    2-D with the same shape, and each column is then labelled as an output,
    not as a class.
    """
    if not isinstance(probs, np.ndarray):
        return str(probs)

    if probs.ndim != 2:
        return " | ".join([f"Class {i}: {float(p):.2%}" for i, p in enumerate(probs)])

    is_multi_output = (
        isinstance(pred_value, np.ndarray)
        and pred_value.ndim == 2
        and pred_value.shape == probs.shape
    )
    if is_multi_output:
        # Rows are samples, columns are targets.
        return " | ".join([
            ", ".join([f"Output {j}: P(class 1) = {float(p):.2%}" for j, p in enumerate(sample_row)])
            for sample_row in probs
        ])

    # Single-output classifier: one row per sample, one column per class.
    return " | ".join([
        f"Output {i}: " + ", ".join([f"Class {j}: {float(p):.2%}" for j, p in enumerate(prob_row)])
        for i, prob_row in enumerate(probs)
    ])


@app.callback(
    Output('results-section', 'children', allow_duplicate=True),
    [Input('predict-button', 'n_clicks')],
    # The model selector is State, not Input: switching models must not re-run
    # a prediction with the widget values that belong to the other model.
    [State('model-selector', 'value'),
     State({'type': 'dynamic-input', 'index': ALL}, 'value')],
    prevent_initial_call=True
)
def make_prediction(n_clicks, model_type, input_values):
    """Predict for the values currently in the form and render the explanation.

    Args:
        n_clicks: Click count of the predict button (``None`` before a click).
        model_type: Value of the model dropdown; 'churn' selects the churn
            model (a classifier), anything else the demand model.
        input_values: Values of all 'dynamic-input' widgets, in feature order.

    Returns:
        Dash components for the results area: an alert when there is nothing
        to predict or the prediction failed, otherwise the prediction card
        with the contribution chart, preceded for classifiers by a
        probabilities alert.
    """
    if n_clicks is None:
        return dbc.Alert("üëÜ Click button above", color="info")

    model = churn_model_base if model_type == 'churn' else demand_model_base
    features = CHURN_FEATURES if model_type == 'churn' else DEMAND_FEATURES
    is_classifier = (model_type == 'churn')

    if model is None:
        return dbc.Alert("‚ö†Ô∏è Model not loaded", color="danger")

    input_values = input_values or []

    # Build the input dict in feature order; missing/empty widgets fall back
    # to the feature's default.
    input_dict = {}
    for i, feat_spec in enumerate(features):
        name = feat_spec['name']
        if i < len(input_values) and input_values[i] is not None:
            val = input_values[i]
            if feat_spec['type'] == 'number':
                input_dict[name] = _coerce_numeric_input(val, name)
            else:
                input_dict[name] = val
        else:
            input_dict[name] = feat_spec['default']

    print(f"\n{'='*60}")
    print("Raw input:", input_dict)

    # Encode categorical features for churn
    if model_type == 'churn':
        encoded_dict = encode_features(input_dict, churn_encoders)
    else:
        encoded_dict = input_dict

    print("Encoded:", encoded_dict)

    # Single-row frame; column order follows the feature list.
    X_input = pd.DataFrame([encoded_dict])

    # Ensure all columns are numeric (anything unparseable becomes 0)
    for col in X_input.columns:
        X_input[col] = pd.to_numeric(X_input[col], errors='coerce').fillna(0)

    print(f"DataFrame shape: {X_input.shape}")
    print(f"DataFrame dtypes:\n{X_input.dtypes}")
    print(f"{'='*60}\n")

    # Get explanation
    feature_names = [f['name'] for f in features]
    explanation = explain_prediction(model, X_input, feature_names, is_classifier)

    if 'error' in explanation:
        return dbc.Alert([
            html.H4("‚ö†Ô∏è Prediction Failed"),
            html.Hr(),
            html.P(f"Error: {explanation['error']}"),
            html.Small("Check console for details", className="text-muted")
        ], color="danger")

    # Handle prediction display
    pred_value = explanation['prediction']
    if isinstance(pred_value, np.ndarray):
        if pred_value.ndim > 0:
            pred_display = f"Classes: {pred_value}" if is_classifier else f"Value: {pred_value[0]:.4f}"
        else:
            pred_display = f"Class: {int(pred_value)}" if is_classifier else f"Value: {float(pred_value):.4f}"
    else:
        pred_display = f"Class: {pred_value}" if is_classifier else f"Value: {pred_value:.4f}"

    if explanation.get('feature_contributions') is None:
        return dbc.Alert([
            html.H4("üéØ Prediction Result"),
            html.H2(pred_display, className="text-primary")
        ], color="success")

    contrib_plot = create_contribution_plot(
        explanation['feature_contributions'],
        "Feature Contributions"
    )

    results = [
        dbc.Row([
            dbc.Col([
                dbc.Card([
                    dbc.CardBody([
                        html.H4("üéØ Prediction Result", className="mb-3"),
                        html.H2(pred_display, className="text-primary mb-2"),
                        html.Hr(),
                        html.H5("üîë Top Factors:", className="mb-2"),
                        html.Ul([
                            html.Li([
                                html.Strong(f"{row['feature']}: "),
                                f"{row['value']:.2f}"
                            ])
                            for _, row in explanation['top_features'].iterrows()
                        ])
                    ])
                ])
            ], width=4),

            dbc.Col([
                dcc.Graph(figure=contrib_plot)
            ], width=8)
        ])
    ]

    # Handle probability display (shown above the result card)
    if is_classifier and 'probability' in explanation:
        prob_text = _format_probabilities(explanation['probability'], pred_value)
        results.insert(0, dbc.Alert(f"üìä Probabilities: {prob_text}", color="info"))

    return results

In [15]:
# ==================== COMPLETE FIXED DASHBOARD ====================
if __name__ == '__main__':
    # Startup banner with the number of inputs each model's form will show.
    print("üöÄ Starting Enhanced Dashboard...")
    print("‚úì Churn features:", len(CHURN_FEATURES))
    print("‚úì Demand features:", len(DEMAND_FEATURES))
    # NOTE(review): debug=True together with host='0.0.0.0' serves the app on
    # every network interface with debug tooling enabled; confirm this is
    # intended outside a throwaway notebook session.
    app.run(host='0.0.0.0', port=8050, debug=True)

üöÄ Starting Enhanced Dashboard...
‚úì Churn features: 15
‚úì Demand features: 47


<IPython.core.display.Javascript object>