In [None]:
# Setup
import sys
from pathlib import Path

# Put the parent of the current working directory at the front of sys.path
# (only if it is not already listed) so that the `src` package imported
# further down can be resolved from there.
project_root = Path.cwd().parent
root_entry = str(project_root)
already_listed = root_entry in sys.path
if not already_listed:
    sys.path.insert(0, root_entry)

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from src.config import (
    TICKETS_FEATURES_FILE,
    MACRO_CLUSTERS_FILE,
    CLUSTER_SUMMARY_FILE
)

## Load Processed Data

In [None]:
# Load data
# Each processed CSV (paths come from src.config) becomes its own DataFrame.
tickets = pd.read_csv(TICKETS_FEATURES_FILE)
macros = pd.read_csv(MACRO_CLUSTERS_FILE)
clusters = pd.read_csv(CLUSTER_SUMMARY_FILE)

# Print the row count of every frame, with thousands separators, as a quick
# sanity check that the loads returned data.
row_counts = {
    "Tickets": len(tickets),
    "Macros": len(macros),
    "Clusters": len(clusters),
}
for label, n_rows in row_counts.items():
    print(f"{label}: {n_rows:,}")

## Ticket Analysis

In [None]:
# Basic statistics
# Headline ticket metrics, computed once and then printed.
avg_csat = tickets['csat_score'].mean()
avg_handle_minutes = tickets['total_handle_time_minutes'].mean()
# Mean of a boolean mask = fraction of rows where it is True, i.e. the share
# of tickets with at least one reopen.
reopen_rate = (tickets['reopens_count'] > 0).mean()
# Mean of the macro-used flag, reported below as a percentage.
macro_share = tickets['is_macro_used'].mean()

print("📈 Ticket Statistics:")
print(f"  Avg CSAT: {avg_csat:.2f}")
print(f"  Avg Handle Time: {avg_handle_minutes:.1f} min")
print(f"  Reopen Rate: {reopen_rate:.1%}")
print(f"  % with Macro: {macro_share:.1%}")

In [None]:
# Contact Driver Distribution
# Count tickets per contact driver and plot the counts as a bar chart.
#
# The label column and the count column are named explicitly. A bare
# value_counts().reset_index() only produces 'contact_driver' / 'count'
# columns on pandas >= 2.0; earlier releases produce 'index' /
# 'contact_driver', which would break the x/y mapping below.
driver_counts = (
    tickets['contact_driver']
    .value_counts()
    .rename_axis('contact_driver')
    .reset_index(name='count')
)

fig = px.bar(
    driver_counts,
    x='contact_driver',
    y='count',
    title='Tickets by Contact Driver',
    labels={'contact_driver': 'Contact Driver', 'count': 'Ticket Count'}
)
fig.show()

In [None]:
# CSAT Distribution
# Histogram of the ticket-level `csat_score` column, requested with nbins=5.
csat_hist_options = dict(
    x='csat_score',
    nbins=5,
    labels={'csat_score': 'CSAT Score'},
    title='CSAT Score Distribution',
)
fig = px.histogram(tickets, **csat_hist_options)
fig.show()

In [None]:
# Macro Usage vs Outcomes
# Compare mean CSAT, mean handle time and reopen rate between the groups of
# tickets formed by the `is_macro_used` flag.
comparison = (
    tickets.groupby('is_macro_used')
    .agg(**{
        # Named aggregation ties each display label to its source column,
        # instead of relying on the positional order of the result columns.
        'Avg CSAT': ('csat_score', 'mean'),
        'Avg Handle Time': ('total_handle_time_minutes', 'mean'),
        # Share of tickets in the group that were reopened at least once.
        'Reopen Rate': ('reopens_count', lambda reopens: (reopens > 0).mean()),
    })
    .round(2)
    # Label rows by their actual group key rather than by position: assigning
    # a fixed two-item list fails with a length mismatch when only one group
    # exists. The mapping also matches 0/1 integer flags.
    .rename(index={False: 'No Macro', True: 'Used Macro'})
    # Drop the 'is_macro_used' index name so the displayed table has a plain
    # row header.
    .rename_axis(None)
)

print("📊 Macro Usage Impact:")
display(comparison)

## Macro Analysis

In [None]:
# Category distribution
# Pie chart with one slice per distinct value of the macros' `category` column.
category_pie_options = {'names': 'category', 'title': 'Macros by Category'}
fig = px.pie(macros, **category_pie_options)
fig.show()

In [None]:
# Effectiveness by Category
# Keep only macros whose `has_sufficient_usage` flag is set, then summarise
# each category: mean effectiveness index, summed usage count, and number of
# non-null macro ids.
sufficiently_used = macros[macros['has_sufficient_usage']]
category_stats = (
    sufficiently_used.groupby('category')
    .agg(**{
        # Named aggregation keeps each label attached to its source column
        # instead of renaming the result columns by position afterwards.
        'Avg Effectiveness': ('macro_effectiveness_index', 'mean'),
        'Total Usage': ('usage_count', 'sum'),
        'Macro Count': ('macro_id', 'count'),
    })
    .round(1)
)

print("📊 Category Performance:")
# Best-performing categories first.
display(category_stats.sort_values('Avg Effectiveness', ascending=False))

In [None]:
# Usage Distribution (Long Tail)
# Histogram of per-macro `usage_count`, requested with nbins=30.
fig = px.histogram(
    data_frame=macros,
    x='usage_count',
    title='Macro Usage Distribution (Long Tail)',
    labels={'usage_count': 'Usage Count'},
    nbins=30,
)
fig.show()

In [None]:
# Macro Categories
# Count macros per `macro_category` value and plot the counts as a bar chart,
# one colour per category.
#
# The label column and the count column are named explicitly. A bare
# value_counts().reset_index() only produces 'macro_category' / 'count'
# columns on pandas >= 2.0; earlier releases produce 'index' /
# 'macro_category', which would break the x/y/color mapping below.
macro_category_counts = (
    macros['macro_category']
    .value_counts()
    .rename_axis('macro_category')
    .reset_index(name='count')
)

fig = px.bar(
    macro_category_counts,
    x='macro_category',
    y='count',
    title='Macros by Effectiveness Category',
    color='macro_category'
)
fig.show()

## Cluster Analysis

In [None]:
# Cluster overview
# Bar chart of average effectiveness per cluster, highest first, with each
# bar coloured by the cluster's macro count.
ranked_clusters = clusters.sort_values('avg_effectiveness', ascending=False)
axis_labels = {
    'cluster_label': 'Cluster',
    'avg_effectiveness': 'Avg Effectiveness',
    'num_macros': 'Macro Count',
}
fig = px.bar(
    data_frame=ranked_clusters,
    x='cluster_label',
    y='avg_effectiveness',
    color='num_macros',
    labels=axis_labels,
    title='Cluster Effectiveness',
)
# Tilt the x tick labels by 45 degrees.
fig.update_xaxes(tickangle=45)
fig.show()

In [None]:
# Consolidation candidates
# Rows of the cluster summary whose `consolidation_candidate` flag is set.
consolidation = clusters[clusters['consolidation_candidate']]

if consolidation.empty:
    print("✅ No clusters flagged for consolidation")
else:
    print("⚠️ Clusters Recommended for Consolidation:")
    # Show only the columns needed to judge each flagged cluster.
    display(consolidation[['cluster_label', 'num_macros', 'avg_effectiveness']])

## Key Insights Summary

In [None]:
# Summary statistics
# Text recap of the notebook's headline numbers, framed by a 60-character rule.
banner = "=" * 60
print(banner)
print("📋 KEY INSIGHTS")
print(banner)

# Macro usage impact: split tickets on the macro-used flag and compare means.
used_macro = tickets['is_macro_used']
macro_tickets = tickets[used_macro]
no_macro_tickets = tickets[~used_macro]

# Positive lift = higher mean CSAT on tickets where a macro was used.
csat_lift = macro_tickets['csat_score'].mean() - no_macro_tickets['csat_score'].mean()
# Positive savings = lower mean handle time on tickets where a macro was used.
time_savings = (
    no_macro_tickets['total_handle_time_minutes'].mean()
    - macro_tickets['total_handle_time_minutes'].mean()
)

print("\n1. MACRO IMPACT:")
print(f"   CSAT Lift: {csat_lift:+.2f} points when macros used")
print(f"   Time Savings: {time_savings:.1f} min faster with macros")

# Underused gems: macros labelled 'Underused Gem' in `macro_category`.
gems = macros[macros['macro_category'] == 'Underused Gem']
print(f"\n2. UNDERUSED GEMS: {len(gems)} high-performing macros with low usage")

# Low performers: macros labelled 'Low Effectiveness' in `macro_category`.
low_perf = macros[macros['macro_category'] == 'Low Effectiveness']
print(f"\n3. NEEDS ATTENTION: {len(low_perf)} low-effectiveness macros to review")

# Unused: macros whose usage count is exactly zero.
unused = macros[macros['usage_count'] == 0]
print(f"\n4. UNUSED MACROS: {len(unused)} macros with zero usage")

print("\n" + banner)