In [None]:
# Setup - ensure src is importable
import logging
import sys
from pathlib import Path


def find_project_root(start: Path) -> Path:
    """Return the nearest directory at or above ``start`` that contains ``src/``.

    Searching upward means the notebook works whether the kernel was started
    in the notebook's own folder, in a sub-folder of it, or in the project
    root itself.

    Args:
        start: Directory to begin the upward search from.

    Returns:
        The first directory (``start`` itself or one of its ancestors) that
        has a ``src`` sub-directory. If none is found, ``start.parent`` is
        returned, which matches the previous hard-coded behaviour.
    """
    for candidate in (start, *start.parents):
        if (candidate / "src").is_dir():
            return candidate
    return start.parent


# Add project root to path (only once, so re-running the cell is harmless)
project_root = find_project_root(Path.cwd())
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# Configure logging for notebook: INFO and above, message text only
logging.basicConfig(level=logging.INFO, format='%(message)s')

## Step 1: Generate Synthetic Data

We'll create 1,000 tickets and 50 macros for quick exploration.

In [None]:
from src.data_generation import generate_all_data

# Keep the dataset small (1,000 tickets / 50 macros) so the walkthrough runs quickly.
# save=True is forwarded to generate_all_data; presumably it persists the frames
# for the later pipeline steps - confirm in src/data_generation.
macros_df, tickets_df, macro_usage_df = generate_all_data(num_tickets=1000, num_macros=50, save=True)

# Report how many rows each generated frame contains.
for label, frame in (
    ("macros", macros_df),
    ("tickets", tickets_df),
    ("macro usage records", macro_usage_df),
):
    print(f"Generated {len(frame)} {label}")

In [None]:
# Preview the data: first rows of the generated tickets and macros.
# The emoji in the headings were stored as mojibake (UTF-8 bytes decoded with
# the wrong codec); they are restored to the intended characters here.
print("\n📋 Sample Tickets:")
display(tickets_df.head())

# Only the identifying / status columns of the macros are shown.
print("\n📝 Sample Macros:")
display(macros_df[["macro_id", "macro_name", "category", "is_active"]].head())

## Step 2: Clean and Preprocess

In [None]:
from src.data_cleaning import clean_all_data

# clean_all_data receives no frames from this notebook, only save=True.
# NOTE(review): it presumably reads the data persisted in Step 1 - confirm
# in src/data_cleaning before running this cell on its own.
(
    tickets_clean,
    macros_clean,
    usage_clean,
) = clean_all_data(save=True)

## Step 3: Engineer Features

In [None]:
from src.feature_engineering import engineer_all_features

# engineer_all_features is called with save=True only and returns two frames:
# ticket-level features and macro-level features.
tickets_features, macro_features = engineer_all_features(save=True)

# Show the first 10 macros with their headline metrics.
# The heading emoji was stored as mojibake; it is restored to the intended character.
print("\n📊 Macro Features:")
display(macro_features[["macro_id", "usage_count", "avg_csat", "avg_handle_time", "reopen_rate"]].head(10))

## Step 4: Score Macro Effectiveness

In [None]:
from src.macro_effectiveness import score_all_macros

# score_all_macros is called with save=True only and returns one frame of scores.
macro_scores = score_all_macros(save=True)

# Show the 10 macros with the highest effectiveness index.
# The heading emoji was stored as mojibake; it is restored to the intended character.
print("\n🎯 Macro Effectiveness Scores:")
display(
    macro_scores.nlargest(10, "macro_effectiveness_index")[
        ["macro_id", "macro_name", "macro_effectiveness_index", "usage_count", "macro_category"]
    ]
)

## Step 5: Cluster Macros by Topic

In [None]:
from src.nlp_clustering import cluster_all_macros

# cluster_all_macros is called with save=True only and returns two objects:
# the per-macro frame (reused by the visualization cells) and a cluster summary.
macro_clusters, cluster_summary = cluster_all_macros(save=True)

# The heading emoji was stored as mojibake; it is restored to the intended character.
print("\n🔍 Cluster Summary:")
display(cluster_summary)

## Step 6: Generate Evaluation Report

In [None]:
from src.evaluation import evaluate_all

# evaluate_all is called with save=True only; its return value is kept in
# `report` and printed as plain text.
# NOTE(review): the type of `report` is not visible here - if it is a
# DataFrame, display(report) would render better than print.
report = evaluate_all(save=True)
print(report)

## Quick Visualizations

In [None]:
import plotly.express as px

# Effectiveness Distribution
# Only rows flagged has_sufficient_usage are plotted.
has_enough_usage = macro_clusters["has_sufficient_usage"]
histogram_labels = {"macro_effectiveness_index": "Effectiveness Index"}

fig = px.histogram(
    data_frame=macro_clusters[has_enough_usage],
    x="macro_effectiveness_index",
    nbins=15,
    title="Macro Effectiveness Distribution",
    labels=histogram_labels,
)
fig.show()

In [None]:
# Usage vs Effectiveness Scatter
# Only rows flagged has_sufficient_usage are plotted; points are coloured by
# macro category and expose the macro id/name on hover.
sufficient_usage_mask = macro_clusters["has_sufficient_usage"]
scatter_labels = {
    "usage_count": "Usage Count",
    "macro_effectiveness_index": "Effectiveness Index",
}

fig = px.scatter(
    data_frame=macro_clusters[sufficient_usage_mask],
    x="usage_count",
    y="macro_effectiveness_index",
    color="macro_category",
    hover_data=["macro_id", "macro_name"],
    title="Usage vs Effectiveness",
    labels=scatter_labels,
)
fig.show()

## Next Steps

1. **Launch Dashboard**: `streamlit run app/streamlit_app.py`
2. **Explore Data**: See `02_data_exploration.ipynb`
3. **Customize Config**: Edit `src/config.py`