# In [None]:
import pandas as pd
import numpy as np
import random
from faker import Faker
from datetime import datetime, timedelta

# ==========================================
# 1. INITIALIZATION & REPRODUCIBILITY
# ==========================================

# Shared Faker instance; the generator below uses it to draw random dates.
fake = Faker()
# Seed every random source this script draws from -- Faker, NumPy's global
# RNG, and the stdlib `random` module -- with the same fixed value.
# NOTE(review): generated dates are anchored to the run date (datetime.now()
# and end_date='today'), so output may still differ from day to day even with
# these seeds -- confirm whether that matters.
Faker.seed(42)
np.random.seed(42)
random.seed(42)

NUM_ROWS = 5_000  # number of synthetic interactions to generate

# --- CONFIGURATION ---
# Queues an interaction can be routed to (sampled uniformly).
QUEUES = [
    'Transaction Disputes',
    'Fraud Ops',
    'Collections',
    'Account Access',
    'Card Services',
    'Credit Line',
    'VIP Services',
    'Tech Support',
    'General Inquiry',
]

# Wrap-up codes an agent can pick; the two "Escalated ..." entries are the
# ones the governance step treats as escalations.
DISPOSITIONS = [
    'Escalated to Trust & Safety',
    'Escalated to Lead',
    'Provided Adjustment',
    'Customer Education',
    'Duplicate Ticket Exists',
    'Follow Up Needed',
    'Requires More Info',
    'Callback Requested',
    'Resolved',
    'Ticket Closed',
]

# --- RISK TAXONOMY ---
# Legal-threat phrases grouped by category; the category name becomes part of
# the scenario label ("Legal_<category>").
LEGAL_PHRASES = {
    'Implied': [
        "This violates my consumer rights",
        "I know this is illegal",
        "Unfair lending practice",
    ],
    'Inexplicit': [
        "I will sue your company",
        "You'll hear from my lawyer",
        "Prepare for legal action",
    ],
    'Explicit Regulatory': [
        "I'm filing with the CFPB",
        "Reporting this to the BBB",
        "Calling the Attorney General",
    ],
    'Litigation': [
        "I have already filed a lawsuit",
        "You've been served",
        "Pending litigation",
    ],
}

# Non-legal high-risk phrases: customer vulnerability and abusive language.
RISK_PHRASES = {
    'Vulnerability': [
        "I can't afford to buy food",
        "I'm facing eviction",
        "Crisis situation",
    ],
    'Abuse': [
        "You represent a scam",
        "Shut up and listen",
        "Stop lying to me",
    ],
}

# ==========================================
# 2. DATA GENERATION ENGINE
# ==========================================

def generate_interactions(n):
    """Build ``n`` synthetic contact-center interaction records.

    Every record starts as a "Normal" interaction: not transferred, with no
    legal mention and no high-risk phrase. Later pipeline steps overwrite
    those fields for the rows they select.

    Args:
        n: Number of rows to generate.

    Returns:
        A DataFrame with one row per interaction and the columns
        interaction_id, date, agent_id, queue, agent_tenure_months,
        duration_sec, agent_disposition, transferred_flag,
        department_transferred, disconnected_by, legal_mention,
        high_risk_phrase and scenario_label.
    """
    start_date = datetime.now() - timedelta(days=365)

    # Draw the numeric part of each ID *without replacement*. Independent
    # draws from 90,000 values collide almost surely at a few thousand rows,
    # which leaves duplicate "unique" identifiers. The five-digit range is
    # kept whenever it is large enough and only widened when n exceeds it.
    id_ceiling = max(100000, 10000 + n)
    id_numbers = random.sample(range(10000, id_ceiling), n) if n > 0 else []

    data = []
    for id_number in id_numbers:
        queue = np.random.choice(QUEUES)
        # Triangular distribution over 1-60 months with the mode at 12.
        tenure_months = int(np.random.triangular(1, 12, 60))

        row = {
            'interaction_id': f"INT-{id_number}",
            'date': fake.date_between(start_date=start_date, end_date='today'),
            'agent_id': f"AGT-{random.randint(100, 150)}",
            'queue': queue,
            'agent_tenure_months': tenure_months,
            'duration_sec': random.randint(60, 3600),
            'agent_disposition': np.random.choice(DISPOSITIONS),
            'transferred_flag': 'N',
            'department_transferred': 'None',
            'disconnected_by': np.random.choice(['Customer', 'Agent', 'None'], p=[0.85, 0.05, 0.10]),
            'legal_mention': None,
            'high_risk_phrase': None,
            'scenario_label': 'Normal'
        }
        data.append(row)
    return pd.DataFrame(data)

def inject_risk(df):
    """Randomly plant legal, vulnerability, or abuse phrases into interactions.

    A row is selected with probability 0.15 when its queue is Collections or
    Fraud Ops and 0.02 otherwise. A selected row receives one of three
    equally likely risk types:

    * Legal: a phrase from a random LEGAL_PHRASES category is written to
      'legal_mention' and the label becomes "Legal_<category>".
    * Vulnerability / Abuse: a phrase from the matching RISK_PHRASES list is
      written to 'high_risk_phrase' and the label becomes
      "Risk_Vulnerability" or "Risk_Abuse".

    Args:
        df: Interaction frame containing the 'queue', 'legal_mention',
            'high_risk_phrase' and 'scenario_label' columns.

    Returns:
        The frame obtained by applying the scenario logic to every row.
    """
    elevated_queues = ['Collections', 'Fraud Ops']
    # Non-legal risk types -> (RISK_PHRASES key, scenario label to record).
    phrase_scenarios = {
        'Vuln': ('Vulnerability', "Risk_Vulnerability"),
        'Abuse': ('Abuse', "Risk_Abuse"),
    }

    def apply_scenarios(row):
        # Collections and Fraud Ops see far more risky contacts.
        hit_rate = 0.02
        if row['queue'] in elevated_queues:
            hit_rate = 0.15

        # Guard clause: the vast majority of rows pass through untouched.
        if random.random() >= hit_rate:
            return row

        risk_type = random.choice(['Legal', 'Vuln', 'Abuse'])

        if risk_type == 'Legal':
            categories = list(LEGAL_PHRASES)
            cat = random.choice(categories)
            row['legal_mention'] = random.choice(LEGAL_PHRASES[cat])
            row['scenario_label'] = f"Legal_{cat}"
            return row

        phrase_key, label = phrase_scenarios[risk_type]
        row['high_risk_phrase'] = random.choice(RISK_PHRASES[phrase_key])
        row['scenario_label'] = label
        return row

    return df.apply(apply_scenarios, axis=1)

def apply_governance(df):
    """Label each interaction with how its escalation was handled.

    A row "has risk" when either 'legal_mention' or 'high_risk_phrase' holds a
    value. Missing values are detected with ``pd.notna`` so that both ``None``
    and NaN (for example after a CSV round-trip) count as "no phrase".

    * Risk present: the agent escalates correctly unless a random failure
      occurs (probability 0.25 for agents with under 6 months of tenure,
      0.08 otherwise). A correct escalation sets the transfer fields and the
      disposition to Trust & Safety and is labelled 'Valid_Escalation'; a
      failure leaves the row as it was and is labelled 'Missed_Escalation'.
    * No risk: a disposition starting with "Escalated" is labelled
      'Invalid_Disposition'; anything else 'No_Escalation_Required'.

    NOTE(review): a 'Missed_Escalation' row keeps its original disposition,
    which may itself start with "Escalated" -- confirm that is intended.

    Args:
        df: Interaction frame containing the 'legal_mention',
            'high_risk_phrase', 'agent_disposition', 'agent_tenure_months',
            'transferred_flag' and 'department_transferred' columns.

    Returns:
        A new frame with the same rows plus an 'escalation_validity' column.
    """
    if df.empty:
        # A zero-row frame would come back from `apply` without the new
        # column; add it explicitly so callers can always rely on it.
        labelled = df.copy()
        labelled['escalation_validity'] = pd.Series(dtype=object)
        return labelled

    def govern(row):
        # Work on a copy rather than mutating the object handed out by apply.
        row = row.copy()

        has_risk = pd.notna(row['legal_mention']) or pd.notna(row['high_risk_phrase'])
        is_escalated = str(row['agent_disposition']).startswith('Escalated')

        if has_risk:
            # Failure injection: newer agents miss escalations more often.
            fail_prob = 0.25 if row['agent_tenure_months'] < 6 else 0.08

            if random.random() > fail_prob:
                row['transferred_flag'] = 'Y'
                row['agent_disposition'] = 'Escalated to Trust & Safety'
                row['department_transferred'] = 'Trust & Safety'
                row['escalation_validity'] = 'Valid_Escalation'
            else:
                row['escalation_validity'] = 'Missed_Escalation'
        elif is_escalated:
            # Escalating a risk-free contact ("dumping").
            row['escalation_validity'] = 'Invalid_Disposition'
        else:
            row['escalation_validity'] = 'No_Escalation_Required'

        return row

    return df.apply(govern, axis=1)

# ==========================================
# 3. EXECUTION & EXPORT
# ==========================================

# Run the three pipeline stages in order: generate the base rows, inject risk
# phrases, then label how each escalation was handled.
interactions = (
    generate_interactions(NUM_ROWS)
    .pipe(inject_risk)
    .pipe(apply_governance)
)

# The file name 'interactions.csv' is what the Power BI side expects.
interactions.to_csv('interactions.csv', index=False)

# Console summary: total row count, then the breakdown of governance labels.
print(
    f"Success! 'interactions.csv' generated with {len(interactions)} records."
)
print(interactions['escalation_validity'].value_counts())