<a href="https://colab.research.google.com/github/mosomo82/COMP_SCI_5530/blob/main/Project_Customer_Churn/src/Project_Customer_Narratives.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
pip install google-generativeai --quiet

# Generate behavioral narratives on 3 datasets using LLM Gemini


In [8]:
import pandas as pd
import google.generativeai as genai
import requests
import io
import time
import os
import random
import re
from google.colab import userdata # Import userdata to access secrets

# --- 1.i. CONFIGURATION ---

# Access your API key securely using Colab's Secrets Manager
# Add your API key to Colab secrets with the name 'GOOGLE_API_KEY'
try:
    GEMINI_API_KEY = userdata.get('GOOGLE_API_KEY') # Gemini
except userdata.SecretNotFoundError:
    raise ValueError("API key not found in Colab secrets. Please add it as 'GOOGLE_API_KEY'.")

# GitHub URL for your clean_data.csv file
DATA_URL = "https://raw.githubusercontent.com/mosomo82/COMP_SCI_5530/refs/heads/main/Project_Customer_Churn/clean_data/clean_data_text_generation.csv"

# Number of customers to sample
N_SAMPLES = 75

# Output file name
OUTPUT_FILE = "customers_narratives.csv"

# --- 2.i. CONFIGURE GEMINI API ---

try:
    genai.configure(api_key=GEMINI_API_KEY)
except AttributeError:
    print("Error: The Google Generative AI library is not installed correctly or the API key is invalid.")
    exit()

# Set up the model # temp for customer behavioral narrative
generation_config={
    "temperature": 0.8,
    "top_p": 0.9, # reduce randon word choices
    "max_output_tokens": 2048
}

safety_settings=[
      {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
      {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
      {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
      {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
    ]

model = genai.GenerativeModel(model_name="gemini-2.5-flash",
                              generation_config=generation_config,
                              safety_settings=safety_settings)

# --- 3. DYNAMIC PROMPT CREATION FUNCTION ---
# Dataset 1
# Chain-of-Thought Prompting

def create_prompt_cot_ds1(customer_row):
    """Creates a tailored LLM prompt based on the customer's profile."""

    # Base profile string
    profile = f"""
    - CustomerID: {customer_row['customerID']}
    - Contract: {customer_row['Contract_Encoded']}
    - Internet Service: {customer_row['InternetService']}
    - Online Security: {customer_row['OnlineSecurity']}
    - Tech Support Add-on: {customer_row['TechSupport']}
    - Monthly Charges: {customer_row['MonthlyCharges']}
    - Payment Method: {customer_row['AutomaticPayment']}
    """

    # Create different scenarios for "Churn" vs. "No Churn"
    if customer_row['Churn'] == 1:

        reasons = [
            f"frustration with high MonthlyCharges of ${customer_row['MonthlyCharges']}",
            f"unreliable {customer_row['InternetService']} internet service",
            f"no online protection {customer_row['OnlineSecurity']}",
            f"difficult getting help {customer_row['TechSupport']},"
            f"frustration with manual payment process or an issue with automatic payment {customer_row['AutomaticPayment']},"
            "a billing dispute",
        ]

        chosen_reason = random.choice(reasons)

        prompt = f"""
        You are a senior customer behavior analyst. Based on {chosen_reason}, generate behavior narrative (3 - 4 sentences)
        This customer is churnning (Churn = 1)

        CUSTOMER PROFILE:
        {profile}

        INSTRUCTIONS:
        Infer with customer's likely habits, personality and relationship with their services, focusing on

        1. Tech-related skills and issues: (from InternetService, OnlineSecurity, TechSupport)
        2. Financial and contractual habits: ( from Contract_Encoded, AutomaticPayment, MonthlyCharges)
        3. Overall customer "persona" created:

        """

    else:  # Churn == 0

        reasons = [
            f" a simple billing question that got resolved",
            f" the reliable {customer_row['InternetService']} service",
            f"appreciating the included {customer_row['OnlineSecurity']} OnlineSecurity",
            f" a quick technical question {customer_row['TechSupport']} that got resolved easily",
            f"the convenience of automatic payments {customer_row['AutomaticPayment']}",
        ]

        chosen_reason = f"praising {random.choice(reasons)}"

        prompt = f"""
        You are a senior customer behavior analyst. Based on {chosen_reason}, generate behavior narrative (3 - 4 sentences)
        This cusotmer is not churnning.

        CUSTOMER PROFILE:
        {profile}

         INSTRUCTIONS:
        Infer with customer's likely habits, personality and relationship with their services, focusing on

        1. Tech-related skills and issues: (from InternetService, OnlineSecurity, TechSupport)
        2. Financial and contractual habits: ( from Contract_Encoded, AutomaticPayment, MonthlyCharges)
        3. Overall customer "persona" created:

         """

    return prompt

# --- 3. DYNAMIC PROMPT CREATION FUNCTION ---
# Dataset 2
# # Chain-of-Thought Prompting

def create_prompt_cot_ds2(customer_row):
    """Creates a tailored LLM prompt based on the customer's profile."""

    # Base profile string
    profile = f"""
    - CustomerID: {customer_row['customerID']}
    - SeniorCitizen: {customer_row['SeniorCitizen']}
    - Partner: {customer_row['Partner']}
    - Dependents: {customer_row['Dependents']}
    - MultipleLines: {customer_row['MultipleLines']}
    - InternetService: {customer_row['InternetService']}
    - OnlineBackup: {customer_row['OnlineBackup']}
    - Tech Support Add-on: {customer_row['TechSupport']}
    - Contract: {customer_row['Contract_Encoded']}
    - Monthly Charges: {customer_row['MonthlyCharges']}
    - TotalCharges: {customer_row['TotalCharges']}
    """
    # Create different scenarios for "Churn" vs. "No Churn"
    if customer_row['Churn'] == 1:

        # Reasons are updated to use the new, more detailed variables
        reasons = [
            f"frustration with high MonthlyCharges of ${customer_row['MonthlyCharges']}",
            f"unreliable {customer_row['InternetService']} internet service",
            f"a poor experience with TechSupport ({customer_row['TechSupport']})",
            f"a billing dispute related to their MonthlCharges (${customer_row['MonthlyCharges']})",
            f"feeling the high ${customer_row['MonthlyCharges']} isn't justified for their household (Partner: {customer_row['Partner']}, Dependents: {customer_row['Dependents']})",
            f"feeling undervalued as a loyal customer (TotalCharges: ${customer_row['TotalCharges']})",
            f"the {customer_row['Contract_Encoded']} contract ending and finding a better offer elsewhere",
            f"as a SeniorCitizen ({customer_row['SeniorCitizen']}), finding the tech (like {customer_row['OnlineBackup']}) too complex for the price."
        ]

        chosen_reason = random.choice(reasons)

        prompt = f"""
        You are a senior customer behavior analyst. Based on {chosen_reason}, generate a behavioral narrative (3-4 sentences) for this customer who is churning (Churn = 1).

        CUSTOMER PROFILE:
        {profile}

        INSTRUCTIONS:
        Infer the customer's likely habits, personality, and relationship with their services. Focus on:

        1. **Household & Social Context:** (Analyze SeniorCitizen, Partner, Dependents)
        2. **Tech-Savviness & Needs:** (Analyze InternetService, OnlineBackup, TechSupport, MultipleLines)
        3. **Financial & Contractual Stance:** (Analyze Contract_Encoded, MonthlyCharges, and TotalCharges. Note if TotalCharges is high, implying long tenure.)
        4. **Overall "persona" created by this data:**
        """

    else:  # Churn == 0

        # Reasons are updated to reflect loyalty and satisfaction with new variables
        reasons = [
            f"a simple billing question that was resolved quickly",
            f"the reliable {customer_row['InternetService']} service for their household (Partner: {customer_row['Partner']})",
            f"a quick, positive experience with TechSupport ({customer_row['TechSupport']})",
            f"appreciating the value of add-ons like {customer_row['OnlineBackup']}",
            f"satisfaction with their {customer_row['Contract_Encoded']} contract terms",
            f"feeling the service is a good value (${customer_row['MonthlyCharges']}) for their needs (Dependents: {customer_row['Dependents']})",
            f"feeling valued as a loyal, long-term customer (TotalCharges: ${customer_row['TotalCharges']})"
        ]

        chosen_reason = f"praising {random.choice(reasons)}"

        prompt = f"""
        You are a senior customer behavior analyst. Based on {chosen_reason}, generate a behavioral narrative (3-4 sentences) for this loyal customer who is NOT churning (Churn = 0).

        CUSTOMER PROFILE:
        {profile}

         INSTRUCTIONS:
        Infer the customer's likely habits, personality, and relationship with their services. Focus on:

        1. **Household & Social Context:** (Analyze SeniorCitizen, Partner, Dependents)
        2. **Tech-Savviness & Needs:** (Analyze InternetService, OnlineBackup, TechSupport, MultipleLines)
        3. **Financial & Contractual Stance:** (Analyze Contract_Encoded, MonthlyCharges, and TotalCharges. Note if TotalCharges is high, implying long tenure.)
        4. **Overall "persona" created by this data:**
         """

    return prompt

# --- 3. DYNAMIC PROMPT CREATION FUNCTION ---
# Dataset 3
# A Few-Shot Prompting

FEW_SHOT_EXAMPLES = """
### EXAMPLE 1 (Churn = 1: The High-Cost, Low-Value Frustration)

CUSTOMER PROFILE:
- CustomerID: 5575-GNVDE
- Dependents: No
- InternetService: DSL
- Tech Support Add-on: No
- Contract: One Year
- TotalCharges: 789.25

BEHAVIORAL NARRATIVE:
This customer, living without dependents, appears to be a moderate-use individual who tolerated the DSL service but lacked essential protections like Tech Support. Despite having a moderate contract length (One Year), their churn was triggered by the perceived low value for the cumulative cost, indicated by their sizable TotalCharges. Their final action suggests a tech-aware user who became frustrated with self-service issues and ultimately prioritized cost-effectiveness over loyalty.

---

### EXAMPLE 2 (Churn = 0: The Loyal, Family User)

CUSTOMER PROFILE:
- CustomerID: 3668-QPYBK
- Dependents: Yes
- InternetService: Fiber optic
- Tech Support Add-on: Yes
- Contract: Two Year
- TotalCharges: 5425.05

BEHAVIORAL NARRATIVE:
This customer runs a high-demand household with dependents, requiring robust service features like Fiber Optic internet and dedicated Tech Support, which they readily adopted. Their significant TotalCharges and active Two Year contract highlight deep loyalty and a low-risk financial profile. The convenience and stability of the comprehensive service package ensure their satisfaction, making them a highly stable and valuable long-term asset.

---

### EXAMPLE 3 (Churn = 1: The Short-Term Value Seeker)

CUSTOMER PROFILE:
- CustomerID: 7590-VHIVE
- Dependents: No
- InternetService: No
- Tech Support Add-on: No internet service
- Contract: Month-to-month
- TotalCharges: 19.95

BEHAVIORAL NARRATIVE:
A transient customer focused solely on a basic phone service with no internet, their low TotalCharges reflect a very short or minimal engagement. Without dependents, they have maximum flexibility and zero reliance on tech support. Their month-to-month contract allowed for an easy, impulsive churn decision when a cheaper alternative was found, pointing to a highly price-sensitive and non-committal persona.
"""

def create_prompt_few_shot_ds3(customer_row, examples=FEW_SHOT_EXAMPLES):
    """
    Creates a tailored LLM prompt using Few-Shot In-Context Learning.
    The few-shot examples are prepended to the new customer's data.
    """

    # 1. Base profile string for the new customer (No changes needed)
    profile = f"""
    - CustomerID: {customer_row['customerID']}
    - Dependents: {customer_row['Dependents']}
    - InternetService: {customer_row['InternetService']}
    - Tech Support Add-on: {customer_row['TechSupport']}
    - Contract: {customer_row['Contract_Encoded']}
    - TotalCharges: {customer_row['TotalCharges']}
    """

    # 2. Status Instruction for the New Customer (Simplified)
    if customer_row['Churn'] == 1:
        # Note: The 'reasons' list is now ONLY used to set a positive/negative tone,
        # but the specific choice is NOT used in the final prompt instruction.
        status_instruction = "This customer is **churning (Churn = 1)**. Infer a comprehensive narrative detailing the likely reasons for their departure."
    else:
        status_instruction = "This customer is **retained (Churn = 0)**. Infer a comprehensive narrative detailing their positive customer experience and reasons for loyalty."

    # 3. Assemble the Final Few-Shot Prompt
    prompt = f"""
You are a senior customer behavior analyst. Your task is to generate a behavioral narrative (3-4 sentences) based on the provided customer profile.

**GUIDELINES:**
1.  **Style Match:** Strictly adopt the sophisticated, analytical style used in the examples below.
2.  **Focus:** The narrative must synthesize the customer's household context, tech needs, contract terms and financial stance.
3.  **Output Format:** Provide only the final BEHAVIORAL NARRATIVE text.

{examples}

---
### NEW CUSTOMER TO ANALYZE

{status_instruction}

CUSTOMER PROFILE:
{profile}

BEHAVIORAL NARRATIVE:
"""
    return prompt

prompt_strategies = {
    "1. Chain-of-Thought Prompting (Dataset 1)": create_prompt_cot_ds1,
    "2. Chain-of-Thought Prompting (Dataset 2)": create_prompt_cot_ds2,
    "3. Few-Shot Prompting (Dataset 3)": create_prompt_few_shot_ds3
}

SELECTED_PROMPT_NAME = "3. Few-Shot Prompting (Dataset 3)" # @param ["1. Chain-of-Thought Prompting (Dataset 1)", "2. Chain-of-Thought Prompting (Dataset 2)", "3. Few-Shot Prompting (Dataset 3)"]

print(f"Loading data from {DATA_URL}...")
try:
    response = requests.get(DATA_URL)
    response.raise_for_status() # Raise an error for bad responses
    df = pd.read_csv(io.StringIO(response.text))
    print(f"Successfully loaded {len(df)} total customers.")
except requests.exceptions.RequestException as e:
    print(f"Error: Failed to fetch data from GitHub. {e}")
    exit()

# Create a balanced sample
df_churn_yes = df[df['Churn'] == 1].sample(n=N_SAMPLES, random_state=42)
df_churn_no = df[df['Churn'] == 0].sample(n=N_SAMPLES, random_state=42)
df_sample = pd.concat([df_churn_yes, df_churn_no])

print(f"Created a balanced sample of {len(df_sample)} customers.")

# --- 5.i. GENERATE NARRATIVE ---
prompt_function = prompt_strategies.get(SELECTED_PROMPT_NAME)

print(f"\nRunning '{SELECTED_PROMPT_NAME}' ---")

all_narrative = []
total = len(df_sample)
MAX_RETRIES = 3

print(f"\nStarting generation of {total} behavioral narratives ...")

for index, row in df_sample.iterrows():
    # 1. Create the dynamic prompt
    prompt_text = prompt_function(row)

    retries = 0
    while retries < MAX_RETRIES:
        try:
            # 2. Call the API
            response = model.generate_content(prompt_text)
            narrative = response.text

            # 3. Store the result
            all_narrative.append({
                'CustomerID': row['customerID'],
                'Narrative': narrative,
                'OriginalChurnStatus': row['Churn']
            })

            print(f"({len(all_narrative)}/{total}) Generated Behavioral Narrative for {row['customerID']})")
            break # Break out of the retry loop on success

        except Exception as e:
            retries += 1
            print(f"Error generating content for {row['customerID']} on attempt {retries}. Retrying...")
            if retries < MAX_RETRIES:
                time.sleep(2) # Wait a bit before retrying
            else:
                print(f"Error: Failed to generate content for {row['customerID']} after {MAX_RETRIES} retries. Skipping.:{e}")
                all_narrative.append({
                    'CustomerID': row['customerID'],
                    'Narrative': f"Error: Generation failed. {e}",
                    'OriginalChurnStatus': row['Churn']
                })
    # IMPORTANT: Add a delay to respect API rate limits (e.g., 60 requests/minute)
    # Only sleep if we successfully generated or exhausted retries
    if retries < MAX_RETRIES or (retries == MAX_RETRIES and len(all_narrative) < total):
         time.sleep(1.5)

# --- 6. SAVE FINAL FILE ---

print("\n...Generation complete.")
df_output = pd.DataFrame(all_narrative)

# Save to CSV
df_output.to_csv(OUTPUT_FILE, index=False, encoding='utf-8')

print(f"\nSuccessfully created '{OUTPUT_FILE}' with {len(df_output)} entries.")
display(df_output.head())

TimeoutException: Requesting secret GOOGLE_API_KEY timed out. Secrets can only be fetched when running from the Colab UI.

# Generate behavioral narratives on 3 datasets using LLM OpenAI (optional)

In [10]:
from openai import OpenAI

# --- 1.ii. CONFIGURATION ---

try:
    OPEN_API_KEY = userdata.get('OPENAI_API_KEY')
    client = OpenAI(api_key=OPEN_API_KEY)
except userdata.SecretNotFoundError:
    raise ValueError("API key not found in Colab secrets. Please add it as 'OPENAI_API_KEY'.")
except Exception as e:
    print(f"Error: Could not initialize OpenAI client. {e}")
    exit()


# GitHub URL for your clean_data.csv file
DATA_URL = "https://raw.githubusercontent.com/mosomo82/COMP_SCI_5530/refs/heads/main/Project_Customer_Churn/clean_data/clean_data_text_generation.csv"

# Number of customers to sample
N_SAMPLES = 75

# --- 2.ii.  CONFIGURE OPENAI API ---

# Initialize the OpenAI client
try:
    client = OpenAI(api_key=OPEN_API_KEY)
except Exception as e:
    print(f"Error: The OpenAI library is not installed correctly or the API key is invalid. {e}")
    exit()

# --- 3. DYNAMIC PROMPT CREATION FUNCTION ---
# Dataset 1
# Chain-of-Thought Prompting

def create_prompt_cot_ds1(customer_row):
    """Creates a tailored LLM prompt based on the customer's profile."""

    # Base profile string
    profile = f"""
    - CustomerID: {customer_row['customerID']}
    - Contract: {customer_row['Contract_Encoded']}
    - Internet Service: {customer_row['InternetService']}
    - Online Security: {customer_row['OnlineSecurity']}
    - Tech Support Add-on: {customer_row['TechSupport']}
    - Monthly Charges: {customer_row['MonthlyCharges']}
    - Payment Method: {customer_row['AutomaticPayment']}
    """

    # Create different scenarios for "Churn" vs. "No Churn"
    if customer_row['Churn'] == 1:

        reasons = [
            f"frustration with high MonthlyCharges of ${customer_row['MonthlyCharges']}",
            f"unreliable {customer_row['InternetService']} internet service",
            f"no online protection {customer_row['OnlineSecurity']}",
            f"difficult getting help {customer_row['TechSupport']},"
            f"frustration with manual payment process or an issue with automatic payment {customer_row['AutomaticPayment']},"
            "a billing dispute",
        ]

        chosen_reason = random.choice(reasons)

        prompt = f"""
        You are a senior customer behavior analyst. Based on {chosen_reason}, generate behavior narrative (3 - 4 sentences)
        This customer is churnning (Churn = 1)

        CUSTOMER PROFILE:
        {profile}

        INSTRUCTIONS:
        Infer with customer's likely habits, personality and relationship with their services, focusing on

        1. Tech-related skills and issues: (from InternetService, OnlineSecurity, TechSupport)
        2. Financial and contractual habits: ( from Contract_Encoded, AutomaticPayment, MonthlyCharges)
        3. Overall customer "persona" created:

        """

    else:  # Churn == 0

        reasons = [
            f" a simple billing question that got resolved",
            f" the reliable {customer_row['InternetService']} service",
            f"appreciating the included {customer_row['OnlineSecurity']} OnlineSecurity",
            f" a quick technical question {customer_row['TechSupport']} that got resolved easily",
            f"the convenience of automatic payments {customer_row['AutomaticPayment']}",
        ]

        chosen_reason = f"praising {random.choice(reasons)}"

        prompt = f"""
        You are a senior customer behavior analyst. Based on {chosen_reason}, generate behavior narrative (3 - 4 sentences)
        This cusotmer is not churnning.

        CUSTOMER PROFILE:
        {profile}

         INSTRUCTIONS:
        Infer with customer's likely habits, personality and relationship with their services, focusing on

        1. Tech-related skills and issues: (from InternetService, OnlineSecurity, TechSupport)
        2. Financial and contractual habits: ( from Contract_Encoded, AutomaticPayment, MonthlyCharges)
        3. Overall customer "persona" created:

         """

    return prompt

# --- 3. DYNAMIC PROMPT CREATION FUNCTION ---
# Dataset 2
# # Chain-of-Thought Prompting

def create_prompt_cot_ds2(customer_row):
    """Creates a tailored LLM prompt based on the customer's profile."""

    # Base profile string
    profile = f"""
    - CustomerID: {customer_row['customerID']}
    - SeniorCitizen: {customer_row['SeniorCitizen']}
    - Partner: {customer_row['Partner']}
    - Dependents: {customer_row['Dependents']}
    - MultipleLines: {customer_row['MultipleLines']}
    - InternetService: {customer_row['InternetService']}
    - OnlineBackup: {customer_row['OnlineBackup']}
    - Tech Support Add-on: {customer_row['TechSupport']}
    - Contract: {customer_row['Contract_Encoded']}
    - Monthly Charges: {customer_row['MonthlyCharges']}
    - TotalCharges: {customer_row['TotalCharges']}
    """
    # Create different scenarios for "Churn" vs. "No Churn"
    if customer_row['Churn'] == 1:

        # Reasons are updated to use the new, more detailed variables
        reasons = [
            f"frustration with high MonthlyCharges of ${customer_row['MonthlyCharges']}",
            f"unreliable {customer_row['InternetService']} internet service",
            f"a poor experience with TechSupport ({customer_row['TechSupport']})",
            f"a billing dispute related to their MonthlCharges (${customer_row['MonthlyCharges']})",
            f"feeling the high ${customer_row['MonthlyCharges']} isn't justified for their household (Partner: {customer_row['Partner']}, Dependents: {customer_row['Dependents']})",
            f"feeling undervalued as a loyal customer (TotalCharges: ${customer_row['TotalCharges']})",
            f"the {customer_row['Contract_Encoded']} contract ending and finding a better offer elsewhere",
            f"as a SeniorCitizen ({customer_row['SeniorCitizen']}), finding the tech (like {customer_row['OnlineBackup']}) too complex for the price."
        ]

        chosen_reason = random.choice(reasons)

        prompt = f"""
        You are a senior customer behavior analyst. Based on {chosen_reason}, generate a behavioral narrative (3-4 sentences) for this customer who is churning (Churn = 1).

        CUSTOMER PROFILE:
        {profile}

        INSTRUCTIONS:
        Infer the customer's likely habits, personality, and relationship with their services. Focus on:

        1. **Household & Social Context:** (Analyze SeniorCitizen, Partner, Dependents)
        2. **Tech-Savviness & Needs:** (Analyze InternetService, OnlineBackup, TechSupport, MultipleLines)
        3. **Financial & Contractual Stance:** (Analyze Contract_Encoded, MonthlyCharges, and TotalCharges. Note if TotalCharges is high, implying long tenure.)
        4. **Overall "persona" created by this data:**
        """

    else:  # Churn == 0

        # Reasons are updated to reflect loyalty and satisfaction with new variables
        reasons = [
            f"a simple billing question that was resolved quickly",
            f"the reliable {customer_row['InternetService']} service for their household (Partner: {customer_row['Partner']})",
            f"a quick, positive experience with TechSupport ({customer_row['TechSupport']})",
            f"appreciating the value of add-ons like {customer_row['OnlineBackup']}",
            f"satisfaction with their {customer_row['Contract_Encoded']} contract terms",
            f"feeling the service is a good value (${customer_row['MonthlyCharges']}) for their needs (Dependents: {customer_row['Dependents']})",
            f"feeling valued as a loyal, long-term customer (TotalCharges: ${customer_row['TotalCharges']})"
        ]

        chosen_reason = f"praising {random.choice(reasons)}"

        prompt = f"""
        You are a senior customer behavior analyst. Based on {chosen_reason}, generate a behavioral narrative (3-4 sentences) for this loyal customer who is NOT churning (Churn = 0).

        CUSTOMER PROFILE:
        {profile}

         INSTRUCTIONS:
        Infer the customer's likely habits, personality, and relationship with their services. Focus on:

        1. **Household & Social Context:** (Analyze SeniorCitizen, Partner, Dependents)
        2. **Tech-Savviness & Needs:** (Analyze InternetService, OnlineBackup, TechSupport, MultipleLines)
        3. **Financial & Contractual Stance:** (Analyze Contract_Encoded, MonthlyCharges, and TotalCharges. Note if TotalCharges is high, implying long tenure.)
        4. **Overall "persona" created by this data:**
         """

    return prompt

# --- 3. DYNAMIC PROMPT CREATION FUNCTION ---
# Dataset 3
# A Few-Shot Prompting

FEW_SHOT_EXAMPLES = """
### EXAMPLE 1 (Churn = 1: The High-Cost, Low-Value Frustration)

CUSTOMER PROFILE:
- CustomerID: 5575-GNVDE
- Dependents: No
- InternetService: DSL
- Tech Support Add-on: No
- Contract: One Year
- TotalCharges: 789.25

BEHAVIORAL NARRATIVE:
This customer, living without dependents, appears to be a moderate-use individual who tolerated the DSL service but lacked essential protections like Tech Support. Despite having a moderate contract length (One Year), their churn was triggered by the perceived low value for the cumulative cost, indicated by their sizable TotalCharges. Their final action suggests a tech-aware user who became frustrated with self-service issues and ultimately prioritized cost-effectiveness over loyalty.

---

### EXAMPLE 2 (Churn = 0: The Loyal, Family User)

CUSTOMER PROFILE:
- CustomerID: 3668-QPYBK
- Dependents: Yes
- InternetService: Fiber optic
- Tech Support Add-on: Yes
- Contract: Two Year
- TotalCharges: 5425.05

BEHAVIORAL NARRATIVE:
This customer runs a high-demand household with dependents, requiring robust service features like Fiber Optic internet and dedicated Tech Support, which they readily adopted. Their significant TotalCharges and active Two Year contract highlight deep loyalty and a low-risk financial profile. The convenience and stability of the comprehensive service package ensure their satisfaction, making them a highly stable and valuable long-term asset.

---

### EXAMPLE 3 (Churn = 1: The Short-Term Value Seeker)

CUSTOMER PROFILE:
- CustomerID: 7590-VHIVE
- Dependents: No
- InternetService: No
- Tech Support Add-on: No internet service
- Contract: Month-to-month
- TotalCharges: 19.95

BEHAVIORAL NARRATIVE:
A transient customer focused solely on a basic phone service with no internet, their low TotalCharges reflect a very short or minimal engagement. Without dependents, they have maximum flexibility and zero reliance on tech support. Their month-to-month contract allowed for an easy, impulsive churn decision when a cheaper alternative was found, pointing to a highly price-sensitive and non-committal persona.
"""

def create_prompt_few_shot_ds3(customer_row, examples=FEW_SHOT_EXAMPLES):
    """
    Creates a tailored LLM prompt using Few-Shot In-Context Learning.
    The few-shot examples are prepended to the new customer's data.
    """

    # 1. Base profile string for the new customer (No changes needed)
    profile = f"""
    - CustomerID: {customer_row['customerID']}
    - Dependents: {customer_row['Dependents']}
    - InternetService: {customer_row['InternetService']}
    - Tech Support Add-on: {customer_row['TechSupport']}
    - Contract: {customer_row['Contract_Encoded']}
    - TotalCharges: {customer_row['TotalCharges']}
    """

    # 2. Status Instruction for the New Customer (Simplified)
    if customer_row['Churn'] == 1:
        # Note: The 'reasons' list is now ONLY used to set a positive/negative tone,
        # but the specific choice is NOT used in the final prompt instruction.
        status_instruction = "This customer is **churning (Churn = 1)**. Infer a comprehensive narrative detailing the likely reasons for their departure."
    else:
        status_instruction = "This customer is **retained (Churn = 0)**. Infer a comprehensive narrative detailing their positive customer experience and reasons for loyalty."

    # 3. Assemble the Final Few-Shot Prompt
    prompt = f"""
You are a senior customer behavior analyst. Your task is to generate a behavioral narrative (3-4 sentences) based on the provided customer profile.

**GUIDELINES:**
1.  **Style Match:** Strictly adopt the sophisticated, analytical style used in the examples below.
2.  **Focus:** The narrative must synthesize the customer's household context, tech needs, contract terms and financial stance.
3.  **Output Format:** Provide only the final BEHAVIORAL NARRATIVE text.

{examples}

---
### NEW CUSTOMER TO ANALYZE

{status_instruction}

CUSTOMER PROFILE:
{profile}

BEHAVIORAL NARRATIVE:
"""
    return prompt

prompt_strategies = {
    "1. Chain-of-Thought Prompting (Dataset 1)": create_prompt_cot_ds1,
    "2. Chain-of-Thought Prompting (Dataset 2)": create_prompt_cot_ds2,
    "3. Few-Shot Prompting (Dataset 3)": create_prompt_few_shot_ds3
}

SELECTED_PROMPT_NAME = "3. Few-Shot Prompting (Dataset 3)" # @param ["1. Chain-of-Thought Prompting (Dataset 1)", "2. Chain-of-Thought Prompting (Dataset 2)", "3. Few-Shot Prompting (Dataset 3)"]

# --- 4. LOAD AND SAMPLE DATA ---

print(f"Loading data from {DATA_URL}...")
try:
    response = requests.get(DATA_URL)
    response.raise_for_status() # Raise an error for bad responses
    df = pd.read_csv(io.StringIO(response.text))
    print(f"Successfully loaded {len(df)} total customers.")
except requests.exceptions.RequestException as e:
    print(f"Error: Failed to fetch data from GitHub. {e}")
    exit()

# Create a balanced sample
df_churn_yes = df[df['Churn'] == 1].sample(n=N_SAMPLES, random_state=42)
df_churn_no = df[df['Churn'] == 0].sample(n=N_SAMPLES, random_state=42)
df_sample = pd.concat([df_churn_yes, df_churn_no])

print(f"Created a balanced sample of {len(df_sample)} customers.")

# --- 5.ii. GENERATE NARRATIVE ---
prompt_function = prompt_strategies.get(SELECTED_PROMPT_NAME)
print(f"\nRunning '{SELECTED_PROMPT_NAME}' ---")

all_narrative = []
total = len(df_sample)
MAX_RETRIES = 3
MODEL_NAME = "gpt-4o-mini" # Or "gpt-3.5-turbo"

print(f"\nStarting generation of {total} behavioral narratives using OpenAI model: {MODEL_NAME} ...")

for index, row in df_sample.iterrows():
    # 1. Create the dynamic prompt messages
    prompt_messages = prompt_function(row)

    retries = 0
    while retries < MAX_RETRIES:
        try:
            # 2. Call the OpenAI API
            response = client.chat.completions.create(
                model=MODEL_NAME,
                messages=[{"role": "user", "content": prompt_messages}],
                temperature=0.8,      # From your original config
                top_p=0.9,            # From your original config
                max_tokens=2048       # From your original config (renamed)
            )

            # 3. Extract the text from the response object
            narrative = response.choices[0].message.content.strip()

            # 4. Store the result
            all_narrative.append({
                'CustomerID': row['customerID'],
                'Narrative': narrative,
                'OriginalChurnStatus': row['Churn']
            })

            print(f"({len(all_narrative)}/{total}) Generated Behavioral Narrative for {row['customerID']})")
            break # Break out of the retry loop on success

        except Exception as e:
            retries += 1
            print(f"Error generating content for {row['customerID']} on attempt {retries}. Retrying...")
            if retries < MAX_RETRIES:
                time.sleep(2) # Wait a bit before retrying
            else:
                print(f"Error: Failed to generate content for {row['customerID']} after {MAX_RETRIES} retries. Skipping.:{e}")
                all_narrative.append({
                    'CustomerID': row['customerID'],
                    'Narrative': f"Error: Generation failed. {e}",
                    'OriginalChurnStatus': row['Churn']
                })

    # IMPORTANT: Add a delay to respect API rate limits
    # This is still a good idea for any API, including OpenAI's
    if retries < MAX_RETRIES or (retries == MAX_RETRIES and len(all_narrative) < total):
        time.sleep(1.5)


# --- 6. SAVE FINAL FILE ---

print("\n...Generation complete.")
df_output = pd.DataFrame(all_narrative)

# Save to CSV
df_output.to_csv(OUTPUT_FILE, index=False, encoding='utf-8')

print(f"\nSuccessfully created '{OUTPUT_FILE}' with {len(df_output)} entries.")
display(df_output.head())

Error: Could not initialize OpenAI client. Requesting secret OPENAI_API_KEY timed out. Secrets can only be fetched when running from the Colab UI.
Loading data from https://raw.githubusercontent.com/mosomo82/COMP_SCI_5530/refs/heads/main/Project_Customer_Churn/clean_data/clean_data_text_generation.csv...
Successfully loaded 7032 total customers.
Created a balanced sample of 150 customers.

Starting generation of 150 behavioral narratives using OpenAI model: gpt-4o-mini ...


NameError: name 'create_prompt' is not defined

# Use the transformer to extract their sentiments, pain points, and satisfaction levels.

**Note** Need to upload pre-generated customers_narratives (which are saved from OUTPUT_FILE.csv above) to proceed. We have provided these file to skip the time-consuming and constraints in generation process.

In [None]:
!pip install transformers torch --quiet

## Zero-Shot Classification Approach

In [None]:
from transformers import pipeline

# --- 2. Load Your Data ---
file_name = "customers_narratives.csv"  # The file you uploaded
try:
    df = pd.read_csv(file_name)
    print(f"Successfully loaded {file_name}")
except FileNotFoundError:
    print(f"Error: The file {file_name} was not found.")
    exit()

# --- 3.i Set up the Zero-Shot Classification Pipeline ---
print("Loading classification model...")
classifier = pipeline("zero-shot-classification",
                        model="valhalla/distilbart-mnli-12-3")
print("Model loaded successfully.")

# --- 4. Define Your Custom Labels ---
# Appply more descriptive phases instead of single,abstract words for better "hypothesis"
sentiment_labels = ['positive sentiment', 'negative sentiment', 'mixed or no sentiment']

pain_point_labels = [
    'high price',
    'unreliable service',
    'poor tech support',
    'billing dispute',
    'complex tech',
    'lack of value',
    'no specific issue'
]

satisfaction_labels = [
    'very satisfied',
    'satisfied',
    'neither satisfied nor dissatisfied',
    'dissatisfied',
    'very dissatisfied'
]

# Define the mapping based on the specialized model's output
sentiment_map = {
    'negative': 'negative sentiment',
    'neutral': 'mixed or no sentiment',
    'positive': 'positive sentiment'
}

# --- 5.i. Define the Analysis Function ---
def analyze_narrative(narrative):
    """
    Analyzes a single narrative for sentiment, pain points,
    and satisfaction.
    """
    if not isinstance(narrative, str):
        return {'Sentiment': 'N/A', 'Pain_Point': 'N/A', 'Satisfaction': 'N/A'}

    try:
        # We set multi_label=False to force the model to pick only the single best label
        sent_result = classifier(narrative, sentiment_labels, multi_label=False)
        pain_result = classifier(narrative, pain_point_labels, multi_label=False)
        sat_result = classifier(narrative, satisfaction_labels, multi_label=False)

        # Return a dictionary with the top-scoring label for each category
        return {
            'Sentiment': sent_result['labels'][0],
            'Pain_Point': pain_result['labels'][0],
            'Satisfaction': sat_result['labels'][0]
        }
    except Exception as e:
        print(f"Error processing narrative: {e}")
        return {'Sentiment': 'Error', 'Pain_Point': 'Error', 'Satisfaction': 'Error'}

# --- 6. Apply the Function to Your DataFrame ---
print("\nAnalyzing narratives... This may take a moment.")
all_results= []

# Loop through each row to analyze its 'Narrative'
for index, row in df.iterrows():
    result = analyze_narrative(row['Narrative'])
    all_results.append(result)

# Convert the list of dictionaries into a new DataFrame
analysis_df = pd.DataFrame(all_results)

# Join the new analysis columns back to your original DataFrame
df_final = pd.concat([df, analysis_df], axis=1)

# --- 7. Save and Display Results ---
output_filename = "narratives_with_sentiment_zeroshot.csv"
df_final.to_csv(output_filename, index=False, encoding='utf-8')

print(f"\nAnalysis complete. Results saved to {output_filename}")
print("\n--- Final Data Head with Analysis ---")
display(df_final.head())

Successfully loaded customers_narratives.csv
Loading specialized sentiment model...


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0


Loading zero-shot model...


Device set to use cuda:0


Models loaded successfully.

Analyzing narratives... This may take a moment.

Analysis complete. Results saved to narratives_with_sentiment.csv

--- Final Data Head with Analysis ---


Unnamed: 0,CustomerID,Narrative,OriginalChurnStatus,Sentiment,Pain_Point,Satisfaction
0,6302-JGYRJ,"This customer, managing a household with a dep...",1,negative sentiment,high price,dissatisfied
1,2320-JRSDE,"This customer, supporting a dependent, maintai...",1,negative sentiment,high price,dissatisfied
2,2332-EFBJY,"This customer, a single individual without dep...",1,mixed or no sentiment,high price,dissatisfied
3,1624-WOIWJ,"This individual, without dependents, initially...",1,negative sentiment,high price,dissatisfied
4,9391-EOYLI,This single-user customer prioritized high-per...,1,mixed or no sentiment,poor tech support,dissatisfied


## Hybrid Approach to extract sentiment, pain points and satisfaction levels

In [None]:
# --- 3.iii. Set up a Hybrid Approach by Specializing Text-classfication Pipeline
# a. Set up the Sentiment Pipeline ---
print("Loading specialized sentiment model...")
sentiment_classifier = pipeline("sentiment-analysis",
                                model="cardiffnlp/twitter-roberta-base-sentiment-latest")

# b. Set up the ZERO-SHOT Pipeline (for custom labels) ---
print("Loading zero-shot model...")
zero_shot_classifier = pipeline("zero-shot-classification",
                                model="valhalla/distilbart-mnli-12-3")
                                # Or the bigger one: "facebook/bart-large-mnli"
print("Models loaded successfully.")

# --- 4. Define Your Custom Labels ---
# Appply more descriptive phases instead of single,abstract words for better "hypothesis"
sentiment_labels = ['positive sentiment', 'negative sentiment', 'mixed or no sentiment']

pain_point_labels = [
    'high price',
    'unreliable service',
    'poor tech support',
    'billing dispute',
    'complex tech',
    'lack of value',
    'no specific issue'
]

satisfaction_labels = [
    'very satisfied',
    'satisfied',
    'neither satisfied nor dissatisfied',
    'dissatisfied',
    'very dissatisfied'
]

# Define the mapping based on the specialized model's output
sentiment_map = {
    'negative': 'negative sentiment',
    'neutral': 'mixed or no sentiment',
    'positive': 'positive sentiment'
}

# --- 5.iii. Define the Analysis Function ---
def analyze_narrative(narrative):
    if not isinstance(narrative, str):
        return {'Sentiment': 'N/A', 'Pain_Point': 'N/A', 'Satisfaction': 'N/A'}
    try:
        # Use the specialized model for sentiment
        sent_result = sentiment_classifier(narrative)[0]
        # This model's labels might be 'LABEL_0', 'LABEL_1', 'LABEL_2' or
        model_sentiment = sent_result['label']
        # Apply the manual mapping
        sentiment = sentiment_map.get(model_sentiment, model_sentiment)

        # Use the zero-shot model for your custom labels
        pain_result = zero_shot_classifier(narrative, pain_point_labels, multi_label=False)
        sat_result = zero_shot_classifier(narrative, satisfaction_labels, multi_label=False)

        return {
            'Sentiment': sentiment,
            'Pain_Point': pain_result['labels'][0],
            'Satisfaction': sat_result['labels'][0]
        }
    except Exception as e:
        print(f"Error processing narrative: {e}")
        return {'Sentiment': 'Error', 'Pain_Point': 'Error', 'Satisfaction': 'Error'}

# --- 6. Apply the Function to Your DataFrame ---
print("\nAnalyzing narratives... This may take a moment.")
all_results= []

# Loop through each row to analyze its 'Narrative'
for index, row in df.iterrows():
    result = analyze_narrative(row['Narrative'])
    all_results.append(result)

# Convert the list of dictionaries into a new DataFrame
analysis_df = pd.DataFrame(all_results)

# Join the new analysis columns back to your original DataFrame
df_final = pd.concat([df, analysis_df], axis=1)

# --- 7. Save and Display Results ---
output_filename = "narratives_with_sentiment_hyprid.csv"
df_final.to_csv(output_filename, index=False, encoding='utf-8')

print(f"\nAnalysis complete. Results saved to {output_filename}")
print("\n--- Final Data Head with Analysis ---")
display(df_final.head())


## For visualization purpose only, it should run individually  for each approach

In [None]:
import altair as alt

# ---8. Analyze the narratives with sentiment, satisfaction and pain points

# Create a copy for plotting
df_plot = df_final.copy()

# define y-axis
y_padding = 70

# Map OriginalChurnStatus to readable labels for plotting
df_plot['ChurnStatus_Label'] = df_plot['OriginalChurnStatus'].map({
    0: 'Not Churned',
    1: 'Churned'
})

# --- 8.1. Distribution Charts ---
print("Generating distribution charts...")

# Chart 1: Distribution of Customer Sentiment
chart_sentiment = alt.Chart(df_plot).mark_bar().encode(
    x=alt.X('Sentiment', sort='-y'),
    y=alt.Y('count()', title='Total Number of Sentiment Levels',
            scale=alt.Scale(domain=(0, y_padding))),
    tooltip=['Sentiment', 'count()']
).properties(
    title='Distribution of Customer Sentiment',
    width=600,
    height=400
).interactive()
chart_sentiment.display()

# Chart 2: Distribution of Customer Pain Points
chart_pain_point = alt.Chart(df_plot).mark_bar().encode(
    x=alt.X('Pain_Point', sort='-y'),
    y=alt.Y('count()', title='Total Number of Pain Points'),
    tooltip=['Pain_Point', 'count()']
).properties(
    title='Distribution of Customer Pain Points',
    width=600,
    height=400
).interactive()
chart_pain_point.display()

# Chart 3: Distribution of Customer Satisfaction
chart_satisfaction = alt.Chart(df_plot).mark_bar().encode(
    x=alt.X('Satisfaction', sort='-y'),
    y=alt.Y('count()', title='Total Number of Satisfaction Level'),
    tooltip=['Satisfaction', 'count()']
   ).properties(
    title='Distribution of Customer Satisfaction',
    width=600,
    height=400
).interactive()
chart_satisfaction.display()

# --- 8.2. Relationship Charts ---
print("Generating relationship charts...")

# Chart 4: Customer Satisfaction by Pain Point
chart_pain_satisfaction = alt.Chart(df_plot).mark_bar().encode(
    x=alt.X('Pain_Point', sort='-y'),
    y=alt.Y('count()', title='Total Number of Pain Points'),
    color=alt.Color('Satisfaction', sort=['very satisfied', 'satisfied', 'neutral', 'dissatisfied', 'very dissatisfied']), # Define explicit sort order
    tooltip=['Pain_Point', 'Satisfaction', 'count()']
).properties(
    title='Customer Satisfaction by Pain Point',
    width=600,
    height=400
).interactive()
chart_pain_satisfaction.display()

# Chart 5: Customer Churn by Pain Point
chart_pain_churn = alt.Chart(df_plot).mark_bar().encode(
    x=alt.X('Pain_Point', sort='-y'),
    y=alt.Y('count()', title='Total Number of Pain Points'),
    color='ChurnStatus_Label',
    tooltip=['Pain_Point', 'ChurnStatus_Label', 'count()']
).properties(
    title='Customer Churn by Pain Point',
    width=600,
    height=400
).interactive()
chart_pain_churn.display()

# Chart 6: Customer Satisfaction by Sentiment
chart_sentiment_satisfaction = alt.Chart(df_plot).mark_bar().encode(
    x=alt.X('Sentiment', sort='-y'),  #
    y=alt.Y('count()', title='The total number of Sentiment Levels',
            scale=alt.Scale(domain=(0, y_padding))),
    color=alt.Color('Satisfaction', sort=['very satisfied', 'satisfied', 'neutral', 'dissatisfied', 'very dissatisfied']), # Define explicit sort order
    tooltip=['Sentiment', 'Satisfaction', 'count()']
).properties(
    title='Customer Satisfaction by Sentiment',
    width=600,
    height=400
).interactive()
chart_sentiment_satisfaction.display()

# Chart 7: Customer Churn By Sentiment
chart_sentiment_churn = alt.Chart(df_plot).mark_bar().encode(
    x=alt.X('Sentiment', sort='-y'),
    y=alt.Y('count()', title='The total number of Sentiment Levels',
            scale=alt.Scale(domain=(0, y_padding))),
    color='ChurnStatus_Label',  # Use the mapped label
    tooltip=['Sentiment', 'ChurnStatus_Label', 'count()']
).properties(
    title='Customer Churn By Sentiment',
    width=600,
    height=400
).interactive()
chart_sentiment_churn.display()

# Chart 8: A heatmap of churn rate by pain points and satisfaction
print("\n Generating heatmap of churn rate by pain points and satisfaction...")

# Group by Pain_Point and Satisfaction, get total size of each group, get total churned
df_agg = df_final.groupby(['Pain_Point', 'Satisfaction']).agg(
    Total_Customers=('CustomerID', 'size'),
    Churned_Customers=('OriginalChurnStatus', 'sum')
).reset_index()

# Calculate Churn Rate percentage
df_agg['Churn_Rate_Percent'] = (df_agg['Churned_Customers'] / df_agg['Total_Customers']) * 100

# Format a text label for the chart and use'.0f' to show whole numbers
df_agg['Churn_Rate_Label'] = df_agg['Churn_Rate_Percent'].apply(lambda x: f'{x:.0f}%')

print("Aggregation complete. Data to plot:")
print(df_agg.to_markdown(index=False, numalign="left", stralign="left"))

# The base chart, using our aggregated data
base = alt.Chart(df_agg).encode(
    # X-axis for Satisfaction
    x=alt.X('Satisfaction',
            title='Customer Satisfaction',
            axis=alt.Axis(labelAngle=0)),

    # Y-axis for Pain Point
    y=alt.Y('Pain_Point', title='Customer Pain Point'),

    # Tooltip to show details on hover
    tooltip=[
        alt.Tooltip('Pain_Point', title='Pain Point'),
        'Satisfaction',
        'Total_Customers',
        'Churned_Customers',
        alt.Tooltip('Churn_Rate_Percent', title='Churn Rate', format='.0f')
    ]
).properties(
        title='Heatmap of Churn Rate by Pain Point and Satisfaction',
        width=600,
        height=400
)


# The heatmap layer (the colored rectangles)
heatmap = base.mark_rect().encode(
    # Color is based on the churn rate
    color=alt.Color('Churn_Rate_Percent',
                    title='Churn Rate (%)',
                    # We set a domain of 0-100 so 100% is always the "hottest"
                    # 'heatmap' is a good perceptually uniform color scale
                    scale=alt.Scale(range='heatmap', domain=[0, 100]),
                    legend=alt.Legend(direction='vertical')
                    )
)

# The text layer to show the percentage on each block
text = base.mark_text(baseline='middle').encode(
    text=alt.Text('Churn_Rate_Label'),

    # If the churn rate is over 50 (darker color), use white text
    # Otherwise (lighter color), use black text for readability
    color=alt.condition(
        alt.datum.Churn_Rate_Percent > 50,
        alt.value('white'),
        alt.value('black')
    )
)

# Combine the heatmap and text layers
final_chart = (heatmap + text).properties(
    title='Heatmap of Churn Rate by Pain Point and Satisfaction'
).interactive()
final_chart.display()

Generating distribution charts...


Generating relationship charts...



 Generating heatmap of churn rate by pain points and satisfaction...
Aggregation complete. Data to plot:
| Pain_Point         | Satisfaction                       | Total_Customers   | Churned_Customers   | Churn_Rate_Percent   | Churn_Rate_Label   |
|:-------------------|:-----------------------------------|:------------------|:--------------------|:---------------------|:-------------------|
| billing dispute    | very satisfied                     | 2                 | 0                   | 0                    | 0%                 |
| complex tech       | dissatisfied                       | 2                 | 2                   | 100                  | 100%               |
| complex tech       | satisfied                          | 8                 | 0                   | 0                    | 0%                 |
| complex tech       | very satisfied                     | 1                 | 0                   | 0                    | 0%                 |
| high price      