# Capstone Project: AI Data Quality Checker
### Version: 1.0
### Created by: Omar Chehab
### Date: 29-11-2025

Importing Modules

In [124]:
from typing import Any, Dict
import os
from kaggle_secrets import UserSecretsClient

from google.adk.agents import Agent, LlmAgent, SequentialAgent
from google.adk.models.google_llm import Gemini
from google.adk.sessions import InMemorySessionService
from google.adk.runners import Runner
from google.adk.tools.tool_context import ToolContext
from google.genai import types
from google.adk.tools import AgentTool, FunctionTool, google_search
from google.adk.runners import InMemoryRunner
from google import generativeai as genai


print("‚úÖ ADK components imported successfully.")

‚úÖ ADK components imported successfully.


API Configuration

In [125]:
# Pull the Gemini key out of Kaggle Secrets and publish it through the
# GOOGLE_API_KEY environment variable for the rest of the notebook.
try:
    from kaggle_secrets import UserSecretsClient
    GOOGLE_API_KEY = UserSecretsClient().get_secret("GOOGLE_API_KEY")
    os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
    print("‚úÖ Gemini API key setup complete.")
except ImportError:
    # kaggle_secrets only exists inside Kaggle notebooks
    print("‚ö†Ô∏è Kaggle Secrets not available. Ensure you're in a Kaggle Notebook.")
except KeyError:
    print("üîë Authentication Error: Add 'GOOGLE_API_KEY' to Kaggle secrets.")


# Agent Configuration
CONFIG = dict(
    project="",
    model="models/gemini-2.5-flash",
    max_tokens=2000,
    temperature=0.3,
    version="1.0",
)

# Show the settings as a 60-column banner with one dotted "key.... value" row each
print("\n" + "=" * 60)
print("{:^60}".format("AGENT CONFIGURATION"))
print("=" * 60)
for k, v in CONFIG.items():
    print("{:.<25} {}".format(k, v))
print("=" * 60)

‚úÖ Gemini API key setup complete.

                    AGENT CONFIGURATION                     
project.................. 
model.................... models/gemini-2.5-flash
max_tokens............... 2000
temperature.............. 0.3
version.................. 1.0


In [126]:
# Retry policy for HTTP calls: which status codes are retried, the initial
# delay, the delay multiplier, and the maximum number of attempts.
retry_config = types.HttpRetryOptions(
    http_status_codes=[429, 500, 503, 504],  # HTTP errors retry
    initial_delay=1,
    exp_base=2,  # Delay multiplier
    attempts=5,  # Maximum retry attempts
)

print("‚úÖ Retry configuration defined.")

‚úÖ Retry configuration defined.


In [127]:
# Lists all LLM models available in your Google Generative AI account.
# NOTE(review): genai.configure() is not called in this cell, so this presumably
# relies on the GOOGLE_API_KEY environment variable set earlier — confirm.
models = genai.list_models()

# Prints each model name, one per line (the output can be long)
for model in models:
    print(model.name)

models/embedding-gecko-001
models/gemini-2.5-pro-preview-03-25
models/gemini-2.5-flash
models/gemini-2.5-pro-preview-05-06
models/gemini-2.5-pro-preview-06-05
models/gemini-2.5-pro
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-1219
models/gemini-2.5-flash-preview-tts
models/gemini-2.5-pro-preview-tts
models/learnlm-2.0-flash-experimental
models/gemma-3-1b-it
models/gemma-3-4b-it
models/gemma-3-12b-it
models/gemma-3-27b-it
models/gemma-3n-e4b-it
models/gemma-3n-e2b-it
models/gemini-flash-latest
models/gemini-flash-lite-latest
models/gemini-pro-latest
models/gemini-2.5-flash-lite
models/gemini-2.5-flash-image-preview
mod

Loading Dataset - Churn Prediction

In [128]:
# pandas is imported here because this is the first cell that uses `pd`; the
# notebook's other pandas import lives in a later cell, so without this line a
# fresh "Restart & Run All" stops here with NameError.
import pandas as pd

# Loading DataFrame
df = pd.read_csv("/kaggle/input/bank-customer-churn-prediction-dataset/Churn_Modelling.csv")

# Quick check
print(f"‚úÖ Data loaded: {len(df)} rows")
print(f"üìä Columns: {df.columns.tolist()}")

‚úÖ Data loaded: 10000 rows
üìä Columns: ['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Exited']


In [129]:
# Checking sample output
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


# Defining Analytics AI Agent

In [130]:
import pandas as pd
import numpy as np
from typing import Dict, List, Tuple, Any
import google.generativeai as gen
from kaggle_secrets import UserSecretsClient

class AnalyticsAgent:
    """
    Analytics Agent for generating executive insights on Bank Customer Churn data.
    Powered by Google Gemini for intelligent Q&A.
    """
    
    def __init__(self, dataframe: pd.DataFrame):
        """
        Initialize the Analytics Agent with the dataset.
        
        Args:
            dataframe: pandas DataFrame containing bank customer churn data
        """
        self.df = dataframe. copy()
        self.insights = {}
        self.chat_history = []
        
    def generate_executive_summary(self) -> Dict[str, Any]:
        """Generate a comprehensive executive summary with key metrics."""
        summary = {
            'total_customers': int(len(self.df)),
            'churn_rate': float(self._calculate_churn_rate()),
            'avg_customer_age': float(self. df['Age'].mean()),
            'avg_account_balance': float(self.df['Balance'].mean()),
            'avg_estimated_salary': float(self.df['EstimatedSalary'].mean()),
            'active_member_rate': float((self.df['IsActiveMember']. sum() / len(self.df)) * 100),
            'credit_card_holder_rate': float((self.df['HasCrCard'].sum() / len(self.df)) * 100),
        }
        self.insights['executive_summary'] = summary
        return summary
    
    def _calculate_churn_rate(self) -> float:
        """Calculate overall churn rate as percentage."""
        return (self.df['Exited'].sum() / len(self.df)) * 100
    
    def analyze_churn_by_geography(self) -> pd.DataFrame:
        """Analyze churn rates across different geographies."""
        geo_analysis = self.df.groupby('Geography', observed=True).agg({
            'Exited': ['sum', 'count', 'mean'],
            'Balance': 'mean',
            'EstimatedSalary': 'mean'
        }).round(2)
        geo_analysis.columns = ['Churned_Customers', 'Total_Customers', 'Churn_Rate', 
                                'Avg_Balance', 'Avg_Salary']
        geo_analysis['Churn_Rate'] = geo_analysis['Churn_Rate'] * 100
        self.insights['geography_analysis'] = geo_analysis
        return geo_analysis
    
    def analyze_churn_by_demographics(self) -> Dict[str, pd.DataFrame]:
        """Analyze churn patterns by demographic factors (age, gender)."""
        self.df['AgeGroup'] = pd.cut(self.df['Age'], 
                                      bins=[0, 30, 40, 50, 60, 100],
                                      labels=['<30', '30-40', '40-50', '50-60', '60+'])
        
        age_analysis = self.df.groupby('AgeGroup', observed=True).agg({
            'Exited': ['sum', 'count', 'mean'],
            'Balance': 'mean'
        }).round(2)
        age_analysis.columns = ['Churned', 'Total', 'Churn_Rate', 'Avg_Balance']
        age_analysis['Churn_Rate'] = age_analysis['Churn_Rate'] * 100
        
        gender_analysis = self.df.groupby('Gender', observed=True). agg({
            'Exited': ['sum', 'count', 'mean'],
            'Balance': 'mean',
            'CreditScore': 'mean'
        }).round(2)
        gender_analysis.columns = ['Churned', 'Total', 'Churn_Rate', 'Avg_Balance', 'Avg_CreditScore']
        gender_analysis['Churn_Rate'] = gender_analysis['Churn_Rate'] * 100
        
        demographics = {
            'age_analysis': age_analysis,
            'gender_analysis': gender_analysis
        }
        self.insights['demographics'] = demographics
        return demographics
    
    def analyze_product_engagement(self) -> pd. DataFrame:
        """Analyze churn based on number of products and engagement metrics."""
        product_analysis = self.df.groupby('NumOfProducts', observed=True).agg({
            'Exited': ['sum', 'count', 'mean'],
            'Balance': 'mean',
            'Tenure': 'mean',
            'IsActiveMember': 'mean'
        }).round(2)
        product_analysis.columns = ['Churned', 'Total', 'Churn_Rate', 
                                    'Avg_Balance', 'Avg_Tenure', 'Active_Rate']
        product_analysis['Churn_Rate'] = product_analysis['Churn_Rate'] * 100
        product_analysis['Active_Rate'] = product_analysis['Active_Rate'] * 100
        self.insights['product_engagement'] = product_analysis
        return product_analysis
    
    def identify_high_risk_segments(self) -> pd.DataFrame:
        """Identify customer segments with highest churn risk."""
        segments = self.df.groupby(['Geography', 'Gender', 'IsActiveMember'], observed=True).agg({
            'Exited': ['sum', 'count', 'mean'],
            'Balance': 'mean',
            'Age': 'mean'
        }).round(2)
        segments.columns = ['Churned', 'Total', 'Churn_Rate', 'Avg_Balance', 'Avg_Age']
        segments['Churn_Rate'] = segments['Churn_Rate'] * 100
        high_risk = segments[segments['Total'] >= 50]. sort_values('Churn_Rate', ascending=False)
        self.insights['high_risk_segments'] = high_risk. head(10)
        return high_risk. head(10)
    
    def analyze_financial_profile(self) -> Dict[str, Any]:
        """Analyze financial characteristics of churned vs retained customers."""
        churned = self.df[self.df['Exited'] == 1]
        retained = self.df[self.df['Exited'] == 0]
        
        financial_profile = {
            'churned_customers': {
                'avg_balance': float(churned['Balance'].mean()),
                'median_balance': float(churned['Balance'].median()),
                'avg_credit_score': float(churned['CreditScore'].mean()),
                'avg_salary': float(churned['EstimatedSalary'].mean()),
                'zero_balance_pct': float((churned['Balance'] == 0).sum() / len(churned) * 100)
            },
            'retained_customers': {
                'avg_balance': float(retained['Balance']. mean()),
                'median_balance': float(retained['Balance'].median()),
                'avg_credit_score': float(retained['CreditScore'].mean()),
                'avg_salary': float(retained['EstimatedSalary'].mean()),
                'zero_balance_pct': float((retained['Balance'] == 0).sum() / len(retained) * 100)
            }
        }
        self.insights['financial_profile'] = financial_profile
        return financial_profile
    
    def calculate_customer_lifetime_value_impact(self) -> Dict[str, float]:
        """Calculate the financial impact of customer churn."""
        churned = self.df[self.df['Exited'] == 1]
        estimated_revenue_per_customer = 0.01
        
        impact = {
            'total_churned_customers': int(len(churned)),
            'total_balance_lost': float(churned['Balance'].sum()),
            'avg_balance_per_churned_customer': float(churned['Balance'].mean()),
            'estimated_annual_revenue_loss': float(churned['Balance'].sum() * estimated_revenue_per_customer),
            'avg_tenure_of_churned': float(churned['Tenure'].mean()),
        }
        self.insights['clv_impact'] = impact
        return impact
    
    def get_all_insights(self) -> Dict[str, Any]:
        """Run all analyses and return comprehensive insights dictionary."""
        self.generate_executive_summary()
        self.analyze_churn_by_geography()
        self.analyze_churn_by_demographics()
        self.analyze_product_engagement()
        self.identify_high_risk_segments()
        self.analyze_financial_profile()
        self.calculate_customer_lifetime_value_impact()
        return self.insights
    
    # ============================================================================
    # GEMINI-POWERED Q&A FUNCTIONALITY
    # ============================================================================
    
    def _prepare_context(self) -> str:
        """
        Prepare a comprehensive context string with all insights for Gemini.
        """
        if not self.insights:
            self.get_all_insights()
        
        context = "=== BANK CUSTOMER CHURN ANALYSIS DATA ===\n\n"
        
        # Executive Summary
        context += "EXECUTIVE SUMMARY:\n"
        for key, value in self.insights['executive_summary'].items():
            context += f"- {key. replace('_', ' ').title()}: {value:,.2f}\n"
        
        # Geography Analysis
        context += "\nCHURN BY GEOGRAPHY:\n"
        geo_df = self.insights['geography_analysis']
        for geo, row in geo_df.iterrows():
            context += f"- {geo}: {row['Churn_Rate']:.2f}% churn rate, "
            context += f"{int(row['Churned_Customers'])} of {int(row['Total_Customers'])} customers, "
            context += f"Avg Balance: ${row['Avg_Balance']:,.2f}\n"
        
        # Demographics - Age
        context += "\nCHURN BY AGE GROUP:\n"
        age_df = self.insights['demographics']['age_analysis']
        for age_group, row in age_df.iterrows():
            context += f"- {age_group}: {row['Churn_Rate']:.2f}% churn rate, "
            context += f"{int(row['Churned'])} of {int(row['Total'])} customers\n"
        
        # Demographics - Gender
        context += "\nCHURN BY GENDER:\n"
        gender_df = self. insights['demographics']['gender_analysis']
        for gender, row in gender_df.iterrows():
            context += f"- {gender}: {row['Churn_Rate']:.2f}% churn rate, "
            context += f"{int(row['Churned'])} of {int(row['Total'])} customers\n"
        
        # Product Engagement
        context += "\nCHURN BY NUMBER OF PRODUCTS:\n"
        product_df = self.insights['product_engagement']
        for num_products, row in product_df.iterrows():
            context += f"- {int(num_products)} products: {row['Churn_Rate']:.2f}% churn rate, "
            context += f"{int(row['Churned'])} of {int(row['Total'])} customers, "
            context += f"Avg Tenure: {row['Avg_Tenure']:.1f} years, "
            context += f"Active Rate: {row['Active_Rate']:.1f}%\n"
        
        # High Risk Segments
        context += "\nTOP 5 HIGH-RISK SEGMENTS:\n"
        high_risk = self.insights['high_risk_segments']. head(5)
        for idx, (segment, row) in enumerate(high_risk.iterrows(), 1):
            geo, gender, is_active = segment
            active_status = "Active" if is_active == 1 else "Inactive"
            context += f"{idx}. {geo} - {gender} - {active_status}: "
            context += f"{row['Churn_Rate']:.2f}% churn rate, {int(row['Total'])} customers\n"
        
        # Financial Profile
        context += "\nFINANCIAL PROFILE COMPARISON:\n"
        fp = self.insights['financial_profile']
        context += "Churned Customers:\n"
        context += f"  - Avg Balance: ${fp['churned_customers']['avg_balance']:,.2f}\n"
        context += f"  - Avg Credit Score: {fp['churned_customers']['avg_credit_score']:.0f}\n"
        context += f"  - Avg Salary: ${fp['churned_customers']['avg_salary']:,.2f}\n"
        context += "Retained Customers:\n"
        context += f"  - Avg Balance: ${fp['retained_customers']['avg_balance']:,.2f}\n"
        context += f"  - Avg Credit Score: {fp['retained_customers']['avg_credit_score']:.0f}\n"
        context += f"  - Avg Salary: ${fp['retained_customers']['avg_salary']:,.2f}\n"
        
        # Financial Impact
        context += "\nFINANCIAL IMPACT:\n"
        impact = self.insights['clv_impact']
        context += f"- Total Churned Customers: {impact['total_churned_customers']:,}\n"
        context += f"- Total Balance Lost: ${impact['total_balance_lost']:,.2f}\n"
        context += f"- Estimated Annual Revenue Loss: ${impact['estimated_annual_revenue_loss']:,.2f}\n"
        context += f"- Avg Tenure of Churned: {impact['avg_tenure_of_churned']:.1f} years\n"
        
        return context
    
    def _create_system_prompt(self) -> str:
        """
        Create the system prompt that defines the agent's role and behavior.

        Returns:
            A fixed, multi-line instruction string; it does not depend on the
            dataset or on any instance state.
        """
        # NOTE: the indentation of the continuation lines sits inside the
        # triple-quoted literal, so it is part of the prompt text that is sent.
        system_prompt = """You are an expert Analytics Agent specializing in customer churn analysis for a bank. 

                        Your role is to answer executive questions about customer churn data with:
                        - Clear, concise, and actionable insights
                        - Data-driven responses based on the provided analysis
                        - Executive-friendly language (avoid jargon)
                        - Specific numbers and percentages from the data
                        - Strategic recommendations when appropriate
                        - Professional formatting with emojis for visual clarity (üìä üåç üë• üí∞ ‚ö†Ô∏è üéØ)
                        
                        When answering:
                        1. Always reference specific data points from the analysis
                        2. Highlight key insights and patterns
                        3. Provide context and comparisons
                        4. End with actionable recommendations when relevant
                        5. Be direct and avoid unnecessary preamble
                        6. Use bullet points and clear structure
                        
                        The data context below contains all the churn analysis results you should reference. 
                        """
        return system_prompt
    
    def model_config(self, system_prompt: str, user_prompt: str) -> str:
        """
        Configure and call Gemini model with the given prompts.
        
        Args:
            system_prompt: System instructions for the model
            user_prompt: User's question
            
        Returns:
            Model's response text
        """
        try:
            api_key = UserSecretsClient().get_secret("GOOGLE_API_KEY")
        except Exception:
            return "[Simulated LLM: GOOGLE_API_KEY secret not accessible in this environment.]"
        
        try:    
            gen. configure(api_key=api_key)
            model = gen.GenerativeModel("gemini-2.0-flash-exp")
            prompt = system_prompt + "\n\n" + user_prompt
            response = model.generate_content(prompt)
            return response.text
                
        except Exception as e:
            return f"[Simulated LLM: Gemini unreachable ‚Üí {e}]"
    
    def ask(self, question: str) -> str:
        """
        Main Q&A interface.  Ask the agent any question about the churn data.
        Powered by Google Gemini LLM.
        
        Args:
            question: Natural language question from the executive
            
        Returns:
            String answer to the question
        """
        # Ensure insights are generated
        if not self.insights:
            print("üìä Analyzing data... Please wait...")
            self.get_all_insights()
            print("‚úÖ Analysis complete!\n")
        
        # Store question in chat history
        self.chat_history.append({'role': 'user', 'content': question})
        
        # Prepare context and prompts
        context = self._prepare_context()
        system_prompt = self._create_system_prompt()
        user_prompt = f"DATA CONTEXT:\n{context}\n\nEXECUTIVE QUESTION:\n{question}\n\nProvide a clear, data-driven answer:"
        
        # Get response from Gemini
        answer = self.model_config(system_prompt, user_prompt)
        
        # Store answer in chat history
        self.chat_history.append({'role': 'agent', 'content': answer})
        
        return answer
    
    def start_chat(self):
        """
        Start an interactive chat session (for Jupyter notebooks or console).
        Type 'quit', 'exit', or 'bye' to end the session.
        """
        print("=" * 80)
        print("ü§ñ ANALYTICS AGENT - EXECUTIVE Q&A SESSION (Powered by Gemini)")
        print("=" * 80)
        print("\nHello! I'm your AI-powered Analytics Agent. I can answer questions about")
        print("customer churn using advanced language understanding.")
        print("\nType 'quit', 'exit', or 'bye' to end the session.\n")
        print("-" * 80)
        
        # Ensure insights are loaded
        if not self.insights:
            print("üìä Loading and analyzing data... Please wait...")
            self.get_all_insights()
            print("‚úÖ Ready to answer your questions!\n")
        
        while True:
            try:
                question = input("\nüíº Executive: ").strip()
                
                if question.lower() in ['quit', 'exit', 'bye', 'q']:
                    print("\nüëã Thank you for using Analytics Agent.  Goodbye!")
                    break
                
                if not question:
                    continue
                
                print("\nü§ñ Agent: [Thinking... ]\n")
                answer = self. ask(question)
                print(answer)
                print("\n" + "-" * 80)
                
            except KeyboardInterrupt:
                print("\n\nüëã Session ended.  Goodbye!")
                break
            except Exception as e:
                print(f"\n‚ùå Error: {str(e)}")
                print("Please try rephrasing your question.\n")
    
    def get_chat_history(self) -> List[Dict[str, str]]:
        """Return the chat history."""
        return self.chat_history
    
    def clear_chat_history(self):
        """Clear the chat history."""
        self.chat_history = []
        print("‚úÖ Chat history cleared.")
    
    def print_executive_report(self):
        """Print a formatted executive report to console."""
        insights = self.get_all_insights()
        
        print("=" * 80)
        print("EXECUTIVE INSIGHTS REPORT - BANK CUSTOMER CHURN ANALYSIS")
        print("=" * 80)
        
        print("\nüìà EXECUTIVE SUMMARY:")
        print("-" * 80)
        for key, value in insights['executive_summary'].items():
            key_formatted = key.replace('_', ' ').title()
            if isinstance(value, int):
                print(f"  {key_formatted}: {value:,}")
            else:
                print(f"  {key_formatted}: {value:,.2f}")
        
        print("\n\nüåç CHURN BY GEOGRAPHY:")
        print("-" * 80)
        print(insights['geography_analysis'])
        
        print("\n\nüì¶ PRODUCT ENGAGEMENT:")
        print("-" * 80)
        print(insights['product_engagement'])
        
        print("\n\n‚ö†Ô∏è  TOP 5 HIGH-RISK SEGMENTS:")
        print("-" * 80)
        print(insights['high_risk_segments'].head())
        
        print("\n" + "=" * 80)

print("‚úÖ AnalyticsAgent class loaded successfully!")

‚úÖ AnalyticsAgent class loaded successfully!


In [131]:
# Create the Analytics Agent
agent = AnalyticsAgent(df)

print("‚úÖ Agent initialized and ready!")

‚úÖ Agent initialized and ready!


# General Question

In [132]:
agent.ask("What can you help with? What type of dataset can you support me with?")

üìä Analyzing data... Please wait...
‚úÖ Analysis complete!



'I can assist you with in-depth customer churn analysis to identify key drivers and high-risk segments. üìä I can provide actionable insights based on the provided dataset.\n\nHere\'s how I can help:\n\n*   **Comprehensive Churn Overview**:\n    *   Provide a summary of overall churn rate (currently 20.37%).\n    *   Breakdown the total number of churned customers (2,037) and associated financial impact. üí∞\n\n*   **Geographic Analysis**:\n    *   Identify regions with high churn, such as Germany (32.00%) compared to France (16.00%) and Spain (17.00%). üåç\n    *   Highlight average balances in each region to understand potential revenue impact.\n\n*   **Demographic Segmentation**:\n    *   Analyze churn by age group, pinpointing high churn rates in the 40-50 (34.00%) and 50-60 (56.00%) age brackets. üë•\n    *   Compare churn rates between genders, noting the higher rate among females (25.00%) compared to males (16.00%).\n\n*   **Product Usage Analysis**:\n    *   Assess churn ba

# Churn Analysis

In [133]:
agent.ask("What's our overall churn rate?")

"üìä Our overall churn rate is **20.37%**. This means that out of our total customer base of 10,000, we lost 2,037 customers. ‚ö†Ô∏è It's crucial we understand the drivers behind this to reduce losses.\n"

In [134]:
agent.ask("What's our biggest churn problem?")

"Here's a breakdown of our biggest churn challenges:\n\n*   **Overall Churn Rate:** Our bank is experiencing a 20.37% churn rate across our 10,000 customers. ‚ö†Ô∏è\n\n*   **Geographic Hotspot:** Germany has a significantly high churn rate of 32.00% (814 of 2509 customers), compared to France (16.00%) and Spain (17.00%).üåç\n\n*   **Age Group at Risk:** The 50-60 age group has the highest churn rate at 56.00% (448 of 797 customers), followed closely by the 40-50 age group at 34.00%. üë•\n\n*   **Gender Disparity:** Female customers churn at a higher rate of 25.00% (1139 of 4543 customers) compared to male customers at 16.00%. üìä\n\n*   **Product Ownership:** Customers with only 1 product have a churn rate of 28.00% (1409 of 5084 customers), while those with 3 or 4 products experience very high churn rates of 83.00% and 100.00% respectively. This could be a red flag due to issues in implementation or expectations of the products. üí∞\n\n*   **High-Risk Segments:** Our top 5 high-ri

In [135]:
agent.ask("Which factors contribute most to churn?")

"Here's a breakdown of the key churn drivers, based on the data:\n\n*   **Geography:** üåç Germany stands out with a significantly higher churn rate of 32.00%, compared to France (16.00%) and Spain (17.00%).\n\n*   **Age Group:** üë• Customers aged 40-60 exhibit the highest churn rates: 40-50 (34.00%) and 50-60 (56.00%). Churn is significantly lower for younger customers, with only 8.00% for those under 30.\n\n*   **Gender:** üë©\u200düíº Females churn at a higher rate (25.00%) compared to males (16.00%).\n\n*   **Number of Products:** üõí Customers with only 1 product have a high churn rate (28.00%). Conversely, those with 2 products have a much lower churn rate (8.00%). Customers using 3 or 4 products have extremely high churn rates of 83% and 100% respectively, but this affects a much smaller number of customers.\n\n*   **High-Risk Segments:** ‚ö†Ô∏è The top 5 high-risk segments are dominated by inactive customers in Germany, Spain and France, with churn rates ranging from 26% 

# Demographics

In [136]:
# # Demographics
agent.ask("How does age affect churn?")

"Here's how age affects churn at our bank:\n\n*   **Overall Trend:** Churn rate increases significantly with age, peaking in the 50-60 age group. üìà\n*   **Specific Numbers:**\n    *   Under 30: 8% churn rate (148 of 1968 customers).\n    *   30-40: 12% churn rate (538 of 4451 customers).\n    *   40-50: 34% churn rate (788 of 2320 customers).\n    *   50-60: A concerning 56% churn rate (448 of 797 customers).\n    *   60+: 25% churn rate (115 of 464 customers).\n*   **Key Insight:** Customers in the 40-60 age range are significantly more likely to churn. ‚ö†Ô∏è The churn rate more than quadruples between the 30-40 and 50-60 age groups.\n\n**Recommendation:** üéØ\n\n*   Develop targeted retention strategies for customers aged 40-60. This might include:\n    *   Personalized financial planning services. üßë\u200düíº\n    *   Special product offerings tailored to their life stage. üí∞\n    *   Increased engagement and communication to address their specific needs. üë•\n"

In [137]:
agent. ask("Do men or women churn more?")

'üìä Churn by Gender Analysis\n\n*   **Finding:** Women churn at a higher rate than men.\n    *   Female churn rate: 25.00% (1,139 of 4,543 customers)\n    *   Male churn rate: 16.00% (898 of 5,457 customers)\n*   **Impact:** The bank is losing a proportionally higher number of female customers compared to male customers.\n*   **Recommendation:** Investigate the reasons behind the higher female churn rate. This could involve surveys or targeted feedback sessions to understand their specific needs and pain points. Tailor retention strategies to address these concerns. üéØ\n'

In [138]:
agent.ask("What age group should we focus on?")

'üéØ Focus on customers in the 40-60 age range, as they exhibit the highest churn rates.\n\n*   **High Churn Rates**:\n    *   üìä Customers aged 40-50 have a churn rate of 34.00% (788 out of 2320 customers).\n    *   üìä Customers aged 50-60 have a significantly higher churn rate of 56.00% (448 out of 797 customers).\n*   **Comparison**: These churn rates are substantially higher than those of younger age groups:\n    *   üìä <30: 8.00% churn rate\n    *   üìä 30-40: 12.00% churn rate\n\n‚ö†Ô∏è **Recommendation**: Implement targeted retention strategies for customers aged 40-60 to mitigate churn. This could include personalized financial advice, tailored product offerings, and proactive engagement to address their specific needs and concerns.\n'

# Testing Questions

In [139]:
# # Geographic Analysis
# agent.ask("Which countries have the highest churn? ")
# agent.ask("Why is Germany churning more than other countries?")

# # Product & Engagement
# agent.ask("How do product holdings affect churn?")
# agent. ask("What about active vs inactive members?")

# # Risk & Segments
# agent.ask("Who are our highest-risk customers?")
# agent.ask("What customer segments should we prioritize?")

# # Financial Impact
# agent.ask("What's the financial impact of churn?")
# agent.ask("How much revenue are we losing? ")
# agent.ask("If we reduce churn by 15%, what's the savings?")

# # Recommendations
# agent.ask("What should we do to reduce churn?")
# agent.ask("Give me your top 5 action items")
# agent.ask("What's our best retention strategy?")

# # Complex Questions
# agent.ask("Compare Germany vs France churn patterns")
# agent.ask("Why do customers with more products churn more?")
# agent.ask("What's the profile of a typical churned customer?")

# Creating User Interface for Chatbot

In [140]:
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
import google.generativeai as genai
from kaggle_secrets import UserSecretsClient

class VisualChatbot:
    """Widget-based chat front end that forwards questions to a Gemini chat session.

    The instance owns:
      * ``agent``        - the analytics agent; only ``agent._prepare_context()`` is
                           used, to build the priming prompt for Gemini.
      * ``chat_session`` - the Gemini chat session, or ``None`` if setup failed.
      * ``chat_history`` - list of ``{"user": str, "agent": str}`` turns already shown.
      * the ipywidgets controls (``chat_output``, ``input_box``, ``send_button``,
        ``clear_button``, ``quick_buttons``).

    All text coming from the user, the model, or an exception is HTML-escaped
    before being placed in the chat area.
    """

    # Model used for the chat session (kept as a class attribute so it can be
    # overridden on a subclass/instance without editing setup_gemini).
    MODEL_NAME = "gemini-2.0-flash-exp"
    GENERATION_CONFIG = {
        "temperature": 0.7,
        "top_p": 0.95,
        "max_output_tokens": 8192,
    }

    # (button label, question actually sent) for the quick-question buttons.
    _QUICK_QUESTIONS = (
        ("üìä What's our churn rate?", "What's our overall churn rate?"),
        ("‚ö†Ô∏è High-risk segments?", "Who are our high-risk customer segments?"),
        ("üí° Recommendations?", "What are your top recommendations to reduce churn?"),
        ("üí∞ Financial impact?", "What's the financial impact of churn?"),
        ("üåç Geography analysis?", "Which geography has the highest churn?"),
        ("üìã Executive summary?", "Give me an executive summary"),
    )

    # Banner shown when the UI is created and again after the chat is cleared.
    _WELCOME_HTML = """
            <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 25px; border-radius: 10px; text-align: center; margin-bottom: 20px;">
                <h2 style="margin: 0; color: white;">ü§ñ Bank Customer Churn Analytics Agent</h2>
                <p style="margin: 10px 0; color: white; font-size: 16px;">Powered by Google Gemini AI</p>
                <p style="font-size: 14px; opacity: 0.95; color: white;">Ask me anything about customer churn patterns and recommendations!</p>
            </div>
            """

    # Notice appended below the banner right after "Clear Chat" is pressed.
    _CLEARED_HTML = """
            <div style="background: #e8f5e9; padding: 15px; border-radius: 8px; text-align: center; border: 2px solid #4caf50;">
                <span style="color: #2e7d32; font-size: 16px; font-weight: bold;">‚úÖ Chat cleared!  Ask me anything about customer churn. </span>
            </div>
            """

    # Temporary indicator shown while waiting for the model's reply.
    _THINKING_HTML = """
            <div style="background: #fff9c4; padding: 10px; border-radius: 8px; margin: 10px 0; text-align: center;">
                <span style="color: #f57c00;">ü§ñ Agent is thinking...</span>
            </div>
            """

    def __init__(self, agent):
        """Store the agent, open the Gemini session and build the widgets.

        Args:
            agent: object exposing ``_prepare_context()``; its return value is
                embedded in the priming prompt.
        """
        self.agent = agent
        self.chat_history = []
        self.setup_gemini()
        self.create_ui()

    # ------------------------------------------------------------------
    # Gemini session
    # ------------------------------------------------------------------
    def setup_gemini(self):
        """Configure Gemini and start a chat session primed with the agent context.

        On any failure the error is printed and ``self.chat_session`` is left as
        ``None`` (or as an unprimed session if only the priming message failed),
        so the UI can still be constructed.
        """
        # Always define the attribute so later code can test it instead of
        # tripping over an AttributeError when setup fails.
        self.chat_session = None
        try:
            api_key = UserSecretsClient().get_secret("GOOGLE_API_KEY")
            genai.configure(api_key=api_key)

            model = genai.GenerativeModel(
                model_name=self.MODEL_NAME,
                generation_config=dict(self.GENERATION_CONFIG),
            )

            context = self.agent._prepare_context()
            system_instruction = f"""You are an expert Analytics Agent specializing in bank customer churn analysis. 

{context}

Provide clear, data-driven insights with specific numbers and actionable recommendations."""

            # The instructions are sent as the first chat message of the session.
            self.chat_session = model.start_chat(history=[])
            self.chat_session.send_message(system_instruction)

        except Exception as e:
            print(f"‚ùå Error: {e}")

    # ------------------------------------------------------------------
    # HTML helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _markdown_to_html(text):
        """Escape ``text`` for HTML, then turn ``**bold**`` and newlines into markup.

        Each ``**...**`` pair on a single line becomes ``<strong>...</strong>``;
        an unpaired ``**`` is left as literal text. Newlines become ``<br>``.
        """
        escaped = html.escape(str(text))
        # Pair the markers explicitly: replacing every '**' with '<strong>'
        # would never emit a closing tag.
        bolded = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', escaped)
        return bolded.replace('\n', '<br>')

    @staticmethod
    def _user_bubble(message):
        """Return the HTML block for a user message (text is HTML-escaped)."""
        return f"""
            <div style="background: #e3f2fd; padding: 15px; border-radius: 10px; margin: 10px 0; border-left: 4px solid #2196f3; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <strong style="color: #1565c0; font-size: 14px;">üíº You:</strong>
                <p style="margin: 8px 0 0 0; color: #212121; font-size: 14px; line-height: 1.6;">{html.escape(str(message))}</p>
            </div>
            """

    @classmethod
    def _agent_bubble(cls, response_text):
        """Return the HTML block for an agent reply (markdown-lite converted)."""
        return f"""
            <div style="background: #f5f5f5; padding: 15px; border-radius: 10px; margin: 10px 0; border-left: 4px solid #667eea; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <strong style="color: #5e35b1; font-size: 14px;">ü§ñ Agent:</strong>
                <div style="margin: 8px 0 0 0; color: #212121; font-size: 14px; line-height: 1.8;">{cls._markdown_to_html(response_text)}</div>
            </div>
            """

    @staticmethod
    def _error_bubble(error):
        """Return the HTML block for an error (message is HTML-escaped)."""
        return f"""
            <div style="background: #ffebee; padding: 15px; border-radius: 10px; margin: 10px 0; border-left: 4px solid #f44336; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <strong style="color: #c62828; font-size: 14px;">‚ùå Error:</strong>
                <p style="margin: 8px 0 0 0; color: #212121; font-size: 14px;">{html.escape(str(error))}</p>
            </div>
            """

    def _redraw_history(self):
        """Clear the chat area and draw every stored turn again.

        ``clear_output`` removes everything in the area (including the welcome
        banner and the thinking indicator); only the turns in
        ``self.chat_history`` are drawn back.
        """
        self.chat_output.clear_output(wait=True)
        with self.chat_output:
            for turn in self.chat_history:
                display(HTML(self._user_bubble(turn['user'])))
                display(HTML(self._agent_bubble(turn['agent'])))

    # ------------------------------------------------------------------
    # Widgets
    # ------------------------------------------------------------------
    def create_ui(self):
        """Create the widgets, wire their event handlers and show the welcome banner."""
        # Chat output area - uses flex to fill available space
        self.chat_output = widgets.Output(
            layout=widgets.Layout(
                min_height='300px',
                max_height='500px',
                border='2px solid #667eea',
                padding='15px',
                overflow_y='auto',
                background_color='#ffffff',
                flex='1 1 auto'
            )
        )

        # Input box
        self.input_box = widgets.Text(
            placeholder='Type your question here...',
            layout=widgets.Layout(width='80%'),
            style={'description_width': 'initial'}
        )

        # Send button
        self.send_button = widgets.Button(
            description='Send üì§',
            button_style='primary',
            layout=widgets.Layout(width='18%')
        )

        # Clear button
        self.clear_button = widgets.Button(
            description='Clear Chat üóëÔ∏è',
            button_style='warning',
            layout=widgets.Layout(width='100%', margin='10px 0')
        )

        # Event handlers
        self.send_button.on_click(self.on_send)
        # NOTE(review): on_submit emits a DeprecationWarning on recent ipywidgets.
        # It is kept because the observe('value') replacement presumably also
        # fires when the box loses focus - confirm before migrating.
        self.input_box.on_submit(self.on_send)
        self.clear_button.on_click(self.on_clear)

        # Quick question buttons: one shared layout, one handler per question.
        button_style = widgets.Layout(width='auto', min_width='200px', margin='5px')
        self.quick_buttons = []
        for label, question in self._QUICK_QUESTIONS:
            button = widgets.Button(description=label, button_style='info', layout=button_style)
            # Bind the question as a default argument so each button keeps its own text.
            button.on_click(lambda _b, q=question: self.send_quick_question(q))
            self.quick_buttons.append(button)

        # Display welcome message
        with self.chat_output:
            display(HTML(self._WELCOME_HTML))

    def display(self):
        """Assemble the header, quick buttons, chat area and input row, and render them."""
        # Header with better styling
        header = widgets.HTML("""
        <div style="margin-bottom: 15px; padding: 10px; background: #f8f9fa; border-radius: 8px; border-left: 4px solid #667eea;">
            <h3 style="margin: 0; color: #333;">üí° Quick Questions (Click to Ask):</h3>
        </div>
        """)

        # Quick buttons are laid out as two rows of three.
        row_layout = dict(justify_content='flex-start', margin='5px 0')
        quick_buttons_box = widgets.VBox([
            widgets.HBox(list(self.quick_buttons[0:3]), layout=widgets.Layout(**row_layout)),
            widgets.HBox(list(self.quick_buttons[3:6]), layout=widgets.Layout(**row_layout)),
        ])

        # Chat label
        chat_label = widgets.HTML("""
        <div style='margin: 20px 0 10px 0; padding: 10px; background: #f8f9fa; border-radius: 8px; border-left: 4px solid #667eea;'>
            <strong style="color: #333;">üí¨ Chat:</strong>
        </div>
        """)

        # Chat area wrapper - allows chat to expand and scroll
        chat_area = widgets.VBox(
            [chat_label, self.chat_output],
            layout=widgets.Layout(flex='1 1 auto', min_height='350px')
        )

        # Input section - fixed at bottom
        input_section = widgets.VBox(
            [
                widgets.HBox(
                    [self.input_box, self.send_button],
                    layout=widgets.Layout(width='100%', margin='10px 0 0 0')
                ),
                self.clear_button
            ],
            layout=widgets.Layout(flex='0 0 auto')
        )

        # Complete UI with flex layout
        ui = widgets.VBox(
            [header, quick_buttons_box, chat_area, input_section],
            layout=widgets.Layout(
                display='flex',
                flex_flow='column',
                height='700px'
            )
        )

        # Inside a method body `display` resolves to the module-level
        # IPython function, not to this method.
        display(ui)

    # ------------------------------------------------------------------
    # Event handlers
    # ------------------------------------------------------------------
    def on_send(self, b):
        """Send the text currently in the input box (if not blank) and empty the box.

        Args:
            b: the widget that triggered the event; unused.
        """
        message = self.input_box.value.strip()
        if message:
            self.send_message(message)
            self.input_box.value = ''

    def send_quick_question(self, question):
        """Send one of the predefined quick questions."""
        self.send_message(question)

    def send_message(self, message):
        """Show ``message``, ask the Gemini session, and show the reply or the error.

        A successful exchange is appended to ``self.chat_history``. On failure
        nothing is stored; the user's message and the error are shown after the
        previously stored turns.

        Args:
            message: the question text to send.
        """
        # Echo the question and a temporary "thinking" indicator.
        with self.chat_output:
            display(HTML(self._user_bubble(message)))
            display(HTML(self._THINKING_HTML))

        try:
            if getattr(self, 'chat_session', None) is None:
                raise RuntimeError(
                    "Gemini chat session is not available. "
                    "Check the GOOGLE_API_KEY secret and re-create the chatbot."
                )
            response_text = self.chat_session.send_message(message).text
        except Exception as e:
            # Redrawing drops the thinking indicator; then show what was asked
            # and why it failed.
            self._redraw_history()
            with self.chat_output:
                display(HTML(self._user_bubble(message)))
                display(HTML(self._error_bubble(e)))
            return

        # Redrawing drops the thinking indicator; then append the new turn.
        self._redraw_history()
        with self.chat_output:
            display(HTML(self._user_bubble(message)))
            display(HTML(self._agent_bubble(response_text)))

        # Store in history
        self.chat_history.append({"user": message, "agent": response_text})

    def on_clear(self, b):
        """Empty the chat area and the stored turns, then show the banner and a notice.

        Only the on-screen history is reset; ``self.chat_session`` is not touched.

        Args:
            b: the widget that triggered the event; unused.
        """
        self.chat_output.clear_output()
        self.chat_history = []
        with self.chat_output:
            display(HTML(self._WELCOME_HTML + self._CLEARED_HTML))

# Create visual chatbot
def create_visual_chatbot(agent):
    """Build a ``VisualChatbot`` for ``agent``, render it, and hand it back.

    Args:
        agent: passed straight through to the ``VisualChatbot`` constructor.

    Returns:
        The ``VisualChatbot`` instance whose interface was just displayed.
    """
    bot = VisualChatbot(agent)
    # Rendering is a side effect; the instance is returned so the caller can
    # keep a reference to it.
    bot.display()
    return bot

# Status line printed once the class and factory above have been defined.
print("‚úÖ Visual chatbot loaded with improved contrast!")

‚úÖ Visual chatbot loaded with improved contrast!


In [142]:
# Build the chatbot around the existing `agent` and render it. The returned
# instance is kept in `visual_chatbot` so it stays reachable from later cells.
visual_chatbot = create_visual_chatbot(agent)

  self.input_box.on_submit(self.on_send)


VBox(children=(HTML(value='\n        <div style="margin-bottom: 15px; padding: 10px; background: #f8f9fa; bord‚Ä¶