# Capstone Project: AI Data Quality Checker
### Version: 1.0
### Created by: Omar Chehab
### Date: 29-11-2025

Importing Modules

In [49]:
import os
from typing import Any, Dict

import pandas as pd
from kaggle_secrets import UserSecretsClient

from google.adk.agents import Agent, LlmAgent, SequentialAgent
from google.adk.models.google_llm import Gemini
from google.adk.sessions import InMemorySessionService
from google.adk.runners import Runner
from google.adk.tools.tool_context import ToolContext
from google.genai import types
from google.adk.tools import AgentTool, FunctionTool, google_search
from google.adk.runners import InMemoryRunner
from google import generativeai as genai


print("‚úÖ ADK components imported successfully.")

‚úÖ ADK components imported successfully.


API Configuration

In [50]:
# Load API Key from Kaggle Secrets
# Reads the "GOOGLE_API_KEY" secret and exports it through os.environ under the
# same name. Both except branches only print a message, so on failure execution
# continues with GOOGLE_API_KEY left unset.
try:
    from kaggle_secrets import UserSecretsClient
    GOOGLE_API_KEY = UserSecretsClient().get_secret("GOOGLE_API_KEY")
    os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
    print("‚úÖ Gemini API key setup complete.")
except ImportError:
    # kaggle_secrets could not be imported.
    print("‚ö†Ô∏è Kaggle Secrets not available. Ensure you're in a Kaggle Notebook.")
except KeyError:
    # NOTE(review): assumes a missing secret surfaces as KeyError — confirm
    # against kaggle_secrets; any other exception type propagates from here.
    print("üîë Authentication Error: Add 'GOOGLE_API_KEY' to Kaggle secrets.")


# Agent Configuration
# Settings for the agent, kept in one dict; the code below only displays them.
CONFIG = dict(
    project="",
    model="models/gemini-2.5-flash",
    max_tokens=2000,
    temperature=0.3,
    version="1.0",
)

# Banner: blank line, 60-column rule, centred title, rule, one dot-padded
# "key.... value" row per setting, closing rule.
print("\n" + "=" * 60)
print(format("AGENT CONFIGURATION", "^60"))
print("=" * 60)
for k, v in CONFIG.items():
    print(k.ljust(25, ".") + " " + str(v))
print("=" * 60)

‚úÖ Gemini API key setup complete.

                    AGENT CONFIGURATION                     
project.................. 
model.................... models/gemini-2.5-flash
max_tokens............... 2000
temperature.............. 0.3
version.................. 1.0


In [51]:
# Retry options object built with google.genai's HttpRetryOptions.
retry_config = types.HttpRetryOptions(
    http_status_codes=[429, 500, 503, 504],  # HTTP status codes that trigger a retry
    initial_delay=1,  # starting delay (presumably seconds — confirm against the SDK)
    exp_base=2,       # multiplier applied to the delay
    attempts=5,       # maximum retry attempts
)

print("‚úÖ Retry configuration defined.")

‚úÖ Retry configuration defined.


In [52]:
# Lists all LLM models available in your Google Generative AI account
# NOTE(review): no explicit genai.configure(...) call is visible before this
# cell; presumably the SDK picks up the GOOGLE_API_KEY environment variable
# set earlier — confirm.
models = genai.list_models()

# Prints each model name
# NOTE(review): if list_models() returns a one-shot iterator, `models` is
# exhausted after this loop — confirm before reusing it in a later cell.
for model in models:
    print(model.name)

models/embedding-gecko-001
models/gemini-2.5-pro-preview-03-25
models/gemini-2.5-flash
models/gemini-2.5-pro-preview-05-06
models/gemini-2.5-pro-preview-06-05
models/gemini-2.5-pro
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-1219
models/gemini-2.5-flash-preview-tts
models/gemini-2.5-pro-preview-tts
models/learnlm-2.0-flash-experimental
models/gemma-3-1b-it
models/gemma-3-4b-it
models/gemma-3-12b-it
models/gemma-3-27b-it
models/gemma-3n-e4b-it
models/gemma-3n-e2b-it
models/gemini-flash-latest
models/gemini-flash-lite-latest
models/gemini-pro-latest
models/gemini-2.5-flash-lite
models/gemini-2.5-flash-image-preview
mod

Loading Dataset - Churn Prediction

In [59]:
# Loading DataFrame
# Reads Churn_Modelling.csv from the Kaggle input directory into `df`.
# NOTE(review): relies on pandas being available as `pd` from an earlier cell.
df = pd.read_csv("/kaggle/input/bank-customer-churn-prediction-dataset/Churn_Modelling.csv")

In [60]:
# Preview the first rows of the loaded data; the result is rendered as the cell output.
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [61]:
# Show the column names of `df` as a plain Python list.
df.columns.tolist()

['RowNumber',
 'CustomerId',
 'Surname',
 'CreditScore',
 'Geography',
 'Gender',
 'Age',
 'Tenure',
 'Balance',
 'NumOfProducts',
 'HasCrCard',
 'IsActiveMember',
 'EstimatedSalary',
 'Exited']

# Defining AI Agents

In [62]:
### Data Quality Checker Agent

In [69]:
def model_config(system_prompt: str, user_prompt: str) -> str:
    """Send a system + user prompt pair to Gemini and return the reply text.

    The API key is read from the Kaggle secret ``GEMINI_API_KEY``. If that
    lookup fails or yields nothing, the ``GOOGLE_API_KEY`` environment
    variable (exported by the API configuration cell) is used instead.
    Ordinary exceptions are never propagated: every failure is reported as a
    bracketed ``[Simulated LLM: ...]`` string so the notebook keeps running.

    Args:
        system_prompt: Instructions placed first in the combined prompt.
        user_prompt: The request, appended after one blank line.

    Returns:
        The ``text`` of the model response, or a ``[Simulated LLM: ...]``
        placeholder explaining why no real reply is available.
    """
    try:
        api_key = UserSecretsClient().get_secret("GEMINI_API_KEY")
    except Exception:
        api_key = None

    # The notebook stores its key under GOOGLE_API_KEY, so fall back to the
    # environment variable before giving up.
    if not api_key:
        api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        return "[Simulated LLM: GEMINI_API_KEY secret not accessible in this environment.]"

    try:
        # The SDK is imported as `genai`; the former `gen` name was undefined,
        # so every call raised NameError and fell into the except branch below.
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel("gemini-2.5-flash")
        prompt = system_prompt + "\n\n" + user_prompt
        response = model.generate_content(prompt)
        return response.text

    except Exception as e:
        return f"[Simulated LLM: Gemini unreachable ‚Üí {e}]"

In [71]:
def metrics_data_prep(df):
    """Placeholder for metrics data preparation; not implemented yet.

    Accepts ``df`` but does not use it, and returns ``None``.
    """
    return None


def meta_data_generator():
    