In [5]:
!pip install google-cloud-aiplatform google-cloud-bigquery pandas google-genai


Collecting google-genai
  Downloading google_genai-1.2.0-py3-none-any.whl.metadata (26 kB)
Downloading google_genai-1.2.0-py3-none-any.whl (130 kB)
Installing collected packages: google-genai
Successfully installed google-genai-1.2.0


In [26]:
from google.cloud import bigquery, aiplatform
from google import genai
from google.genai.types import HttpOptions
import os
import json
import pandas as pd
from datetime import datetime, date

In [16]:
# Project setup
# Prefer the project configured in the environment and fall back to the
# notebook default. The previous check compared PROJECT_ID against its own
# value, so the default was always discarded, and wrapping the lookup in str()
# produced the literal string "None" when GOOGLE_CLOUD_PROJECT was unset.
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT") or "mmbtestproject"

LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "us-central1")

# BigQuery setup
DATASET_ID = "salesforce_data"
SOURCE_TABLE_ID = "extracted_features"
RESULT_TABLE_ID = "churn_predictions"

# Google Generative AI Setup
# vertexai=True routes requests through Vertex AI using the project/location above.
GENAI_CLIENT = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

MODEL_ID = "gemini-2.0-flash-001"  # @param {type: "string"}

In [23]:
def convert_dates(obj):
    """Serialize ``date``/``datetime`` values as ISO 8601 strings.

    Intended as the ``default=`` hook for ``json.dumps``.

    Args:
        obj: The value the JSON encoder could not serialize on its own.

    Returns:
        ``obj.isoformat()`` when ``obj`` is a ``date`` or ``datetime``.

    Raises:
        TypeError: For any other type, so the encoder reports it as
            unserializable.
    """
    # Guard clause: anything that is not a date-like value is rejected up front.
    if not isinstance(obj, (datetime, date)):
        raise TypeError(f"Type {type(obj)} not serializable")
    return obj.isoformat()


In [17]:
def extract_features_from_bigquery(project_id, location, dataset_id, table_id):
    """Load every row of a BigQuery table into a pandas DataFrame.

    Args:
        project_id: Project used both for the client and in the table path.
        location: Location passed to the BigQuery client.
        dataset_id: Dataset containing the table.
        table_id: Table to read.

    Returns:
        The result of ``SELECT *`` on the table, as returned by ``to_dataframe()``.
    """
    bq = bigquery.Client(project=project_id, location=location)
    sql = f"""
        SELECT * FROM `{project_id}.{dataset_id}.{table_id}`
    """
    job = bq.query(sql)
    return job.to_dataframe()

In [24]:
def analyze_with_gemini(client_data, project_id, location):
    """Request a churn-risk analysis for one client record from the model.

    Args:
        client_data: Client features to embed in the prompt; serialized with
            ``json.dumps``, using ``convert_dates`` for date/datetime values.
        project_id: Not used by this function; the module-level
            ``GENAI_CLIENT`` is used instead.
        location: Not used by this function (see ``project_id``).

    Returns:
        The ``text`` attribute of the model response.
    """
    serialized = json.dumps(client_data, indent=2, default=convert_dates)
    prompt = "Analyze the risk of client churn based on the following data: " + serialized

    reply = GENAI_CLIENT.models.generate_content(contents=prompt, model=MODEL_ID)
    return reply.text


In [19]:
def store_results_in_bigquery(project_id, location, dataset_id, table_id, results):
    """Stream result rows into a BigQuery table and report the outcome.

    Args:
        project_id: Project that owns the destination table.
        location: Location passed to the BigQuery client.
        dataset_id: Dataset containing the destination table.
        table_id: Destination table name.
        results: Sequence of JSON-compatible dicts, one per row to insert.

    Returns:
        None. Insert errors are printed rather than raised.
    """
    # Nothing to insert: skip the request instead of sending an empty payload.
    if not results:
        print("No results to store; skipping BigQuery insert.")
        return

    client = bigquery.Client(project=project_id, location=location)
    # Client.dataset() is deprecated; insert_rows_json accepts a fully
    # qualified "project.dataset.table" string directly.
    table_path = f"{project_id}.{dataset_id}.{table_id}"
    errors = client.insert_rows_json(table_path, results)
    if errors:
        print("BigQuery insertion errors:", errors)
    else:
        print("Results successfully stored in BigQuery.")

In [29]:
# Main workflow

# Extract client features.
# Uses the UPPER_CASE constants from the configuration cell; the lowercase
# names used previously are not defined in any visible cell, so the cell would
# raise NameError on a fresh kernel.
client_data = extract_features_from_bigquery(PROJECT_ID, LOCATION, DATASET_ID, SOURCE_TABLE_ID)
print("Created client data")

# One model call per client row; collect the analyses as JSON-ready records.
results = []
for _, row in client_data.iterrows():
    response_text = analyze_with_gemini(row.to_dict(), PROJECT_ID, LOCATION)
    print(response_text)
    results.append({"Client_ID": row["Client_ID"], "Churn_Analysis": response_text})

# Store results in BigQuery (left disabled, as in the original cell)
#store_results_in_bigquery(PROJECT_ID, LOCATION, DATASET_ID, RESULT_TABLE_ID, results)
    


Created client data
Okay, let's analyze the churn risk for client C100009 based on the provided data.

**Summary of the Data:**

*   **Client_ID:** C100009
*   **Industry:** Healthcare
*   **Size:** Large (Revenue: $35.5M, Employees: 2385)
*   **Status:** Active
*   **Tenure:** 521 days (approximately 1.43 years)
*   **Engagement:** Low opportunity conversion rate, mixed communication activity.
*   **Support:** Some case activity.

**Churn Risk Factors (Potential Red Flags):**

*   **Zero Closed Won Opportunities:** This is a major red flag.  Despite having two opportunities, none have been won. This suggests either sales challenges in closing deals, pricing issues, a bad product fit, or competition winning.
*   **High Average Opportunity Amount:** The average opportunity amount is quite high ($333,431.5).  This, combined with zero wins, suggests the sales cycle is getting blocked or is very long and the client may go elsewhere.
*   **Few Communications:** The communication count is 6 