In [13]:
!pip install google-cloud-aiplatform google-cloud-bigquery pandas google-genai




In [14]:
import json
import os
from datetime import datetime, date

import pandas as pd
from google import genai
from google.cloud import bigquery, aiplatform
from google.cloud.exceptions import NotFound
from google.genai.types import HttpOptions

In [28]:
# Project setup
# The project configured in the environment wins; otherwise fall back to the
# default project. Using `or` (instead of str(os.environ.get(...))) keeps
# PROJECT_ID from becoming the literal string "None" when the variable is unset.
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT") or "mmbtestproject"

LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "us-central1")

# BigQuery setup
DATASET_ID = "salesforce_data"
SOURCE_TABLE_ID = "extracted_features"
# RESULT_TABLE_ID = "churn_predictions"

# Google Generative AI Setup
# A single client, routed through Vertex AI, shared by every generation call.
GENAI_CLIENT = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

# Model used for generation; the commented line is an alternative model id.
#MODEL_ID = "gemini-2.0-flash-001"  # @param {type: "string"}
MODEL_ID = "gemini-2.0-pro-exp-02-05"

In [16]:
def convert_dates(obj):
    """JSON ``default`` hook: render ``date``/``datetime`` values as ISO 8601 text.

    Args:
        obj: The value ``json.dumps`` could not serialise on its own.

    Returns:
        ``obj.isoformat()`` for ``date`` and ``datetime`` instances.

    Raises:
        TypeError: If ``obj`` is neither a ``date`` nor a ``datetime``.
    """
    # Guard clause: anything that is not a date-like object is rejected up front.
    if not isinstance(obj, (datetime, date)):
        raise TypeError(f"Type {type(obj)} not serializable")
    return obj.isoformat()


In [35]:
def extract_features_from_bigquery(project_id, location, dataset_id, table_id):
    """Read every row of a BigQuery table into a pandas DataFrame.

    Args:
        project_id: Project used for the client and to qualify the table name.
        location: Location passed to the BigQuery client.
        dataset_id: Dataset that contains the table.
        table_id: Table to read.

    Returns:
        The DataFrame produced by the query job's ``to_dataframe()``.
    """
    bq_client = bigquery.Client(project=project_id, location=location)
    # Fully qualified `project.dataset.table` name, back-quoted in the SQL.
    sql = f"""
        SELECT * FROM `{project_id}.{dataset_id}.{table_id}`
    """
    return bq_client.query(sql).to_dataframe()

def extract_some_features_from_bigquery(project_id, location, dataset_id, table_id, limit=2):
    """Read the first ``limit`` rows of a BigQuery table, ordered by ``Client_ID``.

    Args:
        project_id: Project used for the client and to qualify the table name.
        location: Location passed to the BigQuery client.
        dataset_id: Dataset that contains the table.
        table_id: Table to read.
        limit: Maximum number of rows to return. Defaults to 2, the sample
            size this function previously hard-coded.

    Returns:
        The DataFrame produced by the query job's ``to_dataframe()``.

    Raises:
        ValueError: If ``limit`` is not a non-negative integer.
    """
    # Validate before interpolating into the SQL text; bool is an int subclass
    # and is rejected explicitly so that True/False cannot slip through.
    if isinstance(limit, bool) or not isinstance(limit, int) or limit < 0:
        raise ValueError(f"limit must be a non-negative integer, got {limit!r}")

    client = bigquery.Client(project=project_id, location=location)
    query = f"""
        SELECT *
        FROM `{project_id}.{dataset_id}.{table_id}`
        ORDER BY Client_ID ASC
        LIMIT {limit}
    """
    return client.query(query).to_dataframe()

In [18]:
def analyze_with_gemini(client_data, prompt, project_id, location):
    """Send ``prompt`` followed by the JSON-encoded client data to the model.

    Args:
        client_data: JSON-serialisable mapping describing one client; date and
            datetime values are encoded through ``convert_dates``.
        prompt: Instruction text placed in front of the serialised data.
        project_id: Not read by this function; the module-level
            ``GENAI_CLIENT`` already carries the project.
        location: Not read by this function, for the same reason.

    Returns:
        The ``text`` attribute of the response for model ``MODEL_ID``.
    """
    serialized_client = json.dumps(client_data, indent=2, default=convert_dates)
    full_prompt = prompt + serialized_client

    reply = GENAI_CLIENT.models.generate_content(model=MODEL_ID, contents=full_prompt)
    return reply.text


In [19]:
def store_results_in_bigquery(project_id, location, dataset_id, table_id, results):
    """Insert analysis rows into a BigQuery table, creating the table if needed.

    Args:
        project_id: Project passed to the BigQuery client.
        location: Location passed to the BigQuery client.
        dataset_id: Dataset that holds (or will hold) the table.
        table_id: Destination table.
        results: List of JSON-serialisable row dicts. A newly created table
            gets two STRING columns, ``Client_ID`` and ``Churn_Analysis``.

    Returns:
        None. The outcome of the insert is reported on stdout.

    Raises:
        Any error from ``get_table`` other than ``NotFound``, and any error
        raised by ``create_table`` or ``insert_rows_json``.
    """
    # Nothing to insert: skip the round-trips instead of sending an empty request.
    if not results:
        print("No results to store; skipping BigQuery insert.")
        return

    client = bigquery.Client(project=project_id, location=location)
    # Build the reference directly; Client.dataset() is deprecated.
    table_ref = bigquery.DatasetReference(client.project, dataset_id).table(table_id)

    # Check if table exists. Only a genuine NotFound triggers creation; every
    # other error propagates unchanged rather than being matched by message text.
    try:
        client.get_table(table_ref)
        print(f"Table {dataset_id}.{table_id} found.")
    except NotFound:
        print(f"Table {dataset_id}.{table_id} not found. Creating table...")

        # Define schema based on `results` structure
        schema = [
            bigquery.SchemaField("Client_ID", "STRING"),
            bigquery.SchemaField("Churn_Analysis", "STRING"),
        ]

        client.create_table(bigquery.Table(table_ref, schema=schema))
        print(f"Created table {dataset_id}.{table_id}")

    # Insert data
    errors = client.insert_rows_json(table_ref, results)
    if errors:
        print("BigQuery insertion errors:", errors)
    else:
        print("Results successfully stored in BigQuery.")

In [30]:
# Output destinations for this run: a local JSON-lines file and a BigQuery table.
file_path = "churn_analysis_results_3.txt"
RESULT_TABLE_ID = "churn_predictions_3"


# Short free-form prompts; the serialised client data is appended after the colon.
prompt_1 = "Analyze the risk of client churn based on the following data: "
prompt_2 = "Act as a skilled financial analyst. Mark client to be under the risk of client churn based on the following data: "

In [40]:
# Structured prompt: asks the model for a fixed set of five fields per client.
# The serialised client data is appended directly after the final line.
prompt_3 = """You are an AI specialized in customer churn prediction.
Your task is to analyze client churn risk based on the provided data and output in a structured way as follows:  

1. Row (Serial number starting from 1)  
2. Client_ID (Unique identifier for the client)  
3. Is under the churn risk (A floating-point score between 0.0 and 1.0, where 1.0 indicates 100% likelihood of churn)  
4. Short conclusion (Summarize in 2-3 sentences whether the client is likely to churn and why)  
5. Analysis (Detailed breakdown of key churn risk indicators)  

Instructions for AI Output
- Assign a churn risk score based on engagement, unresolved issues, and opportunity trends.  
- Keep the Short Conclusion concise (2-3 sentences). 
- Provide detailed analysis in the last column.  
- Ensure all fields are filled.  

Now, analyze the following client data and generate the output as instructed:  
"""


In [41]:
# Main workflow

# Extract client features
client_data = extract_some_features_from_bigquery(PROJECT_ID, LOCATION, DATASET_ID, SOURCE_TABLE_ID)
print("Created client data")

# One plain dict per client. to_dict(orient="records") keeps each column's own
# value type, whereas iterrows() first packs every row into a single Series.
results = []
for record in client_data.to_dict(orient="records"):
    response_text = analyze_with_gemini(record, prompt_3, PROJECT_ID, LOCATION)
    print(response_text)
    results.append({"Client_ID": record["Client_ID"], "Churn_Analysis": response_text})


# Save results as a JSON-lines text file (one JSON object per line)
with open(file_path, "w", encoding="utf-8") as f:
    for result in results:
        f.write(json.dumps(result) + "\n")  # Write each result as a JSON string

print(f"Results saved to {file_path}")

# Store results in BigQuery
store_results_in_bigquery(PROJECT_ID, LOCATION, DATASET_ID, RESULT_TABLE_ID, results)
    


Created client data
Here's the churn risk analysis for the provided client data:

| Row | Client_ID | Is under the churn risk | Short conclusion | Analysis |
|-----|-----------|--------------------------|-------------------|----------|
| 1   | C100000  | 0.75                     | This client is at high risk of churn due to their inactive status and low engagement. The client shows declining opportunity interest and has had a recent high-priority case, indicating potential dissatisfaction. | **Status:** Inactive, which is a major red flag.  <br> **Account Age:** Long account age (1614 days) but inactivity suggests disengagement. <br> **Opportunity Trend:** Only 1 out of 5 opportunities were Closed Won, with 3 still open and 1 Lost. Low closed-won ratio indicates problems in converting potential to sales.<br> **Average Opportunity Amount:** A fair average opportunity amount ($312,133.4) but this is diminished by low conversion.<br>**Last Opportunity Close Date**: Is in the future (2025-

In [37]:
def _days_since(value, today):
    """Return the whole number of days from ``value`` to the date ``today``.

    ``value`` may be a ``'YYYY-MM-DD'`` string, a ``date`` or a ``datetime``.
    Raises ``ValueError`` for a string in another format and ``TypeError`` for
    any other type.
    """
    if isinstance(value, str):
        last_date = datetime.strptime(value, "%Y-%m-%d").date()
    elif isinstance(value, datetime):
        # datetime subclasses date, so it is tested first; .date() also drops
        # any tzinfo, which would otherwise break the subtraction below.
        last_date = value.date()
    elif isinstance(value, date):
        last_date = value
    else:
        raise TypeError(f"Unsupported Last_Communication_Date type: {type(value)}")
    return (today - last_date).days


def calculate_churn_risk(client_data, today=None):
    """Score one client's churn risk with fixed, additive rule weights.

    Args:
        client_data: Mapping with the keys ``Status``,
            ``Closed_Won_Opportunities``, ``Total_Opportunities``,
            ``Open_Opportunities``, ``Last_Communication_Date``,
            ``High_Priority_Cases``, ``Open_Cases`` and ``Avg_Case_Age_Days``.
            ``Last_Communication_Date`` may be a ``'YYYY-MM-DD'`` string, a
            ``date`` or a ``datetime``.
        today: Reference day for the "days since last communication" rule, as
            a ``date`` or ``datetime``. Defaults to the current local date.

    Returns:
        Tuple ``(churn_risk, short_conclusion, analysis)``: the score capped at
        1.0 and rounded to two decimals, a one-line verdict, and the
        space-joined reasons that contributed to the score.

    Raises:
        TypeError: If ``Last_Communication_Date`` has an unsupported type.
        ValueError: If it is a string that is not in ``YYYY-MM-DD`` format.
    """
    if today is None:
        today = date.today()
    elif isinstance(today, datetime):
        today = today.date()

    churn_risk = 0.0
    reasons = []

    # Status
    if client_data["Status"] == "Inactive":
        churn_risk += 0.4
        reasons.append("Client status is Inactive.")

    # Opportunities
    total_opportunities = client_data["Total_Opportunities"]
    closed_won = client_data["Closed_Won_Opportunities"]
    # A client with no opportunities at all counts as a 0% win rate.
    opportunity_win_rate = closed_won / total_opportunities if total_opportunities > 0 else 0
    if opportunity_win_rate < 0.3:
        churn_risk += 0.15
        reasons.append(f"Low opportunity win rate ({opportunity_win_rate:.2f}).")
    if client_data["Open_Opportunities"] == 0 and closed_won <= 1:
        churn_risk += 0.1
        reasons.append("No open opportunities and few closed-won opportunities.")

    # Communication
    days_since_last_communication = _days_since(client_data["Last_Communication_Date"], today)
    if days_since_last_communication > 180:
        churn_risk += 0.25
        reasons.append(f"Long time since last communication ({days_since_last_communication} days).")
    elif days_since_last_communication > 90:
        churn_risk += 0.1
        reasons.append(f"{days_since_last_communication} days since last communication.")

    # Cases
    if client_data["High_Priority_Cases"] > 0 and client_data["Open_Cases"] == 0:
        churn_risk += 0.05
        reasons.append("Client previously opened High Priority Cases")

    # Avg Case Age Days (Negative values can indicate data errors or future dates)
    if client_data["Avg_Case_Age_Days"] < 0:
        churn_risk += 0.05
        reasons.append(f"Negative Avg_Case_Age_Days which might indicate some problems with the data ({client_data['Avg_Case_Age_Days']})")

    # Cap at 1.0 and round away float accumulation error, so that a sum meant
    # to be exactly 0.4 or 0.7 cannot fall just below its threshold.
    churn_risk = round(min(churn_risk, 1.0), 2)

    if churn_risk >= 0.7:
        short_conclusion = "High risk of churn.  The client is inactive and has had limited recent engagement."
    elif churn_risk >= 0.4:
        short_conclusion = "Moderate risk of churn.  Several factors indicate potential churn risk, requiring attention."
    else:
        short_conclusion = "Low risk of churn.  The client shows signs of continued engagement."

    analysis = " ".join(reasons)

    return churn_risk, short_conclusion, analysis

# churn_risk, short_conclusion, analysis = calculate_churn_risk(client_data)