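# Internal Analytics

Ad-hoc SQL analytics over the raw customer-churn CSV, executed with DuckDB through the `churn_compass` `FileIO` helper.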

In [1]:
# import pandas as pd
from churn_compass.io.file_io import FileIO

In [2]:
# Single FileIO instance reused by the query cells below (they call its
# `run_internal_analytics_with_duckdb` method).
# NOTE(review): the name `io` is also the name of a standard-library module;
# a later `import io` in this kernel would replace this object — consider renaming
# notebook-wide.
io = FileIO()

In [3]:
# Named SQL snippets, keyed by the metric they compute.
# Every query selects from a table called `customers`; the caller is responsible
# for binding that name to an actual data source when executing the query.
ANALYTICS_QUERY = {
    # Single row: number of rows in `customers`.
    "total_customers": """
        SELECT COUNT(*) AS total_customers
        FROM customers
    """,
    # One row per Geography value with its mean Balance.
    # ORDER BY makes the row order reproducible between runs.
    "average_balance_by_geography": """
        SELECT Geography, AVG(Balance) AS average_balance
        FROM customers
        GROUP BY Geography
        ORDER BY Geography
    """,
    # Mean of Exited per age bucket.
    # A NULL Age fails both comparisons, so without the explicit IS NULL branch it
    # would fall through to ELSE and be reported as 'Over 50'; it is bucketed as
    # 'Unknown' instead. The bucket only appears when such rows exist.
    # Rows are ordered by the youngest age in each bucket for a stable, readable order.
    "churn_rate_by_age_group": """
        SELECT 
            CASE 
                WHEN Age IS NULL THEN 'Unknown'
                WHEN Age < 30 THEN 'Under 30'
                WHEN Age BETWEEN 30 AND 50 THEN '30-50'
                ELSE 'Over 50'
            END AS age_group,
            AVG(Exited) AS churn_rate
        FROM customers
        GROUP BY age_group
        ORDER BY MIN(Age)
    """,
    # Single row: smallest and largest Tenure value.
    "min_max_tenure": """
        SELECT MIN(Tenure) AS min_tenure, MAX(Tenure) AS max_tenure
        FROM customers
    """,
}

In [5]:
# Run the tenure-range query; the raw CSV is made available to the SQL under
# the table name `customers`, which is the name the query selects from.
df = io.run_internal_analytics_with_duckdb(
    ANALYTICS_QUERY["min_max_tenure"],
    files={"customers": "../data/raw/Customer-Churn-Records.csv"},
)
# Bare last expression: let the notebook render the result with its rich
# display instead of forcing a plain-text print().
df

{"@timestamp": "2026-01-01T10:33:02.802308+00:00", "level": "INFO", "logger": "churn_compass.io.file_io", "message": "Executing DuckDB query: \n        S...", "module": "file_io", "func": "run_internal_analytics_with_duckdb", "line": 283, "taskName": "Task-51"}
{"@timestamp": "2026-01-01T10:33:02.825039+00:00", "level": "INFO", "logger": "churn_compass.io.file_io", "message": "DuckDB query complete", "module": "file_io", "func": "run_internal_analytics_with_duckdb", "line": 286, "taskName": "Task-51", "rows_returned": 1, "columns": 2}
   min_tenure  max_tenure
0           0          10
