# Consolidated Holdings Rollup

Reads from all entity lakehouses via OneLake shortcuts and creates consolidated
holdings-level views:
- Revenue rollup across all 5 entities
- Headcount by entity
- Cost allocation
- Entity health metrics

**Schedule:** Daily at 6:00 AM UTC (after entity pipelines complete)

In [None]:
from datetime import datetime, timezone

from pyspark.sql import SparkSession
from pyspark.sql.functions import (
    col, lit, current_timestamp, when, coalesce, count, sum as spark_sum,
    avg, round as spark_round, to_date, year, month, date_format, current_date
)
from pyspark.sql.types import DoubleType, IntegerType, StringType

# Reuse the active Spark session if one exists, otherwise create one.
spark = SparkSession.builder.getOrCreate()

# Every read and write in this notebook goes through the consolidated
# lakehouse; BASE_PATH is the root all table paths are built from.
# NOTE(review): OneLake ABFS URIs are normally
# abfss://<workspace>@onelake.dfs.fabric.microsoft.com/<item>.Lakehouse/...;
# here the lakehouse name sits in the workspace position and no item segment
# is present. Confirm this resolves in the target environment.
ROLLUP_LAKEHOUSE = "lh-holdings-rollup"
BASE_PATH = f"abfss://{ROLLUP_LAKEHOUSE}@onelake.dfs.fabric.microsoft.com"

# Entity workspace paths (via OneLake shortcuts in consolidated lakehouse)
# NOTE(review): PLATFORM_PATH and MEDIA_PATH are defined but not read by any
# cell visible in this notebook.
TVS_PATH = f"{BASE_PATH}/Tables/tvs"
CONSULTING_PATH = f"{BASE_PATH}/Tables/consulting"
PLATFORM_PATH = f"{BASE_PATH}/Tables/platform"
MEDIA_PATH = f"{BASE_PATH}/Tables/media"
A3_PATH = f"{BASE_PATH}/Tables/a3"

# Display names of the five holdings entities, as used in the rollup tables.
ENTITIES = ["TAIA", "TVS", "Lobbi Consulting", "Medicare Consulting", "Media Company"]

# Timezone-aware UTC timestamp: datetime.utcnow() is deprecated since
# Python 3.12 and returns a naive value that carries no UTC marker.
print(f"Consolidated rollup started at {datetime.now(timezone.utc).isoformat()}")
print(f"Reading from {ROLLUP_LAKEHOUSE} shortcuts")

In [None]:
# ── Cell 2: Revenue Rollup ───────────────────────────────────────────────
#
# Builds one revenue row per entity (TVS, the two consulting entities, TAIA)
# and overwrites Tables/rollup_revenue with the result. Each source is loaded
# best-effort: a failure is printed and that entity is left out of the rollup.

from functools import reduce
from pyspark.sql import DataFrame

# Every per-entity frame is projected to exactly these columns as it is built.
# Doing the projection per frame (rather than inside the union step) keeps the
# output schema identical even when only a single source loads; otherwise
# helper columns such as total_commissions would end up in the table.
REVENUE_COLUMNS = ["entity", "revenue_type", "monthly_revenue", "annual_projected"]

revenue_frames = []

# TVS Revenue: Monthly subscription revenue from active subscriptions
try:
    df_tvs_subs = spark.read.format("delta").load(f"{TVS_PATH}/fact_subscriptions")
    df_tvs_revenue = (
        df_tvs_subs
        .filter(col("tvs_status") == 100000001)  # Active
        .withColumn("monthly_revenue", col("tvs_monthlyprice").cast(DoubleType()))
        # sum() over zero rows is NULL; report 0.0 like the other rollup cells do.
        .agg(coalesce(spark_sum("monthly_revenue"), lit(0.0)).alias("monthly_revenue"))
        .withColumn("entity", lit("TVS"))
        .withColumn("revenue_type", lit("Subscription"))
        .withColumn("annual_projected", col("monthly_revenue") * 12)
        .select(*REVENUE_COLUMNS)
    )
    revenue_frames.append(df_tvs_revenue)
    print("TVS subscription data loaded")
except Exception as e:
    print(f"TVS data not available: {e}")

# Consulting Revenue: Engagement values
try:
    df_consulting_eng = spark.read.format("delta").load(f"{CONSULTING_PATH}/dv_engagements")
    df_consulting_accts = spark.read.format("delta").load(f"{CONSULTING_PATH}/dv_accounts")

    # Revenue by consulting entity: accounts carry the entity code, so the
    # engagements are restricted to each entity by joining on the account id.
    for entity_code, entity_name in [(100000000, "Lobbi Consulting"), (100000001, "Medicare Consulting")]:
        entity_accounts = (
            df_consulting_accts
            .filter(col("tvs_entity") == entity_code)
            .select("tvs_consultingaccountid")
        )
        df_entity_rev = (
            df_consulting_eng
            .join(entity_accounts, df_consulting_eng["tvs_accountid"] == entity_accounts["tvs_consultingaccountid"])
            .filter(col("tvs_status").isin([100000002, 100000004]))  # Active or Completed
            .withColumn("value", col("tvs_value").cast(DoubleType()))
            .withColumn("invoiced", col("tvs_invoicedamount").cast(DoubleType()))
            .agg(
                coalesce(spark_sum("value"), lit(0.0)).alias("total_contract_value"),
                coalesce(spark_sum("invoiced"), lit(0.0)).alias("total_invoiced"),
            )
            .withColumn("entity", lit(entity_name))
            .withColumn("revenue_type", lit("Engagement"))
            # Monthly figure is the invoiced total spread over 12 months;
            # the annual figure is the summed contract value.
            .withColumn("monthly_revenue", col("total_invoiced") / 12)
            .withColumn("annual_projected", col("total_contract_value"))
            .select(*REVENUE_COLUMNS)
        )
        revenue_frames.append(df_entity_rev)
    print("Consulting data loaded")
except Exception as e:
    print(f"Consulting data not available: {e}")

# TAIA / A3 Revenue: Historical commission data
# NOTE(review): the whole commissions table is summed and divided by 12, i.e.
# the full history is treated as a single year. Confirm that is intended.
try:
    df_a3_commissions = spark.read.format("delta").load(f"{A3_PATH}/commissions")
    df_taia_rev = (
        df_a3_commissions
        .withColumn("amount", col("amount").cast(DoubleType()))
        .agg(coalesce(spark_sum("amount"), lit(0.0)).alias("total_commissions"))
        .withColumn("entity", lit("TAIA"))
        .withColumn("revenue_type", lit("Commission"))
        .withColumn("monthly_revenue", col("total_commissions") / 12)
        .withColumn("annual_projected", col("total_commissions"))
        .select(*REVENUE_COLUMNS)
    )
    revenue_frames.append(df_taia_rev)
    print("TAIA/A3 commission data loaded")
except Exception as e:
    print(f"A3 data not available: {e}")

# Combine all revenue data
if revenue_frames:
    # All frames already share REVENUE_COLUMNS, so a plain by-name union is
    # enough and a single-frame run produces the same schema as a full run.
    df_revenue_rollup = (
        reduce(DataFrame.unionByName, revenue_frames)
        .withColumn("calculated_at", current_timestamp())
    )

    df_revenue_rollup.write.format("delta").mode("overwrite").save(f"{BASE_PATH}/Tables/rollup_revenue")
    print(f"\nRevenue rollup written: {df_revenue_rollup.count()} entity records")
    df_revenue_rollup.show(truncate=False)
else:
    print("No revenue data available for rollup")

In [None]:
# ── Cell 3: Headcount and Cost Allocation ────────────────────────────────
#
# Collects one (entity, role_type, headcount, total_hours, cost_allocated)
# tuple per entity, derives an estimated monthly cost from the role type, and
# overwrites Tables/rollup_headcount and Tables/rollup_cost_allocation.
# Each source is loaded best-effort: a failure is printed and skipped.

headcount_data = []

# Headcount from TVS time entries (unique VAs)
try:
    df_tvs_time = spark.read.format("delta").load(f"{TVS_PATH}/fact_time_entries")
    # Drop NULL ids before distinct(): a NULL would otherwise be counted as
    # one extra person.
    tvs_active_vas = df_tvs_time.select("tvs_userid").dropna().distinct().count()
    # The aggregate is NULL (None) when no billable rows exist; fall back to 0.
    tvs_total_hours = (
        df_tvs_time
        .filter(col("is_billable") == True)
        .agg(spark_sum("tvs_hours"))
        .collect()[0][0]
        or 0
    )
    headcount_data.append(("TVS", "Virtual Assistants", tvs_active_vas, float(tvs_total_hours), 0.0))
    print(f"TVS: {tvs_active_vas} active VAs, {tvs_total_hours:.1f} total billable hours")
except Exception as e:
    print(f"TVS headcount not available: {e}")

# Consulting headcount from activities (unique owners)
try:
    df_consulting_acts = spark.read.format("delta").load(f"{CONSULTING_PATH}/dv_activities")
    # Same NULL handling as above: an activity without an owner is not a person.
    consulting_staff = df_consulting_acts.select("tvs_owner").dropna().distinct().count()
    headcount_data.append(("Lobbi Consulting", "Consultants", consulting_staff, 0.0, 0.0))
    # NOTE(review): the Medicare figure is not measured; it is derived as half
    # of the combined consulting owner count (minimum 1), while Lobbi is given
    # the full count. Confirm this split is intended.
    headcount_data.append(("Medicare Consulting", "Consultants", max(1, consulting_staff // 2), 0.0, 0.0))
    print(f"Consulting: {consulting_staff} unique staff")
except Exception as e:
    print(f"Consulting headcount not available: {e}")

# A3/TAIA headcount from brokers
try:
    df_brokers = spark.read.format("delta").load(f"{A3_PATH}/brokers")
    broker_count = df_brokers.count()
    headcount_data.append(("TAIA", "Brokers", broker_count, 0.0, 0.0))
    print(f"TAIA: {broker_count} brokers")
except Exception as e:
    print(f"TAIA headcount not available: {e}")

# Create headcount DataFrame
if headcount_data:
    df_headcount = (
        spark.createDataFrame(headcount_data, ["entity", "role_type", "headcount", "total_hours", "cost_allocated"])
        .withColumn("calculated_at", current_timestamp())
    )

    # Cost allocation: estimated based on entity type (flat per-head rates).
    df_cost = df_headcount.withColumn(
        "estimated_monthly_cost",
        when(col("role_type") == "Virtual Assistants", col("headcount") * 800)  # VA avg cost
        .when(col("role_type") == "Consultants", col("headcount") * 5000)  # Consultant avg cost
        .when(col("role_type") == "Brokers", col("headcount") * 0)  # Commission-based
        .otherwise(lit(0)),
    )

    # Both output tables receive the same frame (headcount plus cost estimate).
    df_cost.write.format("delta").mode("overwrite").save(f"{BASE_PATH}/Tables/rollup_headcount")
    df_cost.write.format("delta").mode("overwrite").save(f"{BASE_PATH}/Tables/rollup_cost_allocation")
    print("\nHeadcount & cost rollup written")
    df_cost.show(truncate=False)
else:
    print("No headcount data available")

In [None]:
# ── Cell 4: Entity Health Metrics ─────────────────────────────────────────
#
# Collects one (entity, avg_health_score, total_items, critical_items,
# operational_status) tuple per entity, buckets the score into a
# health_category, and overwrites Tables/rollup_entity_health.
# Unlike the earlier cells, a source that fails to load still yields a row:
# the entity is recorded with zeroed metrics and status "No Data".

health_data = []

# TVS Health: based on account health scores
try:
    df_tvs_health = spark.read.format("delta").load(f"{TVS_PATH}/agg_account_health")
    # avg() over zero rows is NULL (None); fall back to 0.
    tvs_avg_health = df_tvs_health.agg(avg("health_score")).collect()[0][0] or 0
    tvs_total_accounts = df_tvs_health.count()
    tvs_critical = df_tvs_health.filter(col("health_category") == "Critical").count()
    health_data.append(("TVS", float(tvs_avg_health), tvs_total_accounts, tvs_critical, "Operational"))
    print(f"TVS health: avg={tvs_avg_health:.1f}, accounts={tvs_total_accounts}, critical={tvs_critical}")
except Exception as e:
    print(f"TVS health data not available: {e}")
    health_data.append(("TVS", 0.0, 0, 0, "No Data"))

# Consulting Health: based on engagement health
consulting_entities = ["Lobbi Consulting", "Medicare Consulting"]
try:
    df_eng_health = spark.read.format("delta").load(f"{CONSULTING_PATH}/agg_engagement_health")
    # Rows are staged here and only committed once both entities succeed.
    # Appending straight to health_data would leave a partial result behind
    # if the second entity failed, and the fallback below would then add a
    # duplicate row for the first entity.
    consulting_rows = []
    for entity_name in consulting_entities:
        df_entity = df_eng_health.filter(col("entity_name") == entity_name)
        ent_total = df_entity.count()  # counted once, reused for the check and the row
        if ent_total > 0:
            ent_avg = df_entity.agg(avg("health_score")).collect()[0][0] or 0
            ent_critical = df_entity.filter(col("health_label") == "Critical").count()
            consulting_rows.append((entity_name, float(ent_avg), ent_total, ent_critical, "Operational"))
        else:
            consulting_rows.append((entity_name, 0.0, 0, 0, "No Data"))
    health_data.extend(consulting_rows)
    print("Consulting health loaded")
except Exception as e:
    print(f"Consulting health not available: {e}")
    health_data.extend((entity_name, 0.0, 0, 0, "No Data") for entity_name in consulting_entities)

# TAIA Health: based on A3 data completeness
try:
    df_brokers = spark.read.format("delta").load(f"{A3_PATH}/brokers")
    df_commissions = spark.read.format("delta").load(f"{A3_PATH}/commissions")
    broker_count = df_brokers.count()
    commission_count = df_commissions.count()
    # Basic metric: one point per ten rows across both tables, capped at 100.
    taia_health = min(100, (broker_count + commission_count) / 10)
    health_data.append(("TAIA", float(taia_health), broker_count, 0, "Archive"))
    print(f"TAIA: {broker_count} brokers, {commission_count} commissions")
except Exception as e:
    print(f"TAIA data not available: {e}")
    health_data.append(("TAIA", 0.0, 0, 0, "No Data"))

# Media Company placeholder: fixed score, no source table is read.
health_data.append(("Media Company", 50.0, 0, 0, "Setup"))

# Write entity health rollup
df_entity_health = (
    spark.createDataFrame(
        health_data,
        ["entity", "avg_health_score", "total_items", "critical_items", "operational_status"]
    )
    # Score buckets: >=75 Healthy, >=50 Needs Attention, >=25 At Risk, else Critical.
    .withColumn("health_category",
        when(col("avg_health_score") >= 75, lit("Healthy"))
        .when(col("avg_health_score") >= 50, lit("Needs Attention"))
        .when(col("avg_health_score") >= 25, lit("At Risk"))
        .otherwise(lit("Critical"))
    )
    .withColumn("calculated_at", current_timestamp())
)

df_entity_health.write.format("delta").mode("overwrite").save(f"{BASE_PATH}/Tables/rollup_entity_health")
print(f"\nEntity health rollup written: {df_entity_health.count()} entities")
df_entity_health.show(truncate=False)
# Timezone-aware UTC timestamp: datetime.utcnow() is deprecated since Python 3.12.
print(f"Consolidated rollup completed at {datetime.now(timezone.utc).isoformat()}")