# parallel freeport

## 🚀 Quick Start

This notebook provides centralized functions for generating client dashboard exports WITH PARALLELIZATION.

**Main Functions:**
- `run_everything_parallel(demo_name)` - Complete dashboard generation with parallel execution
- `run_everything_parallel_internally(demo_name)` - Internal development version with parallelization

**Key Improvements:**
- ✅ Parallel execution of independent modules
- ✅ Faster overall execution time
- ✅ Thread-safe operations
- ✅ Error handling for individual module failures

---

In [0]:
import ast
import calendar
import pandas as pd
from datetime import datetime, timedelta
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, row_number, lit, lower, to_timestamp
from pyspark.sql.window import Window
from yipit_databricks_utils.helpers.gsheets import read_gsheet
from concurrent.futures import ThreadPoolExecutor, as_completed
import traceback

In [0]:
%run "/Workspace/Users/pfisch@yipitdata.com/corporate_transformation_blueprints/corporate_transformation_blueprints/scratch_pfisch/freeport/after__template/freeport2"

In [0]:
def execute_with_error_handling(func, func_name, *args, **kwargs):
    """
    Run ``func(*args, **kwargs)`` and report the outcome instead of raising.

    Progress is printed before and after the call. Any ``Exception`` raised
    by the call is caught, printed together with its traceback, and returned
    to the caller as text.

    Args:
        func: Callable to invoke.
        func_name: Label used in the printed progress/error messages.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        tuple: ``(True, <return value of func>, None)`` on success, or
        ``(False, None, <error message including traceback>)`` on failure.
    """
    try:
        print(f"🔄 Starting: {func_name}")
        outcome = func(*args, **kwargs)
        print(f"✅ Completed: {func_name}")
        return (True, outcome, None)
    except Exception as exc:
        # Capture the traceback while the exception is still being handled.
        trace_text = traceback.format_exc()
        failure_report = f"❌ Error in {func_name}: {exc!s}\n{trace_text}"
        print(failure_report)
        return (False, None, failure_report)

In [0]:
# Read the target client from the notebook's "client_name" widget and echo it
# so the value used for this run is visible in the cell output.
# NOTE(review): `dbutils` is not defined in this file — presumably the
# Databricks notebook runtime provides it; confirm before running elsewhere.
client = dbutils.widgets.get("client_name")
print(client)

In [0]:
# Run configuration for this notebook.
# `demo_name` is the client name with a fixed "_v38" suffix; it and the two
# schema names are passed to every `freeport_module` call below.
demo_name = client + "_v38"
sandbox_schema = "ydx_internal_analysts_sandbox"
prod_schema = "ydx_internal_analysts_gold"

# Feature flags. NOTE(review): none of these are referenced in the visible
# part of this notebook — presumably read by code loaded via `%run`; confirm
# before removing.
market_share = True
shopper_insights = True
pro_module = True
pricing_n_promo = True


# Upper bound on concurrently running tasks (passed to ThreadPoolExecutor).
max_workers = 4

# Filled in after execution: task name -> {"success": bool, "error": str | None}.
results = {}

# ============================================
# PHASE 1: Parallel Execution of Independent Modules
# These can run in parallel
# ============================================
print("=" * 80)
print("🚀 PHASE 1: Running Parallel Modules")
print("=" * 80)

# Module names handed to `freeport_module` as its fourth positional argument,
# listed in the order the tasks are submitted to the executor.
freeport_module_names = [
    # ------------------------------------------------------------------
    # CORE TABLES - foundation tables used across all analysis modules
    # ------------------------------------------------------------------
    "filter_items",
    "client_specs",
    "panel_stats",
    "sample_size_guardrail",
    # ------------------------------------------------------------------
    # GEO - Geographic Analysis
    # ------------------------------------------------------------------
    "geographic_analysis",
    # ------------------------------------------------------------------
    # MARKET SHARE
    # Note: Market Share tables with LOOP are excluded from this config.
    # They require special handling for dynamic column generation:
    #   - market_share_for_column (with LOOP)
    #   - market_share_for_column_nrf (with LOOP)
    #   - market_share_for_column_std (with LOOP)
    # ------------------------------------------------------------------
    # ------------------------------------------------------------------
    # PRO INSIGHTS
    # ------------------------------------------------------------------
    "pro_insights",
    # ------------------------------------------------------------------
    # PRODUCT ANALYSIS (SKU)
    # ------------------------------------------------------------------
    "sku_analysis",
    "sku_time_series",
    "sku_detail",
    # ------------------------------------------------------------------
    # RET LEAKAGE (Retail Leakage Analysis)
    # ------------------------------------------------------------------
    "leakage_users",
    "leakage_retailer",
    "category_closure",
    "leakage_product",
    "market_share",
    # ------------------------------------------------------------------
    # SHOPPER INSIGHTS
    # ------------------------------------------------------------------
    "shopper_filter_items",
    # ------------------------------------------------------------------
    # TARIFFS
    # ------------------------------------------------------------------
    "tariffs_month_grouping",
    "tariffs_month_stable_products_list",
    "tariffs_month_product_filled_prices",
]

# One task per module. "name" must be a unique *string*: it is the key under
# which the task's outcome is stored and the label printed in progress logs.
# Using the function object itself as the name (as before) made every task
# share one key, so only a single result survived into the summary.
parallel_tasks = [
    {
        "func": freeport_module,
        "name": f"freeport_module:{module_name}",
        "args": (sandbox_schema, prod_schema, demo_name, module_name),
    }
    for module_name in freeport_module_names
]

# Fan the collected tasks out over a bounded thread pool, then record each
# task's outcome under its name as the futures finish.
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    # Map every submitted future back to the name of the task it is running.
    future_to_task = {}
    for spec in parallel_tasks:
        submitted = executor.submit(
            execute_with_error_handling, spec["func"], spec["name"], *spec["args"]
        )
        future_to_task[submitted] = spec["name"]

    # Collect in completion order. The wrapper returns a
    # (success, result, error) triple; only success and error are kept.
    for finished in as_completed(future_to_task):
        ok, _value, failure = finished.result()
        results[future_to_task[finished]] = {"success": ok, "error": failure}

# ============================================
# Summary Report
# Prints success/failure counts for everything recorded in `results`,
# followed by the first 200 characters of each failure's error text.
# ============================================
banner = "=" * 80
print(banner)
print("📈 EXECUTION SUMMARY")
print(banner)

# Keep the failed entries in their original insertion order.
failed_modules = [
    (name, outcome) for name, outcome in results.items() if not outcome["success"]
]
total_tasks = len(results)
failed_tasks = len(failed_modules)
successful_tasks = total_tasks - failed_tasks

print(f"✅ Successful: {successful_tasks}/{total_tasks}")
print(f"❌ Failed: {failed_tasks}/{total_tasks}")
print()

if failed_modules:
    print("❌ Failed Modules:")
    for name, outcome in failed_modules:
        print(f"  - {name}")
        message = outcome["error"]
        if message:
            # Truncate so one long traceback does not drown the summary.
            print(f"    Error: {message[:200]}...")

print(banner)