# Build Parquet Files (can be ignored: this step is built into the Testing Dashboard)

In [2]:
import pandas as pd
import logging
from pathlib import Path
from dashboard_utils import generate_week_options_from_parquet
from data_fetcher import load_jobs_data, download_conversion_report, fetch_roi
import requests
import pandas as pd
from bs4 import BeautifulSoup
from data_fetcher import get_session_with_canvas_cookie

# Setup logging: INFO and above, each record prefixed with its timestamp.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s")

# Output directory for the three Parquet datasets; created (with parents) if absent.
# NOTE(review): the viewer and editor cells further down read from "Master_Data"
# (with an underscore), not "MasterData" — confirm which directory is intended.
DATA_DIR = Path("MasterData")
DATA_DIR.mkdir(parents=True, exist_ok=True)

# One Parquet file per dataset: jobs, call center, ROI.
JOBS_OUTFILE = DATA_DIR / "all_jobs_data.parquet"
CALLS_OUTFILE = DATA_DIR / "all_call_center_data.parquet"
ROI_OUTFILE = DATA_DIR / "all_roi_data.parquet"

# 1) Read the three existing datasets. pd.read_parquet raises when a file is
#    missing, so all three files must already exist before this cell runs.
jobs_all = pd.read_parquet(JOBS_OUTFILE)
calls_all = pd.read_parquet(CALLS_OUTFILE)
roi_all = pd.read_parquet(ROI_OUTFILE)

# 2) Make sure the last full Sun–Sat week is present in all three datasets.
#    The helper returns the (possibly extended) jobs, calls and ROI frames,
#    in that order.
from dashboard_utils import fetch_and_append_week_if_needed

jobs_all, calls_all, roi_all = fetch_and_append_week_if_needed(jobs_all, calls_all, roi_all)

# 3) Write every frame back to its file so a newly appended week is kept.
for _frame, _outfile in (
    (jobs_all, JOBS_OUTFILE),
    (calls_all, CALLS_OUTFILE),
    (roi_all, ROI_OUTFILE),
):
    _frame.to_parquet(_outfile, index=False)

# 4) Build the list of weeks from the jobs data. Both names refer to the same
#    object; the aggregation sections below iterate over week_ranges.
week_ranges = week_options = generate_week_options_from_parquet(jobs_all)

# ─── 1. Aggregate Jobs Data ────────────────────────────────────────────
# Load the jobs frame for every week in week_ranges, stamp each frame with its
# week bounds, then overwrite the jobs Parquet file with the combined result.
logging.info("Fetching ALL Jobs data across available weeks...")
job_dfs = []

for week in week_ranges:
    # Each option's "value" is split on "|" into the week's start and end.
    start, end = week["value"].split("|")
    try:
        week_jobs = load_jobs_data(start, end)
        week_jobs["week_start"] = start
        week_jobs["week_end"] = end
    except Exception as e:
        # Best effort: a week that cannot be loaded is logged and skipped.
        logging.warning(f"❌ Failed to load jobs data for {start} – {end}: {e}")
    else:
        job_dfs.append(week_jobs)

if not job_dfs:
    logging.warning("⚠️ No jobs data collected.")
else:
    all_jobs = pd.concat(job_dfs, ignore_index=True)

    # Cast ID to string so the Parquet column gets one consistent type.
    all_jobs["ID"] = all_jobs["ID"].astype(str)

    all_jobs.to_parquet(JOBS_OUTFILE, index=False)
    logging.info(f"✅ All jobs data saved to: {JOBS_OUTFILE}")

# ─── 2. Aggregate Call Center Data ──────────────────────────────────────
# For every week, download the conversion report twice (include_homeshow off,
# then on), label each frame, and overwrite the call-center Parquet file with
# the combined result.
logging.info("Fetching ALL Call Center data (inbound and outbound) across available weeks...")
call_dfs = []

for week in week_ranges:
    start, end = week["value"].split("|")

    # include_homeshow=False is tagged "inbound"; True is tagged "outbound".
    for include_homeshow in (False, True):
        try:
            report, _ = download_conversion_report(start, end, include_homeshow=include_homeshow)
            report["week_start"] = start
            report["week_end"] = end
            report["mode"] = "outbound" if include_homeshow else "inbound"
        except Exception as e:
            # Best effort: log the failed week/mode and carry on with the rest.
            mode = "Outbound" if include_homeshow else "Inbound"
            logging.warning(f"❌ Failed to fetch {mode} call data for {start} – {end}: {e}")
        else:
            call_dfs.append(report)

if not call_dfs:
    logging.warning("⚠️ No call center data collected.")
else:
    all_calls = pd.concat(call_dfs, ignore_index=True)
    all_calls.to_parquet(CALLS_OUTFILE, index=False)
    logging.info(f"✅ All call center data saved to: {CALLS_OUTFILE}")

# ─── 3. Fetch ROI ───────────────────────────────────────────────────────────
# Fetch ROI for every week through one shared session, keep the non-empty
# frames, and overwrite the ROI Parquet file with their concatenation.
logging.info("Fetching ROI across available weeks...")
roi_dfs = []
sess = get_session_with_canvas_cookie()

for week in week_ranges:
    start, end = week["value"].split("|")
    try:
        df_roi = fetch_roi(start, end, sess)
        if df_roi.empty:
            # Nothing came back for this week: skip it without logging.
            continue
        roi_dfs.append(df_roi)
        logging.info(f"  ✓ ROI {start}–{end}")
    except Exception as e:
        # Best effort: a failed week is logged and the loop continues.
        logging.warning(f"✗ ROI {start}–{end} failed: {e}")

if not roi_dfs:
    logging.warning("⚠️ No ROI data collected.")
else:
    pd.concat(roi_dfs, ignore_index=True).to_parquet(ROI_OUTFILE, index=False)
    logging.info(f"✅ ROI saved to {ROI_OUTFILE}")

2025-07-10 14:12:55,958 - Fetching ALL Jobs data across available weeks...
2025-07-10 14:12:55,967 - ✅ All jobs data saved to: MasterData/all_jobs_data.parquet
2025-07-10 14:12:55,967 - Fetching ALL Call Center data (inbound and outbound) across available weeks...


✅ Jobs data for 06/29/2025 – 07/05/2025 already present.
✅ Call Center data for 06/29/2025 – 07/05/2025 already present.
✅ ROI data for 06/29/2025 – 07/05/2025 already present.
→ Looking locally for Jobs Data…
   • Found it. Loading Jobs Data from your computer…

→ Looking locally for Jobs Data…
   • Found it. Loading Jobs Data from your computer…

→ Looking locally for Jobs Data…
   • Found it. Loading Jobs Data from your computer…

→ Looking locally for Jobs Data…
   • Found it. Loading Jobs Data from your computer…

→ Looking locally for Jobs Data…
   • Found it. Loading Jobs Data from your computer…

→ Looking locally for Jobs Data…
   • Found it. Loading Jobs Data from your computer…

→ Looking locally for Jobs Data…
   • Found it. Loading Jobs Data from your computer…



2025-07-10 14:13:14,899 - ✅ All call center data saved to: MasterData/all_call_center_data.parquet
2025-07-10 14:13:14,900 - Fetching ROI across available weeks...
2025-07-10 14:13:24,933 -   ✓ ROI 06/29/2025–07/05/2025
2025-07-10 14:13:30,450 -   ✓ ROI 06/22/2025–06/28/2025
2025-07-10 14:13:35,647 -   ✓ ROI 06/15/2025–06/21/2025
2025-07-10 14:13:41,090 -   ✓ ROI 06/08/2025–06/14/2025
2025-07-10 14:13:46,698 -   ✓ ROI 06/01/2025–06/07/2025
2025-07-10 14:13:52,273 -   ✓ ROI 05/25/2025–05/31/2025
2025-07-10 14:13:57,659 -   ✓ ROI 05/18/2025–05/24/2025
2025-07-10 14:13:57,661 - ✅ ROI saved to MasterData/all_roi_data.parquet


# Parquet Viewer

In [10]:
import pandas as pd
from pathlib import Path
from dash import Dash, dcc, html, dash_table

# ─── Configuration ────────────────────────────────────────────────────────
# Directory (relative to the working directory) holding the Parquet datasets.
# NOTE(review): the build cell at the top of this notebook writes to
# "MasterData" (no underscore) — confirm this viewer points at the same files.
DATA_DIR = Path("Master_Data")

# One Parquet file per dataset shown in the viewer: jobs, call center, ROI.
JOBS_FILE = DATA_DIR / "all_jobs_data.parquet"
CALLS_FILE = DATA_DIR / "all_call_center_data.parquet"
ROI_FILE = DATA_DIR / "all_roi_data.parquet"

# ─── Load Data ─────────────────────────────────────────────────────────────
def safe_load_parquet(path):
    """Read the Parquet file at ``path``, tolerating a missing file.

    Args:
        path: A ``pathlib.Path``-like object exposing ``exists()``.

    Returns:
        The file's contents as a DataFrame, or an empty DataFrame (no rows,
        no columns) when ``path`` does not exist.
    """
    if not path.exists():
        return pd.DataFrame()
    return pd.read_parquet(path)

# Load each dataset once, when the cell runs. A file that does not exist
# yields an empty DataFrame, so the matching tab renders an empty table.
jobs_df = safe_load_parquet(JOBS_FILE)
calls_df = safe_load_parquet(CALLS_FILE)
roi_df = safe_load_parquet(ROI_FILE)

# ─── App Initialization ────────────────────────────────────────────────────
# Create the Dash application and set the title shown for the page.
app = Dash(__name__)
app.title = "Parquet Viewer"

# ─── App Layout ────────────────────────────────────────────────────────────
def _dataset_tab(label, heading, df):
    """Build one tab: a heading above a paged, filterable, sortable table of ``df``."""
    table = dash_table.DataTable(
        data=df.to_dict("records"),
        columns=[{"name": col, "id": col} for col in df.columns],
        page_size=20,
        style_table={"overflowX": "auto"},
        filter_action="native",
        sort_action="native",
    )
    return dcc.Tab(
        label=label,
        children=[html.H4(heading, style={"marginTop": "10px"}), table],
    )


# Page structure: a centered title followed by one tab per dataset.
app.layout = html.Div(
    [
        html.H2("📦 Parquet File Viewer", style={"textAlign": "center", "color": "#2C3E70"}),
        dcc.Tabs(
            [
                _dataset_tab("Jobs Data", "Jobs Dataset", jobs_df),
                _dataset_tab("Call Center Data", "Call Center Dataset", calls_df),
                _dataset_tab("ROI Data", "ROI Dataset", roi_df),
            ]
        ),
    ],
    style={"fontFamily": "Segoe UI", "margin": "20px"},
)


# ─── Run App ───────────────────────────────────────────────────────────────
# Start the Dash server on port 8061, but only when this code is executed
# directly (not when imported). debug=True is a development setting.
if __name__ == "__main__":
    app.run(debug=True, port=8061)


# Parquet Editor

In [2]:
import pandas as pd
from pathlib import Path

# Paths to your local Parquet files (relative to the working directory).
# NOTE(review): the build cell at the top of this notebook writes to
# "MasterData" (no underscore) — confirm these are the files you mean to edit.
jobs_path = Path("Master_Data/all_jobs_data.parquet")
calls_path = Path("Master_Data/all_call_center_data.parquet")
roi_path = Path("Master_Data/all_roi_data.parquet")

def remove_latest_week(df):
    """Return the rows of ``df`` that do not fall in its most recent week.

    ``week_end`` is parsed with ``pd.to_datetime`` and bucketed into pandas
    weekly periods (``to_period("W")``); every row in the latest bucket is
    dropped. Parsing happens on a copy, so the caller's frame is not modified.

    Args:
        df: DataFrame with a ``week_end`` column of dates or date-like strings.

    Returns:
        A new DataFrame without the latest week's rows. Its ``week_end``
        column holds datetimes; the original index labels are kept.

    Raises:
        KeyError: if ``df`` has no ``week_end`` column.
    """
    # Work on a copy: assigning the parsed column to ``df`` itself would
    # silently change the dtype of the caller's ``week_end`` column.
    result = df.copy()
    result["week_end"] = pd.to_datetime(result["week_end"])

    # Compute the weekly buckets once and reuse them for max() and the filter.
    week_periods = result["week_end"].dt.to_period("W")
    latest_week = week_periods.max()
    return result[week_periods != latest_week]

# Load and clean.
# All three files are read and filtered before any of them is written, so an
# error while loading or filtering leaves every file on disk untouched.
jobs_df = pd.read_parquet(jobs_path)
calls_df = pd.read_parquet(calls_path)
roi_df = pd.read_parquet(roi_path)

# Each dataset must have a "week_end" column; the cleaned frames come back
# with that column parsed to datetimes.
jobs_df_clean = remove_latest_week(jobs_df)
calls_df_clean = remove_latest_week(calls_df)
roi_df_clean = remove_latest_week(roi_df)

# Overwrite the original files.
# Destructive: no backup is taken, and every run of this cell removes one more
# week from whatever is currently on disk.
jobs_df_clean.to_parquet(jobs_path, index=False)
calls_df_clean.to_parquet(calls_path, index=False)
roi_df_clean.to_parquet(roi_path, index=False)

print("✅ Most recent week removed from all Parquet files.")

# Function to normalize the 'week_end' column format
def normalize_week_end_format(df):
    """Rewrite ``df["week_end"]`` as ``MM/DD/YYYY`` strings, in place.

    Args:
        df: DataFrame with a ``week_end`` column that ``pd.to_datetime`` can parse.

    Returns:
        The same ``df`` object, with ``week_end`` replaced by formatted strings.
    """
    parsed_week_end = pd.to_datetime(df["week_end"])
    df["week_end"] = parsed_week_end.dt.strftime("%m/%d/%Y")
    return df

# Rewrite each Parquet file in place with its week_end column normalized.
# NOTE(review): the message printed below mentions only the most recent week,
# but this loop reformats week_end for every row of every file.
for parquet_path in (jobs_path, calls_path, roi_path):
    normalized = normalize_week_end_format(pd.read_parquet(parquet_path))
    normalized.to_parquet(parquet_path, index=False)

print("✅ Most recent week formatted correctly in all Parquet files.")


✅ Most recent week removed from all Parquet files.
