In [1]:
import pandas as pd
import sys

sys.path.append("..")

from src.auth.aurora import get_portal_client

In [2]:
portal_client = get_portal_client()

In [3]:
existing_missing_df = pd.read_csv("data/2025-08-04 Masterfire Missing Tasks New.csv").sort_values("site_id")

In [4]:
missing_df = pd.DataFrame({
    "site_id": [],
})

In [5]:
for site_id in existing_missing_df.loc[existing_missing_df["missing"] == 1, "site_id"].astype(int):
    if site_id in missing_df["site_id"].values:
        continue
    print(site_id)
    tests_df = portal_client.query_to_pandas(
        """SELECT
            test.id,
            test.created_at,
            site.template_id,
            site.protocol_id,
            mode_change.toc_id,
            test.panel,
            test.domain,
            test.loop,
            test.address,
            test.sub_address
        FROM topaz.api_service_tests AS test
        INNER JOIN topaz.api_mode_changes AS mode_change
        ON test.mode_change_id = mode_change.id
        INNER JOIN topaz.api_sites AS site
        ON test.site_id = site.id
        WHERE test.site_id = %(site_id)s
        AND test.created_at >= '2024-01-01 00:00:00'""",
        {"site_id": site_id}
    )
    missing_task_records = False
    changed_asset_type_records = []
    tests_df['panel'] = tests_df['panel'].fillna(-999)
    tests_df['domain'] = tests_df['domain'].fillna(-999)
    tests_df['loop'] = tests_df['loop'].fillna(-999)
    tests_df['address'] = tests_df['address'].fillna(-999)
    tests_df['sub_address'] = tests_df['sub_address'].fillna(-999)
    tests_df.loc[tests_df['sub_address'] == '', 'sub_address'] = -999
    tests_df.loc[tests_df['sub_address'] == 0, 'sub_address'] = -999

    for (template_id, protocol_id, toc_id), test_df in tests_df.groupby(["template_id", "protocol_id", "toc_id"]):
        template_df = portal_client.query_to_pandas(
            """SELECT
                id AS task_id,
                asset_type,
                created_at,
                deleted_at
            FROM topaz.api_service_tasks
            WHERE template_id = %(template_id)s
            AND `type` IN (1,3)""",
            {"template_id": int(template_id)}
        )
        template_df["asset_type"] = template_df["asset_type"].str.lower()
        toc_df = portal_client.query_to_pandas(
            """SELECT `panel`, `domain`, `loop`, `address`, `sub_address`, `type`, `lcs_device_type`, `created_at` AS toc_created_at, `deleted_at` AS toc_deleted_at
            FROM topaz.api_toc_rows
            WHERE toc_id = %(toc_id)s""",
            {"toc_id": int(toc_id)}
        )
        toc_df["type"] = toc_df["type"].str.lower()
        toc_df["lcs_device_type"] = toc_df["lcs_device_type"].str.lower()
        toc_df['panel'] = toc_df['panel'].fillna(-999)
        toc_df['domain'] = toc_df['domain'].fillna(-999)
        toc_df['loop'] = toc_df['loop'].fillna(-999)
        toc_df['address'] = toc_df['address'].fillna(-999)
        toc_df['sub_address'] = toc_df['sub_address'].fillna(-999)
        toc_df.loc[toc_df['sub_address'] == 0, 'sub_address'] = -999
        toc_df["legacy_device_type"] = toc_df["lcs_device_type"].copy()
        other_mask = toc_df["lcs_device_type"] == "other"
        toc_df.loc[other_mask, "lcs_device_type"] = toc_df.loc[other_mask, "type"]
        merged_df = pd.merge(
            how="inner",
            left=test_df,
            left_on=["panel", "domain", "loop", "address", "sub_address"],
            right=toc_df,
            right_on=["panel", "domain", "loop", "address", "sub_address"],
        )
        time_mask = (merged_df["created_at"] >= merged_df["toc_created_at"]) & (
            (
                merged_df["created_at"] < merged_df["toc_deleted_at"]
            ) | merged_df["toc_deleted_at"].isnull()
        )
        merged_df = merged_df[time_mask]
        template_df["key"] = 1
        blank_asset_mask = template_df["asset_type"] == ""
        all_assets = pd.DataFrame(
            {"asset_type": toc_df["lcs_device_type"].unique()}
        )
        all_assets["key"] = 1
        full_template_df = pd.concat([
            pd.merge(
                left=template_df.loc[blank_asset_mask, ["key", "task_id", "created_at", "deleted_at"]],
                right=all_assets.loc[:, ["key", "asset_type"]],
                on="key"
            ),
            template_df.loc[~blank_asset_mask, ["key", "task_id", "created_at", "deleted_at", "asset_type"]]
        ])
        normal_merged_df = pd.merge(
            how="inner",
            left=merged_df,
            left_on="lcs_device_type",
            right=full_template_df,
            right_on="asset_type",
        )

        if protocol_id == 34:
            merged_df["el_key"] = 1
        else:
            merged_df["el_key"] = 0
        template_df["el_key"] = 1 * (template_df["asset_type"] == "el")

        el_merged_df = pd.merge(
            how="inner",
            left=merged_df[merged_df["el_key"] == 1],
            left_on="el_key",
            right=template_df[template_df["el_key"] == 1],
            right_on="el_key",
        )

        if protocol_id == 35:
            merged_df["ext_key"] = 1 * (
                (merged_df["legacy_device_type"] != "other")
            )
        else:
            merged_df["ext_key"] = 0
        template_df["ext_key"] = 1 * (template_df["asset_type"] == "ext")

        ext_merged_df = pd.merge(
            how="inner",
            left=merged_df[merged_df["ext_key"] == 1],
            left_on="ext_key",
            right=template_df[template_df["ext_key"] == 1],
            right_on="ext_key",
        )

        merged_df = pd.concat([
            normal_merged_df,
            el_merged_df,
            ext_merged_df
        ])

        timing_mask = (merged_df["created_at_x"] >= merged_df["created_at_y"]) & (
            (merged_df["created_at_x"] < merged_df["deleted_at"]) | (
                merged_df["deleted_at"].isnull()
            )
        )
        merged_df = merged_df[timing_mask]
        if merged_df.empty:
            print("----> No tasks")
            continue
        for (test_id,), tasks_df in merged_df.groupby(["id"]):
            task_record_df = portal_client.query_to_pandas(
                """SELECT tr.test_id, tr.task_id, t.asset_type
                FROM topaz.api_service_tasks_record AS tr
                LEFT JOIN topaz.api_service_tasks AS t
                ON tr.task_id = t.id
                WHERE test_id = %(test_id)s""",
                {"test_id": int(test_id)}
            )
            additional_assets = set(task_record_df["asset_type"]) - set(tasks_df["asset_type"]) - {''}
            if len(additional_assets) > 0:
                changed_asset_type_records.append(test_id)
            joined_tasks = pd.merge(
                how="left",
                left=tasks_df,
                left_on=["id", "task_id"],
                right=task_record_df,
                right_on=["test_id", "task_id"]
            )
            missing_tasks = joined_tasks[joined_tasks["asset_type_y"].isnull()]

            if not missing_tasks.empty:
                missing_task_records = True
                break

        if missing_task_records:
            break
    if missing_task_records:
        missing_tasks_string = ", ".join(str(int(task)) for task in missing_tasks["task_id"].values)
        print(f"{test_id} missing tasks {missing_tasks_string}")
        new_row = pd.DataFrame({
            "site_id": [site_id],
        })
        missing_df = pd.concat([missing_df, new_row])
        missing_df.to_csv("data/2025-08-04 Masterfire Missing Tasks New 2024.csv", index=False)
    if (len(changed_asset_type_records) > 0):
        additional_assets_string = ", ".join(str(int(test)) for test in changed_asset_type_records)
        print(f"Additional assets on tests: {additional_assets_string}")

4169
4308
5344
5802
Additional assets on tests: 10353040
8069
Additional assets on tests: 11162540
8295
Additional assets on tests: 11263176, 11263200
8296
Additional assets on tests: 11402884
8466
11328823 missing tasks 588, 589, 591, 592, 631, 2645
Additional assets on tests: 9396703, 9396707, 9397046, 9397054, 9397072, 9397076, 9397124, 9397135, 9397155, 9397195, 9397203, 9397230, 9397336, 9397421, 9398423, 9398431, 9398439, 9398463, 9398499, 9398532, 9398547, 9398588, 9398622, 9398626, 9398847, 9398869, 9398912, 9398949, 9398976, 9399001, 9399062, 9399089, 9399195, 9399208, 9399261, 9399279, 9399327, 9399394, 9399403, 9399425, 9399438, 9399460, 9399485, 9399503, 9399545, 9399560, 9399593, 9399730, 9399752, 9399775, 9399786, 9399810, 9399820, 9399836, 9399855, 9399865, 9399908, 9399939, 9399966, 9400604, 9400620, 9400626, 9400673, 9402340, 9402410, 9402444, 9402453, 9402468, 9402496, 9402532, 9402569, 9402596, 9402613, 9402630, 9402635, 9402650, 9402712, 9402713, 9402733, 9402739, 9

  tests_df['panel'] = tests_df['panel'].fillna(-999)
  tests_df['loop'] = tests_df['loop'].fillna(-999)
  tests_df['sub_address'] = tests_df['sub_address'].fillna(-999)


Additional assets on tests: 9340504, 9340507, 9340508, 9340509, 9340510, 9340511, 9340512, 9340514, 9340515, 9340516, 9340518, 9340520, 9340521, 9340522, 9340523, 9340524, 9340526, 9340527, 9340529, 9340530, 9340532, 9340533, 9340535, 9340536, 9340538, 9340539, 9340540, 9340541, 9340542, 9340543
22371
22373
22374
11860112 missing tasks 2344, 2345, 2346, 2347, 2348, 2349, 2353
Additional assets on tests: 11859638, 11859900, 11859959, 11860037, 11860053, 11860112
22375
13056022 missing tasks 2332, 2333, 2334, 2335, 2336, 2337, 2351
Additional assets on tests: 11261795, 11261810, 11261838, 11261878, 11261907, 11261919, 11262104, 11262162, 11262217, 11262273, 11262310, 11262405, 11262437, 11262466, 11262488, 11262517, 11262524, 11262533, 11262551, 11264308, 11264339, 11264418, 11264482, 11264532, 11264545, 11264653, 11264994, 11265114, 11265159, 11265185, 11265212, 11265238, 11265247, 11265256, 11265265, 11265324, 11265342, 11265418, 11265447, 11265517, 11265543, 11265556, 11265572, 112656

  tests_df['sub_address'] = tests_df['sub_address'].fillna(-999)


10070735 missing tasks 593, 594, 595, 596, 597, 632
Additional assets on tests: 10068765, 10068958, 10069027, 10069354, 10069469, 10069498, 10069531, 10069573, 10069602, 10069693, 10069758, 10069825, 10069845, 10069865, 10069885, 10069945, 10069966, 10070003, 10070034, 10070052, 10070068, 10070085, 10070104, 10070129, 10070160, 10070206, 10070238, 10070289, 10070315, 10070367, 10070450, 10070531, 10070584, 10070671, 10070716, 10070735
28784
30057
11747346 missing tasks 2332, 2333, 2334, 2335, 2336, 2337, 2351
Additional assets on tests: 11743516, 11743584, 11743683, 11743787, 11743855, 11744029, 11744127, 11744179, 11744302, 11744448, 11744469, 11744511, 11744672, 11746748, 11746786, 11746881, 11747060, 11747253, 11747283, 11747295, 11747305, 11747318, 11747321, 11747333, 11747336, 11747345
30081
11369076 missing tasks 2344, 2345, 2346, 2347, 2348, 2349, 2353
Additional assets on tests: 11369076
30220
30246
Additional assets on tests: 10787288, 10787339, 10787367, 10787373, 10787453, 1

  tests_df['sub_address'] = tests_df['sub_address'].fillna(-999)
  toc_df['sub_address'] = toc_df['sub_address'].fillna(-999)


13322607 missing tasks 2332, 2333, 2334, 2335, 2336, 2337, 2351
Additional assets on tests: 11483627, 11483657, 11483714, 11483749, 11483819, 11483894, 11483954, 11483979, 11484017, 11484029, 11484050, 11484058, 11484064, 11484073, 11484077, 11484080, 11485781, 11485828, 11485918, 11486157, 11486203, 11486245, 11486303, 11486369, 11486438, 11486514, 11486621, 11486695, 11486757, 11488394, 11488526, 11488555, 11488598, 11488670, 11488922, 11488983, 11489522, 11489606, 13320837, 13320899, 13320959, 13321047, 13321061, 13321078, 13321090, 13321098, 13321118, 13321155, 13321169, 13321232, 13321300, 13321356, 13321369, 13321384, 13321411, 13321917, 13321924, 13321965, 13322029, 13322125, 13322154, 13322205, 13322216, 13322239, 13322245, 13322262, 13322281, 13322293, 13322312, 13322454, 13322462, 13322506, 13322519, 13322556, 13322605
48216
13482327 missing tasks 2332, 2333, 2334, 2335, 2336, 2337, 2351
Additional assets on tests: 11786951, 11786957, 11786959, 11786968, 11786969, 11787235, 1

In [6]:
sites_list = portal_client.query_to_pandas(
    """WITH RECURSIVE category_path AS (
    SELECT id, name, parent_id, 0 AS depth
    FROM api_clients
    WHERE id = 118

    UNION ALL

    SELECT c.id, c.name, c.parent_id, cp.depth + 1
    FROM api_clients c
    INNER JOIN category_path cp ON cp.id = c.parent_id
    )
    SELECT category_path.id AS client_id, site.id AS site_id
    FROM category_path
    INNER JOIN topaz.api_sites AS site
    ON category_path.id = site.client_id
    WHERE toc_id IS NOT NULL
    AND template_id IS NOT NULL"""
)