In [1]:
from IPython.display import clear_output
from bidict import bidict
from sentence_transformers import SentenceTransformer, util
from sqlalchemy import select
import sys

# Add the parent of the current working directory to the import path so the
# `src.*` imports below resolve (assumes the notebook is launched from a
# direct subdirectory of the project root — TODO confirm).
sys.path.append("..")

from src.auth.aurora import get_sales_client, get_portal_client
from src.db.database import session
from src.db.models import Client

In [2]:
# Collect the distinct sales-side ids that already have a `Client` row;
# these are the clients that do not need to be matched again.
stmt = select(Client.sales_id).distinct()
matched_ids = session.execute(stmt).scalars().all()

In [3]:
# Obtain the two database clients used by the cells below:
# `portal_db` is queried for portal device/client rows and
# `sales_client` is queried for the sales subscription rows.
portal_db = get_portal_client()
sales_client = get_sales_client()

In [4]:
# Load every already-started subscription whose sales client has not been
# matched yet, one row per subscription/device.

# Drop NULL ids before building the exclusion list: in SQL,
# `x NOT IN (..., NULL)` is never true, so a single NULL would silently
# filter out every row.
known_sales_ids = [sales_id for sales_id in matched_ids if sales_id is not None]

# `NOT IN ()` is a syntax error, so the exclusion clause is only emitted when
# there is at least one id to exclude (e.g. not on the very first run).
placeholders = ', '.join(['%s'] * len(known_sales_ids))
exclusion_clause = f"client_id NOT IN ({placeholders}) AND " if known_sales_ids else ""

sales_client_df = sales_client.query_to_pandas(
    f"""SELECT
        name,
        device_id,
        client_id,
        start_date,
        COALESCE(cancel_date, expiry_date) AS end_date
    FROM sales.t_subscription
    LEFT JOIN sales.t_orders
    USING(order_number)
    LEFT JOIN sales.t_cancellation
    USING(subscription_id)
    LEFT JOIN sales.t_client
    USING(client_id)
    WHERE {exclusion_clause}start_date < CURDATE()""",
    params=known_sales_ids
)

In [5]:
# Interactive matching loop.
#
# For every unmatched sales client this cell:
#   1. looks up the client's device ids in the portal device table,
#   2. for each device found, walks the portal client hierarchy upwards
#      (client -> parent -> ...) and shows it,
#   3. asks the operator for the portal client id to link to (or "skip"),
#   4. stores the answer as a new `Client` row.
for (client_id, name), client_df in sales_client_df.groupby(["client_id", "name"]):
    # Process each group for the client
    print(f"Client ID: {client_id}, Name: {name}")

    # Null device ids (possible because the sales query uses LEFT JOINs) can
    # never match a portal serial, so they are dropped before being used as
    # SQL parameters.
    sales_device_list = client_df["device_id"].dropna().unique().tolist()

    # Portal client id -> name, accumulated over all devices of this client.
    allowed_matches = {}

    if sales_device_list:
        device_placeholders = ', '.join(['%s'] * len(sales_device_list))
        portal_devices_df = portal_db.query_to_pandas(
            f"""SELECT
            device_serial,
            client_id,
            created_at,
            deleted_at
        FROM topaz.api_devices
        WHERE device_serial IN ({device_placeholders})""",
            sales_device_list
        )
    else:
        # Nothing to look up; the per-device loop below will not run.
        print("No device ids on record for this client.")

    for sales_device in sales_device_list:
        # Compute the portal rows for this device once and reuse them.
        device_rows = portal_devices_df[
            portal_devices_df["device_serial"] == sales_device
        ]
        if device_rows.empty:
            print(f"Device {sales_device} not found in portal devices.")
            continue
        display(client_df[client_df["device_id"] == sales_device])
        display(device_rows)

        # A device without a portal client would otherwise yield `IN ()`
        # (syntax error) or `IN (NULL)` (matches nothing).
        client_ids = device_rows["client_id"].dropna().unique().tolist()
        if not client_ids:
            print(f"Device {sales_device} has no portal client assigned.")
            continue

        client_placeholders = ', '.join(['%s'] * len(client_ids))
        # Recursive CTE: start at the device's client(s) (depth 0) and follow
        # `parent_id` upwards, one level per recursion step.
        recursion_df = portal_db.query_to_pandas(
            f"""WITH RECURSIVE category_path AS (
            SELECT id, name, parent_id, 0 AS depth
            FROM api_clients
            WHERE id IN ({client_placeholders})

            UNION ALL

            SELECT c.id, c.name, c.parent_id, cp.depth + 1
            FROM api_clients c
            INNER JOIN category_path cp ON c.id = cp.parent_id
            )
            SELECT * FROM category_path;""",
            client_ids
        )
        display(recursion_df)
        # Keep the first name seen for each portal client id.
        for portal_id, portal_name in zip(recursion_df["id"], recursion_df["name"]):
            allowed_matches.setdefault(portal_id, portal_name)

    # Consolidated, de-duplicated list of candidates across all devices so the
    # operator does not have to scan every table above.
    if allowed_matches:
        print("Candidate portal clients:")
        for portal_id, portal_name in allowed_matches.items():
            print(f"  {portal_id}: {portal_name}")

    sys.stdout.flush()
    user_input = input("What is the match: ").strip()
    # A blank answer is treated like "skip" instead of being stored as an
    # empty portal id; the client stays unmatched and shows up on the next run.
    if user_input in ("", "skip"):
        print("Skipping this match.")
    else:
        new_client = Client(
            sales_id=client_id,
            portal_id=user_input,
        )
        session.add(new_client)
        try:
            session.commit()
        except Exception:
            # Leave the session usable for a re-run of the cell, then surface
            # the original error.
            session.rollback()
            raise
    clear_output(wait=True)