In [1]:
import json
from datetime import datetime
from pathlib import Path

import pandas as pd
import requests
from sqlalchemy import select
from sqlalchemy.orm import aliased

from database import Database
from utils import DistributorApi

In [2]:
# Absolute path of the directory the kernel was started in.
base_dir = Path().resolve()
# "data" folder beneath it. NOTE(review): not referenced by the cells visible
# here — presumably meant for input/output files; confirm before relying on it.
data_dir = base_dir.joinpath("data")

In [3]:
# Project-local client (imported from utils); the next cell uses it to fetch all rules.
api = DistributorApi()

In [4]:
# Columns retained from the flattened rule payload, in the order they are shown.
rule_columns = [
    "id", "name", "description", "tag", "edit_state", "obsolete",
    "updated_by", "updated_on",
    "credential_level", "credential_list",
    "provider_level", "provider_list",
    "hotel_level", "hotel_list",
    "destination_level", "destination_list",
    "rate", "refundable",
    "market_level", "market_list",
    "meal_level", "meal_list",
    "check_in_level", "check_in_from", "check_in_to",
    "booking_date_level", "booking_date_from", "booking_date_to",
    "range_level", "range_from", "range_to",
    "max_release",
    "days_of_week_level", "days_of_week_list",
    "age",
    "room_level", "room_list",
    "num_of_nights_level", "num_of_nights_list",
    "hours_level", "hours_list",
    "dynamic_commission",
]

# Fetch every rule, flatten nested objects into "parent_child" columns and
# keep only the columns listed above (a missing column raises KeyError).
rules = api.get_all_rules()
df_rules = pd.json_normalize(rules, sep="_")[rule_columns]

In [5]:
# Preview the first five rules; the column-display limit is lifted only
# for the duration of this block so every column is rendered.
with pd.option_context("display.max_columns", None):
    display(df_rules.head(n=5))

Unnamed: 0,id,name,description,tag,edit_state,obsolete,updated_by,updated_on,credential_level,credential_list,provider_level,provider_list,hotel_level,hotel_list,destination_level,destination_list,rate,refundable,market_level,market_list,meal_level,meal_list,check_in_level,check_in_from,check_in_to,booking_date_level,booking_date_from,booking_date_to,range_level,range_from,range_to,max_release,days_of_week_level,days_of_week_list,age,room_level,room_list,num_of_nights_level,num_of_nights_list,hours_level,hours_list,dynamic_commission
0,686085,Blocked Provider for Hotel 6690,Automatic providers blocked algorithm.,0,1,False,AzureFunctions,12-10-2024 10:02:30,0,,1,[CNN2],1,[6690],0,,0,0,0,,0,,0,,,0,,,0,0,0,0,0,,0,0,,0,,0,,False
1,431443,BL- Apartamentos Masaru,CDH-19736,1,1,False,maria.ulloa@traveltino.com,08-04-2024 17:47:04,1,"[11986, 11992, 11993, 33284, 10206, 11771, 119...",1,[BCONG],1,[806],0,[],0,0,0,,0,,0,,,0,,,0,0,0,0,0,,0,0,,0,,0,,False
2,395508,SnapTravel UAE - PTRJ2,Close PRTJ2 - Supplier Errors - (Gisela 03.01.24),4,1,False,gisela.purra@smyrooms.com,03-01-2024 14:24:22,1,"[33914, 33915]",1,[PTRJ2],0,,0,[],0,0,0,,0,,0,,,0,,,0,0,0,0,0,,0,0,,0,,0,,False
3,692147,Blocked Provider for Hotel 13462,Automatic providers blocked algorithm.,0,1,False,AzureFunctions,17-10-2024 10:43:33,0,,1,"[OLV2, FLYB2B]",1,[13462],0,,0,0,0,,0,,0,,,0,,,0,0,0,0,0,,0,0,,0,,0,,False
4,414798,Cierre Agoda + HBS TPS,CDH-18980,1,1,False,maria.ulloa@traveltino.com,25-03-2024 16:17:10,1,"[33238, 5226, 8550, 11655, 11945]",1,[YPL],1,[2368],0,[],0,0,0,,0,,0,,,0,,,0,0,0,0,0,,0,0,,0,,0,,False


In [6]:
# Boolean mask selecting "level 1" rules: tag, credential, provider and hotel
# criteria equal to 1 and every other criterion equal to 0.
level1_criteria = {
    "tag": 1,
    "credential_level": 1,
    "provider_level": 1,
    "hotel_level": 1,
    "destination_level": 0,
    "rate": 0,
    "refundable": 0,
    "market_level": 0,
    "meal_level": 0,
    "check_in_level": 0,
    "booking_date_level": 0,
    "range_level": 0,
    "max_release": 0,
    "days_of_week_level": 0,
    "age": 0,
    "room_level": 0,
    "num_of_nights_level": 0,
    "hours_level": 0,
}

# Start from an all-True mask and AND in one equality test per criterion.
filter_lvl1 = pd.Series(True, index=df_rules.index)
for criterion_column, required_value in level1_criteria.items():
    filter_lvl1 = filter_lvl1 & (df_rules[criterion_column] == required_value)

In [25]:
# Work on an independent copy so columns added later never touch df_rules.
df_rules_lvl1 = df_rules.loc[filter_lvl1].copy()
# (rows, columns) of the level-1 subset.
df_rules_lvl1.shape

(515, 42)

In [26]:
# Add the length of each list column and move it next to that list.
# With the column layout selected above, positions 10 / 13 / 16 fall directly
# after credential_list / provider_list / hotel_list respectively.
for target_position, prefix in ((10, "credential"), (13, "provider"), (16, "hotel")):
    count_column = f"{prefix}_count"
    df_rules_lvl1[count_column] = df_rules_lvl1[f"{prefix}_list"].apply(len)
    # pop + insert relocates the freshly appended column to its target slot.
    df_rules_lvl1.insert(target_position, count_column, df_rules_lvl1.pop(count_column))

In [27]:
# Rules with the most credentials come first. Row order matters downstream:
# the de-duplication keeps the FIRST rule that owns a combination, so a stable
# sort is used to make ties resolve reproducibly (rules with the same
# credential_count keep their existing relative order) instead of depending on
# the default, unstable quicksort.
df_rules_lvl1 = df_rules_lvl1.sort_values(
    by="credential_count", ascending=False, kind="stable"
)

In [29]:
# Spot-check: show the level-1 rows for two specific rule ids.
rule_ids_to_inspect = ["395802", "395800"]
df_rules_lvl1[df_rules_lvl1["id"].isin(rule_ids_to_inspect)]

Unnamed: 0,id,name,description,tag,edit_state,obsolete,updated_by,updated_on,credential_level,credential_list,...,days_of_week_level,days_of_week_list,age,room_level,room_list,num_of_nights_level,num_of_nights_list,hours_level,hours_list,dynamic_commission
2127,395802,Campaña BUC- VS GANDIA PALACE,CDH-18007\nHPD-1728-Provider EXPRDS added\nHPD...,1,1,False,AzureFunctions,10-09-2024 11:57:18,1,[11617],...,0,,0,0,,0,,0,,False
2171,395800,Campaña BUC- VS GANDIA PALACE,CDH-18007,1,1,False,maria.ulloa@traveltino.com,11-03-2024 15:21:46,1,[11617],...,0,,0,0,,0,,0,,False


In [10]:
# Lists are unhashable, so a tuple version of credential_list is used as the
# grouping key. Note that tuples are order-sensitive: the same credentials in
# a different order count as a different key.
credential_key = df_rules_lvl1["credential_list"].apply(tuple)
df_rules_lvl1["credential_list_tuple"] = credential_key
# Number of level-1 rules that share exactly the same credential tuple.
df_rules_lvl1["duplicates"] = credential_key.groupby(credential_key).transform("count")

In [11]:
# Expand each rule into one row per (credential, provider, hotel) combination
# by exploding the three list columns one after another.
df_exploded = df_rules_lvl1
for list_column in ("credential_list", "provider_list", "hotel_list"):
    df_exploded = df_exploded.explode(list_column)
df_exploded.shape

(1727790, 47)

In [12]:
# One hashable (credential, provider, hotel) tuple per row, used as the
# de-duplication key in the following cells.
combination_columns = ["credential_list", "provider_list", "hotel_list"]
df_exploded["combination_zip"] = list(
    df_exploded[combination_columns].itertuples(index=False, name=None)
)

In [13]:
# Flag every repeat of a combination; the first row holding a combination
# (in the current row order) stays False.
df_exploded["is_duplicate"] = df_exploded["combination_zip"].duplicated(keep="first")

In [14]:
# For every combination owned by more than one row, list the ids of ALL rules
# that contain it.
#
# The rows are selected with keep=False rather than through the
# "is_duplicate" flag (which is computed with keep="first"): that flag is
# False for the first, surviving rule, so grouping on it would leave the
# surviving rule out of the list and a dropped rule would only ever be
# reported as a duplicate of itself.
shared_combinations = df_exploded[
    df_exploded.duplicated(subset="combination_zip", keep=False)
]
duplicate_ids = (
    shared_combinations.groupby("combination_zip")["id"]
    # Each id once, in row order (the surviving rule comes first).
    .apply(lambda ids: list(dict.fromkeys(ids)))
    .reset_index(name="duplicate_ids")
)

In [15]:
# Attach the list of conflicting rule ids to every exploded row; combinations
# without a match get NaN in "duplicate_ids" (left join).
df_exploded = pd.merge(
    df_exploded,
    duplicate_ids,
    how="left",
    on="combination_zip",
)

In [16]:
# Sanity check after the merge: a left join on unique keys should leave the row count unchanged.
df_exploded.shape

(1727790, 50)

In [17]:
# Rows whose combination already belongs to an earlier rule. Take an explicit
# copy: the frame is modified right below, and mutating a boolean-filtered
# selection of df_exploded is the pattern pandas warns about (copy vs. view).
duplicates = df_exploded[df_exploded["is_duplicate"]].copy()
# Move "duplicate_ids" to the front for easier inspection.
duplicates.insert(0, "duplicate_ids", duplicates.pop("duplicate_ids"))

In [18]:
# Keep only the first row for each combination; later repeats are discarded.
df_exploded = df_exploded.drop_duplicates(subset="combination_zip", keep="first")

In [19]:
# Collapse the surviving combinations back to one row per rule.
#
# Values are de-duplicated with dict.fromkeys, which keeps first-seen order.
# The previous list(set(...)) produced an arbitrary order that, for string
# values such as provider codes, changes between interpreter runs, making the
# exported lists non-reproducible.
#
# Note: the three lists are rebuilt independently, so a value disappears from
# a list only when EVERY combination containing it was dropped.
df_exploded = (
    df_exploded.groupby("id")
    .agg(
        {
            "credential_list": lambda x: list(dict.fromkeys(x)),
            "provider_list": lambda x: list(dict.fromkeys(x)),
            "hotel_list": lambda x: list(dict.fromkeys(x)),
            # First non-null list of conflicting rule ids found for this rule.
            "duplicate_ids": "first",
        }
    )
    .reset_index()
)

In [20]:
# One row per rule that still owns at least one combination after de-duplication.
df_exploded.shape

(497, 5)

In [21]:
# One row per rule that lost at least one combination: the values that appear
# in its dropped combinations plus the ids of the rules it conflicts with.
#
# dict.fromkeys de-duplicates while keeping first-seen order; list(set(...))
# returned an arbitrary order that is not reproducible for string values.
duplicates = (
    duplicates.groupby("id")
    .agg(
        {
            "credential_list": lambda x: list(dict.fromkeys(x)),
            "provider_list": lambda x: list(dict.fromkeys(x)),
            "hotel_list": lambda x: list(dict.fromkeys(x)),
            # First non-null list of conflicting rule ids found for this rule.
            "duplicate_ids": "first",
        }
    )
    .reset_index()
)

In [22]:
# Build one change-tracking record per level-1 rule: the original lists, the
# lists left after de-duplication, what was removed, and the rule's metadata.


def _updated_values(rule_id, column):
    """Return the distinct values left in ``column`` for ``rule_id``.

    Reads the per-rule ``df_exploded`` frame built above. A rule with no row
    there (every combination was dropped) yields an empty list.
    """
    surviving = df_exploded.loc[df_exploded["id"] == rule_id, column]
    return (
        surviving.apply(lambda x: x if isinstance(x, list) else [])
        .explode()
        .unique()
        .tolist()
    )


def _removed_values(original, updated):
    """Return the items of ``original`` absent from ``updated``.

    Items are reported once each, in their original order, so the output is
    reproducible (a plain set difference has arbitrary order).
    """
    remaining = set(updated)
    return [item for item in dict.fromkeys(original) if item not in remaining]


# Columns copied unchanged into each record, split around "credential_choices"
# so the output column order stays exactly as before.
leading_columns = ("edit_state", "obsolete", "name", "description", "tag")
trailing_columns = (
    "credential_count",
    "rate",
    "provider_level",
    "provider_count",
    "hotel_level",
    "hotel_count",
    "destination_level",
    "destination_list",
    "refundable",
    "meal_level",
    "meal_list",
    "market_level",
    "market_list",
    "dynamic_commission",
    "check_in_level",
    "check_in_from",
    "check_in_to",
    "booking_date_level",
    "booking_date_from",
    "booking_date_to",
    "range_level",
    "range_from",
    "range_to",
    "max_release",
    "days_of_week_level",
    "days_of_week_list",
    "age",
    "room_level",
    "room_list",
    "num_of_nights_level",
    "num_of_nights_list",
    "hours_level",
    "hours_list",
    "updated_by",
    "updated_on",
)

change_data = []

for _, row in df_rules_lvl1.iterrows():
    id_ = row["id"]

    # Original lists for this ID
    original_credential_list = row["credential_list"]
    original_provider_list = row["provider_list"]
    original_hotel_list = row["hotel_list"]

    # Conflicting rule ids for this rule. Deliberately NOT named
    # ``duplicate_ids``: that name is the DataFrame merged into df_exploded
    # earlier, and overwriting it here would break a re-run of that cell.
    matching_duplicate_ids = duplicates.loc[
        duplicates["id"] == id_, "duplicate_ids"
    ].values
    rule_duplicate_ids = (
        matching_duplicate_ids[0] if matching_duplicate_ids.size > 0 else []
    )

    # Updated lists after deletion (removing duplicates)
    updated_credential_list = _updated_values(id_, "credential_list")
    updated_provider_list = _updated_values(id_, "provider_list")
    updated_hotel_list = _updated_values(id_, "hotel_list")

    record = {
        "id": id_,
        "duplicate_ids": rule_duplicate_ids,
        "credential_list": original_credential_list,
        "updated_credential_list": updated_credential_list,
        "deleted_credential_list": _removed_values(
            original_credential_list, updated_credential_list
        ),
        "provider_list": original_provider_list,
        "updated_provider_list": updated_provider_list,
        "deleted_provider_list": _removed_values(
            original_provider_list, updated_provider_list
        ),
        "hotel_list": original_hotel_list,
        "updated_hotel_list": updated_hotel_list,
        "deleted_hotel_list": _removed_values(original_hotel_list, updated_hotel_list),
    }
    record.update({column: row[column] for column in leading_columns})
    # NOTE(review): exported as "credential_choices" although it holds
    # credential_level; key kept as-is so the CSV header does not change.
    record["credential_choices"] = row["credential_level"]
    record.update({column: row[column] for column in trailing_columns})

    change_data.append(record)

In [23]:
# Turn the collected records into a frame (one row per level-1 rule) and write
# it to the current working directory. List-valued cells are written to the
# CSV as their string representation.
change_tracker = pd.DataFrame(data=change_data)
change_tracker.to_csv("change_tracker.csv", index=False)

In [24]:
# Spot-check the same two rule ids again, now with the helper columns added.
rule_ids_to_inspect = ["395802", "395800"]
df_rules_lvl1[df_rules_lvl1["id"].isin(rule_ids_to_inspect)]

Unnamed: 0,id,name,description,tag,edit_state,obsolete,updated_by,updated_on,credential_level,credential_list,...,age,room_level,room_list,num_of_nights_level,num_of_nights_list,hours_level,hours_list,dynamic_commission,credential_list_tuple,duplicates
2127,395802,Campaña BUC- VS GANDIA PALACE,CDH-18007\nHPD-1728-Provider EXPRDS added\nHPD...,1,1,False,AzureFunctions,10-09-2024 11:57:18,1,[11617],...,0,0,,0,,0,,False,"(11617,)",3
2171,395800,Campaña BUC- VS GANDIA PALACE,CDH-18007,1,1,False,maria.ulloa@traveltino.com,11-03-2024 15:21:46,1,[11617],...,0,0,,0,,0,,False,"(11617,)",3
