In [1]:
import json
import time
from pathlib import Path

import django
import pandas as pd
import pytz
import requests

django.setup()

In [2]:
from clients.models import Credential
from django.conf import settings
from django.db.models import BooleanField, Case, F, Q, When
from django.utils import timezone
from reports.models import BookingsFinancialData, SpecificStopSalesRules

from utils import DistributorApi

In [3]:
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.float_format", "{:.2f}".format)

In [4]:
base_dir = Path(".").resolve()
data_dir = Path(".").resolve() / "data"

In [5]:
def map_credential_group(row):
    df = pd.read_json(data_dir / "credential_group.json")
    mapping = dict(zip(df["id"], df["credential_list"]))

    if row["credential_level"] == 0:
        return ["All"]
    if row["credential_level"] == 2:
        result = []
        for group_id in row["credential_id"]:
            group_id = int(group_id)

            if group_id in mapping:
                result.extend(mapping[group_id])

        return list(set(result))
    else:
        return row["credential_id"]

In [6]:
def get_stopsales_df(credentials):
    queryset = (
        SpecificStopSalesRules.objects.filter(
            levels_rules__prv__t=1,
            levels_rules__hot__t=1,
            tag__in=[-1, 1, 3],
            organization="lgt",
            active=True,
        )
        .annotate(
            credential_level=F("levels_rules__cli__t"),
            credential_id=F("levels_rules__cli__l"),
            provider_id=F("levels_rules__prv__l"),
            hotel_id=F("levels_rules__hot__l"),
            has_attributes=Case(
                When(
                    Q(levels_rules__cp__gt=0)
                    | Q(levels_rules__ps__t__gt=0)
                    | Q(levels_rules__rat__t__gt=0)
                    | Q(levels_rules__mrk__t__gt=0)
                    | Q(levels_rules__mel__t__gt=0)
                    | Q(levels_rules__cid__t__gt=0)
                    | Q(levels_rules__bod__t__gt=0)
                    | Q(levels_rules__rrg__t__gt=0)
                    | Q(levels_rules__rel__t__gt=0)
                    | Q(levels_rules__dow__t__gt=0)
                    | Q(levels_rules__age__t__gt=0)
                    | Q(levels_rules__room__t__gt=0)
                    | Q(levels_rules__non__t__gt=0)
                    | Q(levels_rules__hou__t__gt=0),
                    then=True,
                ),
                default=False,
                output_field=BooleanField(),
            ),
            last_updated=F("last_date"),
        )
        .values(
            "rule_id",
            "active",
            "last_updated",
            "credential_level",
            "credential_id",
            "provider_id",
            "hotel_id",
            "has_attributes",
        )
    )

    df = pd.DataFrame(list(queryset))
    df["last_updated"] = pd.to_datetime(df["last_updated"])
    df["last_year"] = df["last_updated"] - timezone.timedelta(days=365)
    df["credential_id"] = df.apply(map_credential_group, axis=1)
    df = (
        df.explode("credential_id")
        .explode("provider_id")
        .explode("hotel_id")
        .reset_index(drop=True)
    )

    df = df[df["hotel_id"].notna()]
    df["hotel_id"] = df["hotel_id"].astype(int)

    df = df.merge(credentials, how="left")

    df = df[
        ((df["enabled"] == True) | (df["credential_id"] == "All"))
        & (~df["region"].str.contains("veci", case=False, na=False))
    ]

    return df

In [7]:
credentials = Credential.objects.annotate(
    credential_id=F("id"),
    credential_name=F("name"),
    region=F("client__region_id"),
).values("credential_id", "credential_name", "enabled", "region")

df_credentials = pd.DataFrame(list(credentials))

In [8]:
df_stopsales = get_stopsales_df(df_credentials)
display(df_stopsales.head())

Unnamed: 0,rule_id,active,credential_level,credential_id,provider_id,hotel_id,has_attributes,last_updated,last_year,enabled,credential_name,region
110,509914,True,1,34020,BCONG,625423,False,2024-07-31,2023-08-01,True,BOOKOPRO,NORTH AMERICA
111,509914,True,1,34020,BCONG,3758660,False,2024-07-31,2023-08-01,True,BOOKOPRO,NORTH AMERICA
112,509914,True,1,34020,BCONG,3936090,False,2024-07-31,2023-08-01,True,BOOKOPRO,NORTH AMERICA
113,509914,True,1,34020,BCONG,556770,False,2024-07-31,2023-08-01,True,BOOKOPRO,NORTH AMERICA
114,509914,True,1,34020,BCONG,2512893,False,2024-07-31,2023-08-01,True,BOOKOPRO,NORTH AMERICA


In [9]:
min_booking_date = df_stopsales["last_year"].min()
max_booking_date = df_stopsales["last_updated"].max()

In [10]:
bookings = (
    BookingsFinancialData.objects.filter(
        booking_date__date__gte=min_booking_date,
        booking_date__date__lte=max_booking_date,
    )
    .exclude(hotel_id__isnull=True)
    .exclude(provider_id__isnull=True)
    .exclude(credential__client__region__name__icontains="veci")
    .values(
        "booking_date",
        "credential_id",
        "provider_id",
        "hotel_id",
        "revenue",
    )
)

df_bookings = pd.DataFrame(list(bookings))
df_bookings["booking_date"] = df_bookings["booking_date"].dt.tz_localize(None)
df_bookings["hotel_id"] = df_bookings["hotel_id"].astype(int)
df_bookings.set_index(
    ["credential_id", "provider_id", "hotel_id"], inplace=True
)
df_bookings.sort_index(inplace=True)

display(df_bookings.head())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,booking_date,revenue
credential_id,provider_id,hotel_id,Unnamed: 3_level_1,Unnamed: 4_level_1
10132,ABR2,104924,2024-09-12 22:17:51.957,0.0
10132,ABR2,127222,2022-08-31 18:29:13.473,99.11
10132,ABR2,182386,2022-07-18 14:46:59.230,108.13
10132,ABR2,231416,2022-06-08 10:09:49.747,129.62
10132,ABR2,444249,2022-06-09 13:40:28.740,187.32


In [11]:
from joblib import Memory

memory = Memory("cachedir", verbose=0)


@memory.cache
def filter_bookings(df, rule):
    if rule["credential_id"] == "All":
        return df.xs(
            (rule["provider_id"], rule["hotel_id"]),
            level=["provider_id", "hotel_id"],
        ).copy()

    return df.loc[
        (rule["credential_id"], rule["provider_id"], rule["hotel_id"])
    ].copy()

In [12]:
@memory.cache
def get_metrics(bookings, start_date, end_date):
    """Calculate metrics within a specific date range."""

    bookings_filtered = bookings[
        (bookings["booking_date"] >= start_date) & (bookings["booking_date"] <= end_date)
    ].copy()

    total_revenue = bookings_filtered["revenue"].sum()

    # Group by week
    bookings_filtered["week"] = bookings_filtered["booking_date"].dt.to_period("W")
    revenue_per_week = bookings_filtered.groupby("week")["revenue"].sum()
    avg_revenue_per_week = revenue_per_week.mean() if not revenue_per_week.empty else 0
    
    # Get period covered
    first_date = (
        bookings_filtered["booking_date"].min() if not bookings_filtered.empty else None
    )
    last_date = (
        bookings_filtered["booking_date"].max() if not bookings_filtered.empty else None
    )
    period_covered = (
        f"{first_date.date()} to {last_date.date()}"
        if first_date and last_date
        else "No sales during period"
    )
    number_of_weeks_covered = (
        (last_date - first_date).days // 7 if first_date and last_date else 0
    )

    return {
        "total_revenue": total_revenue,
        "avg_revenue_per_week": avg_revenue_per_week,
        "period_covered": period_covered,
        "number_of_weeks_covered": number_of_weeks_covered,
    }

In [13]:
start_time = time.time()

In [14]:
data = []

for _, rule in df_stopsales.iterrows():
    try:
        bookings = filter_bookings(df_bookings, rule)

        # Pre stopsales metrics
        pre_stopsales = get_metrics(
            bookings,
            rule["last_year"],
            rule["last_updated"],
        )

        # Posts stopsales metrics
        post_stopsales = get_metrics(
            bookings,
            rule["last_updated"],
            bookings["booking_date"].max(),
        )

        data.append(
            {
                "rule_id": rule["rule_id"],
                "credential_level": rule["credential_level"],
                "credential_id": rule["credential_id"],
                "provider_id": rule["provider_id"],
                "hotel_id": rule["hotel_id"],
                "last_updated": rule["last_updated"],
                "has_attributes": rule["has_attributes"],
                "revenue_last_12_months": pre_stopsales["total_revenue"],
                "avg_revenue_per_week_last_12_months": pre_stopsales[
                    "avg_revenue_per_week"
                ],
                "number_of_weeks_covered": pre_stopsales[
                    "number_of_weeks_covered"
                ],
                "period_covered": pre_stopsales["period_covered"],
                "revenue_post_stopsales": post_stopsales["total_revenue"],
                "avg_revenue_per_week_post_stopsales": post_stopsales[
                    "avg_revenue_per_week"
                ],
            }
        )

    except KeyError:
        data.append(
            {
                "rule_id": rule["rule_id"],
                "credential_level": rule["credential_level"],
                "credential_id": rule["credential_id"],
                "provider_id": rule["provider_id"],
                "hotel_id": rule["hotel_id"],
                "last_updated": rule["last_updated"],
                "has_attributes": rule["has_attributes"],
                "revenue_last_12_months": 0,
                "avg_revenue_per_week_last_12_months": 0,
                "number_of_weeks_covered": 0,
                "period_covered": 0,
                "revenue_post_stopsales": 0,
                "avg_revenue_per_week_post_stopsales": 0,
            }
        )

  return self._cached_call(args, kwargs, shelving=False)[0]
  return self._cached_call(args, kwargs, shelving=False)[0]
  return self._cached_call(args, kwargs, shelving=False)[0]
  return self._cached_call(args, kwargs, shelving=False)[0]
  return self._cached_call(args, kwargs, shelving=False)[0]


KeyboardInterrupt: 

In [None]:
end_time = time.time()
execution_time = end_time - start_time

print(f"Execution time: {execution_time:.4f} seconds")

In [None]:
df_result = pd.DataFrame(data)

In [None]:
display(
    df_result[
        (df_result.revenue_post_stopsales > 0) & (~df_result.has_attributes)
    ].tail(10)
)

In [None]:
display(
    df_result[
        (df_result.credential_id.isin(["33914", "33915"]))
        & (df_result.credential_level == 2)
    ].tail(10)
)

In [None]:
df_result[
    (df_result.credential_id.isin(["33914", "33915"]))
    & (df_result.credential_level == 2)
].shape