In [1]:
from pathlib import Path
import json
import time

import django
import pandas as pd
import numpy as np
import pytz
import requests

django.setup()

In [2]:
from django.conf import settings
from django.db.models import F, Q, Case, When, BooleanField, Sum
from django.utils.timezone import timedelta

from reports.models import SpecificStopSalesRules, BookingsFinancialData
from hotel.models import Hotel, Chain, Provider
from clients.models import Credential

from utils import DistributorApi

In [3]:
# Notebook-wide pandas display settings: never truncate columns or rows,
# and render floats with two decimal places.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.float_format = "{:.2f}".format

In [4]:
# Absolute path of the working directory the notebook runs from, and the
# "data" folder directly beneath it.
base_dir = Path(".").resolve()
data_dir = base_dir / "data"

In [5]:
def map_credential_group(row):
    """Resolve a rule's credential selector into a list of credential ids.

    Reads ``row["credential_level"]`` and ``row["credential_id"]``:

    * level 0 -> ``["All"]``.
    * level 2 -> every entry of ``credential_id`` is converted to ``int`` and
      looked up in the module-level ``credential_mapping``; the members of all
      known groups are merged and de-duplicated (result order is whatever the
      intermediate ``set`` yields). Unknown group ids are ignored.
    * any other level -> ``row["credential_id"]`` is returned unchanged.
    """
    level = row["credential_level"]

    if level == 0:
        return ["All"]

    if level != 2:
        return row["credential_id"]

    members = []
    for raw_group_id in row["credential_id"]:
        # Groups missing from the mapping contribute nothing.
        members.extend(credential_mapping.get(int(raw_group_id), []))

    return list(set(members))

In [6]:
def map_providers(row):
    """Resolve a rule's provider selector into a list of provider codes.

    Reads ``row["provider_level"]`` and ``row["provider_id"]``:

    * level 0 -> ``["All"]``.
    * level 2 -> codes of every row in the module-level ``df_providers`` whose
      ``direct`` flag is false.
    * level 3 -> codes of every row in ``df_providers`` whose ``direct`` flag
      is true.
    * any other level -> ``row["provider_id"]`` is returned unchanged.
    """
    level = row["provider_level"]

    if level == 0:
        return ["All"]

    if level == 2:
        selector = ~df_providers.direct
    elif level == 3:
        selector = df_providers.direct
    else:
        return row["provider_id"]

    return list(df_providers[selector]["code"])

In [7]:
def map_hotels(row):
    """Split a rule's hotel selector into ``hotel_id`` and ``chain_id`` lists.

    The row is modified in place and also returned:

    * level 0 -> both ``hotel_id`` and ``chain_id`` become ``["All"]``.
    * level 1 -> ``hotel_id`` is kept as is, ``chain_id`` becomes ``[]``.
    * level 2 -> the ids stored in ``hotel_id`` are moved to ``chain_id`` and
      ``hotel_id`` becomes ``["All"]``.

    Raises:
        ValueError: if ``row["hotel_level"]`` is not 0, 1 or 2.
    """
    # Make sure the key/column exists before any branch runs.
    row["chain_id"] = None

    level = row["hotel_level"]
    if level not in (0, 1, 2):
        raise ValueError("Invalid Hotel Level")

    if level == 0:
        row["hotel_id"], row["chain_id"] = ["All"], ["All"]
    elif level == 1:
        row["chain_id"] = []
    else:
        # Right-hand side is evaluated first, so the old hotel_id is captured
        # before it is overwritten.
        row["chain_id"], row["hotel_id"] = row["hotel_id"], ["All"]

    return row

In [8]:
def map_destinations(row):
    """Split a rule's destination selector into ``city`` and ``country`` lists.

    The row is modified in place and also returned:

    * level 0 -> both ``city`` and ``country`` become ``["All"]``.
    * level 1 -> ``city`` takes ``row["destination"]``, ``country`` is ``[]``.
    * level 2 -> ``city`` is ``["All"]``, ``country`` takes
      ``row["destination"]``.

    Raises:
        ValueError: if ``row["destination_level"]`` is not 0, 1 or 2.
    """
    # Create both keys/columns up front so they exist on every row.
    row["city"] = None
    row["country"] = None

    level = row["destination_level"]
    if level == 0:
        city, country = ["All"], ["All"]
    elif level == 1:
        city, country = row["destination"], []
    elif level == 2:
        city, country = ["All"], row["destination"]
    else:
        raise ValueError("Invalid Destination Level")

    row["city"] = city
    row["country"] = country
    return row

In [9]:
def remove_none_values(ids):
    """Return ``ids`` as a new list without its ``None`` entries.

    ``None`` passed as the whole argument is returned as ``None``.
    """
    return None if ids is None else [item for item in ids if item is not None]

In [10]:
def remove_inactive_credentials(credential_ids):
    """Keep only the credentials that are enabled and not in a "veci" region.

    Args:
        credential_ids: iterable of credential ids; the literal ``"All"`` is
            a wildcard and is always kept.

    Returns:
        A new list, in input order, containing ``"All"`` entries plus every id
        that is present in the module-level ``credential_dict``, is flagged
        ``enabled`` there, and whose ``region`` is either ``None`` or does not
        contain ``"veci"`` (case-insensitive). Ids missing from
        ``credential_dict`` are dropped.
    """
    kept_ids = []

    for credential_id in credential_ids:
        # The wildcard has no entry in credential_dict; pass it through.
        if credential_id == "All":
            kept_ids.append(credential_id)
            continue

        credential_info = credential_dict.get(credential_id)
        if not credential_info:
            continue

        region = credential_info["region"]
        # Check `enabled` first so the region text is only inspected for
        # enabled credentials.
        if not credential_info["enabled"]:
            continue

        if region is None or "veci" not in region.lower():
            kept_ids.append(credential_id)

    return kept_ids

In [11]:
# Credential groups exported to JSON: build a lookup from each group's "id"
# to its "credential_list" (used by map_credential_group).
df_credential_group = pd.read_json(data_dir / "credential_group.json")
credential_mapping = {
    group_id: members
    for group_id, members in zip(
        df_credential_group["id"], df_credential_group["credential_list"]
    )
}

In [12]:
# One record per credential: id, name, enabled flag and the region key of the
# owning client.
credentials_queryset = Credential.objects.annotate(
    credential_id=F("id"),
    credential_name=F("name"),
    region=F("client__region_id"),
).values("credential_id", "credential_name", "enabled", "region")

df_credentials = pd.DataFrame(list(credentials_queryset))

# credential_id -> {"enabled": ..., "region": ...}; read by
# remove_inactive_credentials.
credential_dict = (
    df_credentials.set_index("credential_id")
    .loc[:, ["enabled", "region"]]
    .to_dict(orient="index")
)

In [13]:
# Load every provider's code together with its `direct` flag; map_providers
# selects codes from this frame based on that flag.
providers_queryset = Provider.objects.values("code", "direct")
df_providers = pd.DataFrame(list(providers_queryset))

In [14]:
# Load hotel reference data (id, name and the country / city / chain keys)
# into a DataFrame.
hotels_queryset = Hotel.objects.values(
    "id", "name", "country_id", "city_id", "chain_id"
)
df_hotels = pd.DataFrame(list(hotels_queryset))

In [15]:
# Active stop-sales rules of organization "lgt" with tag -1, 1 or 3, flattened
# into one dict per rule. The nested `levels_rules` structure is exposed under
# readable names: for each dimension, `__t` is surfaced as "<dimension>_level"
# and `__l` as the matching id/list column.
# NOTE(review): the meaning of the individual tag values is not visible here.
stopsales_queryset = (
    SpecificStopSalesRules.objects.filter(
        tag__in=[-1, 1, 3],
        organization="lgt",
        active=True,
    )
    .annotate(
        # cli / prv / hot / dest -> credential / provider / hotel / destination.
        credential_level=F("levels_rules__cli__t"),
        credential_id=F("levels_rules__cli__l"),
        provider_level=F("levels_rules__prv__t"),
        provider_id=F("levels_rules__prv__l"),
        hotel_level=F("levels_rules__hot__t"),
        hotel_id=F("levels_rules__hot__l"),
        destination_level=F("levels_rules__dest__t"),
        destination=F("levels_rules__dest__l"),
        # True when any of the listed extra rule attributes has a value > 0.
        # NOTE(review): "has_attributes" is not listed in .values() below, so
        # it does not reach the resulting dicts — confirm whether it should.
        # NOTE(review): the `cp` lookup has no `__t` segment, unlike every
        # other attribute here — confirm this is intentional.
        has_attributes=Case(
            When(
                Q(levels_rules__cp__gt=0)
                | Q(levels_rules__ps__t__gt=0)
                | Q(levels_rules__rat__t__gt=0)
                | Q(levels_rules__mrk__t__gt=0)
                | Q(levels_rules__mel__t__gt=0)
                | Q(levels_rules__cid__t__gt=0)
                | Q(levels_rules__bod__t__gt=0)
                | Q(levels_rules__rrg__t__gt=0)
                | Q(levels_rules__rel__t__gt=0)
                | Q(levels_rules__dow__t__gt=0)
                | Q(levels_rules__age__t__gt=0)
                | Q(levels_rules__room__t__gt=0)
                | Q(levels_rules__non__t__gt=0)
                | Q(levels_rules__hou__t__gt=0),
                then=True,
            ),
            default=False,
            output_field=BooleanField(),
        ),
        last_updated=F("last_date"),
        # bod / cid -> booking date / check-in date; __f and __u are surfaced
        # as the "from" and "to" bounds.
        booking_date_from=F("levels_rules__bod__f"),
        booking_date_to=F("levels_rules__bod__u"),
        checkin_date_from=F("levels_rules__cid__f"),
        checkin_date_to=F("levels_rules__cid__u"),
    )
    # Only these keys end up in each result dict (and thus in df_stopsales).
    .values(
        "rule_id",
        "credential_level",
        "credential_id",
        "provider_level",
        "provider_id",
        "hotel_level",
        "hotel_id",
        "destination_level",
        "destination",
        "last_updated",
        "booking_date_from",
        "booking_date_to",
        "checkin_date_from",
        "checkin_date_to",       
    )
)

In [16]:
# Build the working rules frame from the queryset and normalise every
# dimension (credential, provider, hotel/chain, city/country) into lists.
date_columns = [
    "last_updated",
    "booking_date_from",
    "booking_date_to",
    "checkin_date_from",
    "checkin_date_to",
]
df_stopsales = pd.DataFrame(list(stopsales_queryset))
df_stopsales[date_columns] = df_stopsales[date_columns].apply(pd.to_datetime)

# Expand credential groups and provider classes into explicit id/code lists.
df_stopsales["credential_id"] = df_stopsales.apply(
    map_credential_group, axis=1
)
df_stopsales["provider_id"] = df_stopsales.apply(map_providers, axis=1)

# Drop None entries before hotel ids are split into hotel_id / chain_id.
df_stopsales["hotel_id"] = df_stopsales["hotel_id"].apply(remove_none_values)
df_stopsales = df_stopsales.apply(map_hotels, axis=1)
df_stopsales = df_stopsales.apply(map_destinations, axis=1)

# Move the derived columns right after the columns they were derived from
# (chain_id after hotel_id; city and country after destination).
df_stopsales.insert(7, "chain_id", df_stopsales.pop("chain_id"))
df_stopsales.insert(10, "city", df_stopsales.pop("city"))
df_stopsales.insert(11, "country", df_stopsales.pop("country"))

df_stopsales["credential_id"] = df_stopsales["credential_id"].apply(
    remove_inactive_credentials
)

# Keep only numeric chain ids, as ints; the "All" wildcard is dropped here.
# `x or []` tolerates a missing list (remove_none_values can return None and
# map_hotels copies hotel_id into chain_id at level 2), and `str(i)` tolerates
# ids that are already ints — both previously raised.
df_stopsales["chain_id"] = df_stopsales["chain_id"].apply(
    lambda x: [int(i) for i in (x or []) if str(i).isdigit()]
)

In [17]:
# Overall booking window covered by the rules: from 365 days before the
# earliest rule update up to the latest rule update.
min_booking_date, max_booking_date = df_stopsales["last_updated"].agg(
    ["min", "max"]
)
min_booking_date -= timedelta(days=365)

In [18]:
# bookings = (
#     BookingsFinancialData.objects.filter(
#         booking_date__date__gte=min_booking_date,
#         booking_date__date__lte=max_booking_date,
#     )
#     .exclude(hotel_id__isnull=True)
#     .exclude(provider_id__isnull=True)
#     .exclude(credential__client__region__name__icontains="veci")
#     .annotate(
#         chain_id=F("hotel__chain_id"),
#         city_id=F("hotel__city_id"),
#         country_code=F("hotel__country_id"),
#     )
#     .values(
#         "booking_date",
#         "credential_id",
#         "provider_id",
#         "hotel_id",
#         "chain_id",
#         "city_id",
#         "country_code",
#         "revenue",
#     )
# )

# df_bookings = pd.DataFrame(list(bookings))
# df_bookings["booking_date"] = df_bookings["booking_date"].dt.tz_localize(None)
# df_bookings["hotel_id"] = df_bookings["hotel_id"].astype(int)
# df_bookings["chain_id"] = df_bookings["chain_id"].astype(pd.Int64Dtype())
# df_bookings["city_id"] = df_bookings["city_id"].astype(pd.Int64Dtype())

# display(df_bookings.head())

In [19]:
def get_revenue_per_rule(rule_row, lookback_days=365):
    """Sum booking revenue matching one rule over the window before its update.

    Args:
        rule_row: mapping / Series with the list-valued keys
            ``credential_id``, ``provider_id``, ``hotel_id``, ``chain_id``,
            ``city`` and ``country`` plus the ``last_updated`` timestamp.
        lookback_days: length of the window ending at ``last_updated``
            (defaults to 365, the previously hard-coded value).

    Returns:
        The ``Sum("revenue")`` aggregate over ``BookingsFinancialData`` rows
        whose booking date falls inside the window and that match every
        restricted dimension, or ``0`` when the aggregate is falsy (e.g.
        ``None``).

    A dimension whose value is ``None``, ``[]`` or ``["All"]`` adds no filter.
    Note that this issues one database query per call.
    """
    end_date = rule_row["last_updated"]
    start_date = end_date - timedelta(days=lookback_days)

    query = Q(
        booking_date__date__gte=start_date,
        booking_date__date__lte=end_date,
    )

    # (rule column, ORM lookup, per-value cast or None to pass values as is)
    dimension_filters = (
        ("credential_id", "credential_id__in", None),
        ("provider_id", "provider_id__in", None),
        ("hotel_id", "hotel_id__in", int),
        ("chain_id", "hotel__chain_id__in", int),
        ("city", "hotel__city_id__in", int),
        ("country", "hotel__country_id__in", None),
    )

    for column, lookup, cast in dimension_filters:
        values = rule_row[column]

        # None is treated like an empty list: previously it reached
        # map(int, None) / `__in=None` and raised.
        if values is None or values == [] or values == ["All"]:
            continue

        if cast is not None:
            values = [cast(value) for value in values]

        query &= Q(**{lookup: values})

    filtered_data = BookingsFinancialData.objects.filter(query)
    total_revenue = (
        filtered_data.aggregate(total_revenue=Sum("revenue"))["total_revenue"]
        or 0
    )

    return total_revenue

In [20]:
# Preview up to the first 30 rows of the prepared rules frame.
display(df_stopsales.head(30))

Unnamed: 0,rule_id,credential_level,credential_id,provider_level,provider_id,hotel_level,hotel_id,chain_id,destination_level,destination,city,country,last_updated,booking_date_from,booking_date_to,checkin_date_from,checkin_date_to
0,509914,1,"[34020, 34030, 34041, 34511, 34140, 34142, 341...",1,[BCONG],1,"[625423, 3758660, 3936090, 556770, 2512893, 18...",[],0,[],[All],[All],2024-07-31,NaT,NaT,NaT,NaT
1,291912,1,"[3648, 32775, 3576, 11266, 11404, 11409, 11506...",3,"[NITES, ASATRVEEU, GUEVECI, ATEG, VECTORZZ, PR...",2,[All],[4853],0,[],[All],[All],2023-12-21,NaT,NaT,NaT,NaT
2,395625,1,"[34030, 11617, 34193, 3576, 4204, 11409, 11506...",1,"[BKGEXP, VBK]",1,"[128419, 1812665, 3848436, 1836508, 2299131, 2...",[],0,[],[All],[All],2024-08-19,NaT,NaT,NaT,NaT
3,468559,1,"[3648, 34030, 11617, 3576, 11409, 4204, 11506,...",1,"[ROI, ROIB2B]",2,[All],[197],0,[],[All],[All],2024-05-14,NaT,NaT,NaT,NaT
4,528277,1,"[34020, 34030, 34041, 11993, 34140, 34142, 341...",1,[BCONG],1,[925997],[],0,[],[All],[All],2024-07-02,NaT,NaT,NaT,NaT
5,395670,1,"[34030, 3576, 4204, 11409, 11506, 11617, 11753...",1,"[YPL, PRSP]",1,"[102639, 132415, 446424, 3869753, 95448, 13239...",[],0,[],[All],[All],2024-08-06,2024-08-06,2025-01-15,NaT,NaT
6,445980,1,"[34030, 3576, 4204, 11409, 11506, 11617, 11753...",1,[BCONG],1,[149014],[],0,[],[All],[All],2024-05-20,NaT,NaT,NaT,NaT
7,395635,1,"[3648, 34030, 3576, 4982, 11409, 11506, 11617,...",1,"[SMD, BCONG]",1,"[3091, 3331960, 102232, 267108]",[],0,[],[All],[All],2024-04-08,NaT,NaT,NaT,NaT
8,491415,1,"[34020, 34030, 34041, 34511, 34140, 34142, 341...",1,"[APR, BCONG]",1,[213637],[],0,[],[All],[All],2024-10-24,NaT,NaT,NaT,NaT
9,395640,1,"[3648, 34020, 34030, 34193, 34052, 3576, 33284...",1,"[SHS2, BCONG, SHS]",1,"[174931, 27883, 19569, 127755, 205026, 551870,...",[],0,[],[All],[All],2024-05-22,2024-02-07,2024-12-12,NaT,NaT


In [21]:
# Compute the revenue for every rule and report how long it took.
# get_revenue_per_rule runs one aggregate query per row, so this cell is slow.
# perf_counter() is monotonic, unlike time.time(), so the measured interval
# cannot be distorted by system clock adjustments.
start_time = time.perf_counter()
df_stopsales["total_revenue"] = df_stopsales.apply(get_revenue_per_rule, axis=1)
end_time = time.perf_counter()
execution_time = end_time - start_time

print(f"Execution time: {execution_time:.4f} seconds")

Execution time: 39.3420 seconds


In [22]:
def filter_by_credential(df, credential_id):
    """Return the rows of ``df`` whose rule applies to ``credential_id``.

    Args:
        df: DataFrame with a list-valued ``credential_id`` column.
        credential_id: credential to look for (int or str).

    Returns:
        The rows whose ``credential_id`` list contains the wildcard ``"All"``
        or the requested credential. Ids are compared by their string form, so
        a list of ints and a list of numeric strings match alike.
    """
    target = str(credential_id)

    def _applies(credential_ids):
        return any(
            entry == "All" or str(entry) == target for entry in credential_ids
        )

    # astype(bool) keeps the mask boolean even for an empty frame, where
    # apply() returns an empty non-boolean Series.
    mask = df["credential_id"].apply(_applies).astype(bool)
    return df[mask]