In [1]:
from pathlib import Path
import json
import time

import django
import pandas as pd
import requests

django.setup()

from django.conf import settings
from django.db.models import F, Q, Case, When, BooleanField, Sum
from django.utils.timezone import timedelta, now

from reports.models import SpecificStopSalesRules, BookingsFinancialData
from hotel.models import Hotel, Chain, Provider
from clients.models import Credential

In [2]:
# Notebook-wide pandas display settings: no column/row truncation and
# two-decimal rendering for floats.
for option_name, option_value in (
    ("display.max_columns", None),
    ("display.max_rows", None),
    ("display.float_format", "{:.2f}".format),
):
    pd.set_option(option_name, option_value)

In [3]:
# Absolute path of the directory the notebook runs from, and its "data" subfolder.
base_dir = Path(".").resolve()
data_dir = base_dir / "data"

In [4]:
class StopSalesDataProcessor:
    """Load stop-sales rules for one organization and flatten them into a DataFrame.

    Rules are read from ``SpecificStopSalesRules``; the credential / provider /
    hotel / destination "levels" stored under ``levels_rules`` are translated
    into explicit id lists, where the sentinel list ``["All"]`` means
    "no restriction on this dimension".

    Args:
        org: Value matched against the rule's ``organization`` field.
        tags: Iterable matched against the rule's ``tag`` field (``tag__in``).
        active: Value matched against the rule's ``active`` field.
        exclude_auto: When true, rules named ``"Automatic blacklist."`` are dropped.
    """

    # Column layout of the frame returned by ``get_stopsales_as_df``. Used to
    # build a well-formed empty result when no rule matches.
    OUTPUT_COLUMNS = (
        "rule_id",
        "credential_level",
        "credential_id",
        "provider_level",
        "provider_id",
        "hotel_level",
        "hotel_id",
        "chain_id",
        "city",
        "country",
        "destination_level",
        "destination",
        "last_updated",
        "booking_from",
        "booking_to",
        "check_in_from",
        "check_in_to",
        "has_attributes",
    )

    def __init__(self, org, tags, active=True, exclude_auto=True):
        self.org = org
        self.tags = tags
        self.active = active
        self.exclude_auto = exclude_auto
        # Columns converted to pandas datetimes by ``_process_dates``.
        self.date_columns = [
            "last_updated",
            "booking_from",
            "booking_to",
            "check_in_from",
            "check_in_to",
        ]
        # Both lookups hit external resources (a JSON file and the database),
        # so they are loaded once per processor instance.
        self.credential_group = self.get_credential_group()
        self.credential_dict = self.get_credential_dict()

    def get_stopsales_as_df(self, rule_id=None, credential_id=None, provider_id=None):
        """Return the matching rules as a DataFrame, optionally drilled down.

        The drill-down arguments are cumulative: ``credential_id`` is only
        honoured together with ``rule_id``, and ``provider_id`` only together
        with both of the others.

        Args:
            rule_id: Restrict to one rule and emit one row per credential.
            credential_id: Keep only that credential and emit one row per provider.
            provider_id: Keep only that provider and emit one row per hotel.

        Returns:
            A DataFrame with the columns listed in ``OUTPUT_COLUMNS``. When no
            rule matches, an empty frame with those columns is returned.
        """
        df = pd.DataFrame(list(self.get_stopsales_data(rule_id)))

        # An empty queryset yields a frame with no columns at all; every later
        # step would fail with KeyError, so return a typed-empty result instead.
        if df.empty:
            return pd.DataFrame(columns=list(self.OUTPUT_COLUMNS))

        df = self._process_dates(df)
        df = self._transform_level_fields(df)
        df = self.reorder_columns(df)

        if not rule_id:
            return df
        df = df.explode("credential_id").reset_index(drop=True)

        if not credential_id:
            return df
        df = df[df["credential_id"] == credential_id]
        df = df.explode("provider_id").reset_index(drop=True)

        if not provider_id:
            return df
        # The credential filter was already applied above, so only the provider
        # needs filtering here.
        df = df[df["provider_id"] == provider_id]
        return df.explode("hotel_id").reset_index(drop=True)

    def get_stopsales_data(self, rule_id=None):
        """Build the ``values()`` queryset of annotated rules.

        Args:
            rule_id: Optional rule id to restrict the queryset to.

        Returns:
            A queryset of dicts holding the annotated rule fields.
        """
        queryset = SpecificStopSalesRules.objects.filter(
            tag__in=self.tags,
            organization=self.org,
            active=self.active,
        )

        if self.exclude_auto:
            queryset = queryset.exclude(name="Automatic blacklist.")

        if rule_id:
            queryset = queryset.filter(rule_id=rule_id)

        return self.annotate_rules(queryset).values(
            "rule_id",
            "credential_level",
            "credential_id",
            "provider_level",
            "provider_id",
            "hotel_level",
            "hotel_id",
            "destination_level",
            "destination",
            "last_updated",
            "booking_from",
            "booking_to",
            "check_in_from",
            "check_in_to",
            "has_attributes",
        )

    @staticmethod
    def annotate_rules(queryset):
        """Expose the nested ``levels_rules`` entries as flat annotations.

        ``has_attributes`` is true when any of the listed extra rule attributes
        has a value greater than zero.
        """
        return queryset.annotate(
            credential_level=F("levels_rules__cli__t"),
            credential_id=F("levels_rules__cli__l"),
            provider_level=F("levels_rules__prv__t"),
            provider_id=F("levels_rules__prv__l"),
            hotel_level=F("levels_rules__hot__t"),
            hotel_id=F("levels_rules__hot__l"),
            destination_level=F("levels_rules__dest__t"),
            destination=F("levels_rules__dest__l"),
            last_updated=F("last_date"),
            booking_from=F("levels_rules__bod__f"),
            booking_to=F("levels_rules__bod__u"),
            check_in_from=F("levels_rules__cid__f"),
            check_in_to=F("levels_rules__cid__u"),
            has_attributes=Case(
                When(
                    # NOTE(review): ``cp`` is compared directly while every other
                    # attribute is compared through its ``t`` key - confirm this
                    # is intentional.
                    Q(levels_rules__cp__gt=0)
                    | Q(levels_rules__ps__t__gt=0)
                    | Q(levels_rules__rat__t__gt=0)
                    | Q(levels_rules__mrk__t__gt=0)
                    | Q(levels_rules__mel__t__gt=0)
                    | Q(levels_rules__cid__t__gt=0)
                    | Q(levels_rules__bod__t__gt=0)
                    | Q(levels_rules__rrg__t__gt=0)
                    | Q(levels_rules__rel__t__gt=0)
                    | Q(levels_rules__dow__t__gt=0)
                    | Q(levels_rules__age__t__gt=0)
                    | Q(levels_rules__room__t__gt=0)
                    | Q(levels_rules__non__t__gt=0)
                    | Q(levels_rules__hou__t__gt=0),
                    then=True,
                ),
                default=False,
                output_field=BooleanField(),
            ),
        )

    def _process_dates(self, df):
        """Convert every column in ``self.date_columns`` with ``pd.to_datetime`` (in place)."""
        df[self.date_columns] = df[self.date_columns].apply(pd.to_datetime)
        return df

    def _transform_level_fields(self, df):
        """Translate level codes into id lists and drop rules left without credentials."""
        df["credential_id"] = df.apply(self._map_credential_group, axis=1)
        df["credential_id"] = df["credential_id"].apply(self._remove_inactive_and_veci)
        df["provider_id"] = df.apply(self._map_providers, axis=1)
        df["hotel_id"] = df["hotel_id"].apply(self._remove_invalid_hotel_ids)
        # These two add the ``chain_id`` and ``city`` / ``country`` columns.
        df = df.apply(self._map_hotels, axis=1)
        df = df.apply(self._map_destinations, axis=1)
        # ``_remove_inactive_and_veci`` always returns a list, so truthiness is
        # equivalent to "list is not empty".
        has_credentials = df["credential_id"].map(bool)
        return df[has_credentials]

    @staticmethod
    def get_credential_group():
        """Read ``credential_group.json`` from ``data_dir`` as ``{str(id): credential_list}``."""
        df = pd.read_json(data_dir / "credential_group.json")

        return dict(zip(df["id"].astype(str), df["credential_list"]))

    @staticmethod
    def get_credential_dict():
        """Map every credential id to its ``enabled`` flag and client region id."""
        # ``values()`` already reads ``client__region_id`` through the relation;
        # ``select_related`` has no effect on a values() queryset, so it is omitted.
        credentials = Credential.objects.values("id", "enabled", "client__region_id")
        return {
            credential["id"]: {
                "enabled": credential["enabled"],
                "region": credential["client__region_id"],
            }
            for credential in credentials
        }

    def _map_credential_group(self, row):
        """Resolve the credential list of a rule row from its credential level.

        Level 0 yields ``["All"]``, level 2 expands the stored ids as credential
        groups, and any other level returns the stored list unchanged.
        """
        level = row["credential_level"]
        if level == 0:
            return ["All"]
        if level == 2:
            return self._expand_credentials(row["credential_id"])
        return row["credential_id"]

    def _expand_credentials(self, credential_ids):
        """Expand credential group ids into a de-duplicated, order-preserving list.

        Group ids are normalized to ``str`` before the lookup because the keys of
        ``self.credential_group`` are built with ``astype(str)``. Unknown groups
        contribute nothing; a missing (non-list) input yields ``[]``.
        """
        if not isinstance(credential_ids, (list, tuple)):
            return []

        expanded_credentials = []
        for group_id in credential_ids:
            expanded_credentials.extend(self.credential_group.get(str(group_id), []))

        # dict.fromkeys removes duplicates while keeping first-seen order.
        return list(dict.fromkeys(expanded_credentials))

    def _map_providers(self, row):
        """Resolve the provider list of a rule row from its provider level.

        Levels 0, 2 and 3 map to the sentinels ``["All"]``, ``["All Third"]`` and
        ``["All Direct"]``; any other level returns the stored list unchanged.
        """
        level_sentinels = {
            0: ["All"],
            2: ["All Third"],
            3: ["All Direct"],
        }
        return level_sentinels.get(row["provider_level"], row["provider_id"])

    @staticmethod
    def _split_by_level(level, ids):
        """Return the ``(specific, grouping)`` pair shared by hotels and destinations.

        Level 0 means no restriction on either side, level 1 puts ``ids`` on the
        specific side, and level 2 puts ``ids`` on the grouping side.

        Raises:
            KeyError: If ``level`` is not 0, 1 or 2.
        """
        level_pairs = {
            0: (["All"], ["All"]),
            1: (ids, []),
            2: (["All"], ids),
        }
        return level_pairs[level]

    def _map_hotels(self, row):
        """Set ``hotel_id`` and ``chain_id`` on the row from its hotel level."""
        hotel_id, chain_id = self._split_by_level(row["hotel_level"], row["hotel_id"])
        row["hotel_id"] = hotel_id
        row["chain_id"] = chain_id

        return row

    def _map_destinations(self, row):
        """Set ``city`` and ``country`` on the row from its destination level."""
        city, country = self._split_by_level(row["destination_level"], row["destination"])
        row["city"] = city
        row["country"] = country

        return row

    def _remove_invalid_hotel_ids(self, ids):
        """Keep only ids that are all-digit strings; ``None`` is passed through."""
        if ids is None:
            return None

        return [
            hotel_id
            for hotel_id in ids
            if isinstance(hotel_id, str) and hotel_id.isdigit()
        ]

    def _is_reportable_credential(self, credential_id):
        """Tell whether a credential should be kept by ``_remove_inactive_and_veci``."""
        if credential_id == "All":
            return True

        credential_info = self.credential_dict.get(credential_id)
        if not credential_info:
            return False

        region = credential_info["region"]
        # Credentials without a region are dropped, as are those whose region
        # contains "veci" (case-insensitive).
        return bool(
            credential_info["enabled"] and region and "veci" not in region.lower()
        )

    def _remove_inactive_and_veci(self, credential_ids):
        """Drop unknown, disabled, region-less and "veci" credentials from the list.

        The ``"All"`` sentinel is always kept. A missing (non-list) input yields
        ``[]``, so the rule is later discarded like any rule with no credentials.
        """
        if not isinstance(credential_ids, (list, tuple)):
            return []

        return [
            credential_id
            for credential_id in credential_ids
            if self._is_reportable_credential(credential_id)
        ]

    @staticmethod
    def reorder_columns(df):
        """Move ``chain_id``, ``city`` and ``country`` to positions 7-9 (in place)."""
        for position, column in enumerate(("chain_id", "city", "country"), start=7):
            df.insert(position, column, df.pop(column))
        return df

In [5]:
class MetricsProcessor:
    """Compute booking revenue metrics for a single stop-sales rule row.

    A rule row supplies ``credential_id``, ``provider_id``, ``hotel_id``,
    ``chain_id``, ``city``, ``country`` and ``last_updated``. Each dimension may
    be a list of ids or, after ``DataFrame.explode``, a single scalar value.
    """

    def get_dataframe(self, rule_row):
        """Return the bookings matched by ``rule_row`` as a DataFrame."""
        return pd.DataFrame(list(self.get_bookings_data(rule_row)))

    def get_bookings_data(self, rule_row):
        """Return a ``values()`` queryset of the bookings matched by ``rule_row``.

        Bookings whose credential's client region name contains "veci"
        (case-insensitive) are always excluded.
        """
        filters = self._build_filter_conditions(rule_row)
        return (
            BookingsFinancialData.objects.filter(filters)
            .exclude(credential__client__region__name__icontains="veci")
            .values()
        )

    def _build_filter_conditions(self, rule_row):
        """Combine the date window and every per-dimension filter with AND."""
        # (booking field, rule_row key, value transform)
        field_specs = (
            ("credential_id", "credential_id", None),
            ("provider_id", "provider_id", None),
            ("hotel_id", "hotel_id", int),
            ("hotel__chain_id", "chain_id", int),
            ("hotel__city_id", "city", int),
            ("hotel__country_id", "country", None),
        )

        conditions = self._build_date_range_filter(rule_row)
        for field_name, row_key, transform in field_specs:
            conditions &= self._build_field_filter(
                field_name, rule_row[row_key], transform=transform
            )

        return conditions

    def _build_date_range_filter(self, rule_row):
        """Select bookings made in the 365 days up to the rule's ``last_updated``."""
        end_date = rule_row["last_updated"]
        start_date = end_date - timedelta(days=365)
        return Q(
            booking_date__date__gte=start_date,
            booking_date__date__lte=end_date,
        )

    def _build_field_filter(self, field_name, values, transform=None):
        """Build the Q condition for one rule dimension.

        Args:
            field_name: Booking field the values are matched against with ``__in``.
            values: A list of ids, or a single scalar (as produced by
                ``DataFrame.explode``).
            transform: Optional callable applied to every id before filtering.

        Returns:
            An empty ``Q()`` (no restriction) when the dimension is ``"All"``,
            empty or missing; ``Q(provider__direct=...)`` for the provider
            sentinels ``"All Third"`` / ``"All Direct"``; otherwise
            ``Q(<field_name>__in=values)``.
        """
        # Normalize scalars first so that exploded rows (``"All"``, ``"All Third"``,
        # a single id) take exactly the same path as their list form. Exploding
        # an empty list yields NaN, which is treated like the empty list.
        if not isinstance(values, list):
            if pd.api.types.is_scalar(values) and pd.isna(values):
                return Q()
            values = [values]

        if values in (["All"], []):
            return Q()

        # Sentinels must be recognized before ``transform`` runs: they are not ids.
        if field_name == "provider_id" and values in (["All Third"], ["All Direct"]):
            return Q(provider__direct=(values == ["All Direct"]))

        if transform:
            values = [transform(value) for value in values]

        return Q(**{f"{field_name}__in": values})

    def get_metrics(self, rule_row):
        """Compute revenue metrics for the bookings matched by ``rule_row``.

        Args:
            rule_row: A pandas Series (or any mapping) describing the rule.

        Returns:
            A dict with, in this order:
            ``revenue`` (sum of ``net_agency``), ``avg_revenue_per_week`` (mean
            of the weekly sums, over weeks that have at least one booking),
            ``period_covered`` (first to last booking date) and
            ``number_of_weeks_covered`` (whole weeks between those two dates).
            When nothing matches, the numeric values are 0 and
            ``period_covered`` is ``"No sales during period"``.
        """
        rule = rule_row.to_dict() if hasattr(rule_row, "to_dict") else dict(rule_row)
        bookings = self.get_dataframe(rule)

        if bookings.empty:
            return {
                "revenue": 0,
                "avg_revenue_per_week": 0,
                "period_covered": "No sales during period",
                "number_of_weeks_covered": 0,
            }

        # Work on a tz-naive copy of the dates; the bookings frame is not modified.
        booking_dates = bookings["booking_date"].dt.tz_localize(None)
        first_date = booking_dates.min()
        last_date = booking_dates.max()

        return {
            "revenue": bookings["net_agency"].sum(),
            "avg_revenue_per_week": self._calculate_avg_revenue_per_week(bookings),
            "period_covered": f"{first_date.date()} to {last_date.date()}",
            "number_of_weeks_covered": (last_date - first_date).days // 7,
        }

    def _calculate_avg_revenue_per_week(self, df):
        """Return the mean of the per-week ``net_agency`` sums.

        Only weeks that contain at least one booking take part in the mean.
        ``df`` is left untouched.
        """
        # The timezone is dropped before converting to weekly periods.
        weeks = df["booking_date"].dt.tz_localize(None).dt.to_period("W")
        revenue_per_week = df["net_agency"].groupby(weeks).sum()
        return revenue_per_week.mean()

In [6]:
# Full run: load every stop-sales rule and attach its revenue metrics.
# get_metrics is called once per rule row.
start_time = time.time()

stopsales_data_processor = StopSalesDataProcessor(org="lgt", tags=[-1, 1, 3])
metrics_processor = MetricsProcessor()

metric_columns = [
    "revenue",
    "avg_revenue_per_week",
    "period_covered",
    "number_of_weeks_covered",
]
df_stopsales = stopsales_data_processor.get_stopsales_as_df()
df_stopsales[metric_columns] = df_stopsales.apply(
    lambda row: pd.Series(metrics_processor.get_metrics(row)), axis=1
)

end_time = time.time()
execution_time = end_time - start_time

print(f"Execution time: {execution_time:.4f} seconds")

Execution time: 51.5449 seconds


In [7]:
# Rank rules by revenue (highest first) and preview the top ten.
df_stopsales = df_stopsales.sort_values(by="revenue", ascending=False)
display(df_stopsales.head(10))

Unnamed: 0,rule_id,credential_level,credential_id,provider_level,provider_id,hotel_level,hotel_id,chain_id,city,country,destination_level,destination,last_updated,booking_from,booking_to,check_in_from,check_in_to,has_attributes,revenue,avg_revenue_per_week,period_covered,number_of_weeks_covered
1155,498300,1,"[11807, 5318]",0,[All],0,[All],[All],[All],[All],0,[],2024-07-17,NaT,NaT,NaT,NaT,True,60313929.22,1137998.66,2023-07-17 to 2024-07-17,52
874,47290,0,[All],1,"[EXPRTA, EXPRH, EXPR, EXPRDS, EXPRDSH]",0,[All],[All],[All],[All],0,,2024-09-10,NaT,NaT,NaT,NaT,True,50180886.39,929275.67,2023-09-10 to 2024-09-10,52
910,563194,1,"[33284, 22003, 30830, 6288]",0,[All],0,[All],[All],[All],[All],0,[],2024-07-23,NaT,NaT,NaT,NaT,True,9457590.98,175140.57,2023-07-23 to 2024-07-23,52
84,2528,1,[4115],0,[All],0,[All],[All],[All],[All],0,,2022-09-21,NaT,NaT,NaT,NaT,True,2834465.11,53480.47,2021-09-21 to 2022-09-21,52
1135,395090,0,[All],1,"[SMD, BCONG]",2,[All],[4578],[All],[All],0,[],2024-11-28,NaT,NaT,NaT,NaT,True,2598538.15,49029.02,2023-11-29 to 2024-11-28,52
248,475040,1,"[34030, 3576, 4204, 11409, 11506, 11617, 11753...",1,"[RTG, BCONG]",1,"[2653227, 508202, 82059, 3897550, 3976641, 154...",[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,2289285.81,43194.07,2023-12-12 to 2024-12-11,52
884,88255,0,[All],1,[AGO],0,[All],[All],[All],[All],0,,2022-09-21,NaT,NaT,NaT,NaT,True,2106471.4,39744.74,2021-09-20 to 2022-09-21,52
23,2795,0,[All],1,[BCONG],1,"[570998, 1857065, 335562, 11054, 161937, 22168...",[],[All],[All],0,,2022-09-21,NaT,NaT,NaT,NaT,True,1651849.69,31166.98,2021-09-21 to 2022-09-21,52
318,467435,1,[11656],1,"[EXPRTA, NRBEU, NRBES]",0,[All],[All],[All],"[ES, IT, GR, CH, CN, MX]",2,"[ES, IT, GR, CH, CN, MX]",2024-11-19,NaT,NaT,NaT,NaT,False,1367779.0,25807.15,2023-11-20 to 2024-11-19,52
1100,2790,0,[All],1,"[DIN2, DIN, DNG, BCONG]",1,"[1870256, 1869426, 11192, 222357, 277005, 1850...",[],[All],[All],0,,2022-09-21,NaT,NaT,NaT,NaT,True,1128806.71,21298.24,2021-09-21 to 2022-09-21,52


In [8]:
# Drill-down 1: a single rule, one row per credential.
start_time = time.time()

stopsales_data_processor = StopSalesDataProcessor(org="lgt", tags=[-1, 1, 3])
df_rule = stopsales_data_processor.get_stopsales_as_df(rule_id="509914")

metric_columns = [
    "revenue",
    "avg_revenue_per_week",
    "period_covered",
    "number_of_weeks_covered",
]
df_rule[metric_columns] = df_rule.apply(
    lambda row: pd.Series(metrics_processor.get_metrics(row)), axis=1
)
df_rule = df_rule.sort_values(by="revenue", ascending=False)

end_time = time.time()
execution_time = end_time - start_time

print(f"Execution time: {execution_time:.4f} seconds")
print("Size:", df_rule.shape)
display(df_rule.head())

Execution time: 1.8148 seconds
Size: (348, 22)


Unnamed: 0,rule_id,credential_level,credential_id,provider_level,provider_id,hotel_level,hotel_id,chain_id,city,country,destination_level,destination,last_updated,booking_from,booking_to,check_in_from,check_in_to,has_attributes,revenue,avg_revenue_per_week,period_covered,number_of_weeks_covered
189,509914,1,8550,1,[BCONG],1,"[625423, 3758660, 3936090, 556770, 2512893, 18...",[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,8491.33,530.71,2024-02-21 to 2024-07-14,20
117,509914,1,8924,1,[BCONG],1,"[625423, 3758660, 3936090, 556770, 2512893, 18...",[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,5673.0,436.38,2024-02-17 to 2024-07-15,21
190,509914,1,5318,1,[BCONG],1,"[625423, 3758660, 3936090, 556770, 2512893, 18...",[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,1797.62,599.21,2024-05-26 to 2024-06-04,1
298,509914,1,11655,1,[BCONG],1,"[625423, 3758660, 3936090, 556770, 2512893, 18...",[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,1678.85,559.62,2024-03-30 to 2024-05-24,7
77,509914,1,33904,1,[BCONG],1,"[625423, 3758660, 3936090, 556770, 2512893, 18...",[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,980.85,326.95,2024-03-22 to 2024-04-28,5


In [9]:
# Drill-down 2: a single rule and credential, one row per provider.
start_time = time.time()

stopsales_data_processor = StopSalesDataProcessor(org="lgt", tags=[-1, 1, 3])
df_rule = stopsales_data_processor.get_stopsales_as_df(
    rule_id="509914", credential_id="8550"
)

metric_columns = [
    "revenue",
    "avg_revenue_per_week",
    "period_covered",
    "number_of_weeks_covered",
]
df_rule[metric_columns] = df_rule.apply(
    lambda row: pd.Series(metrics_processor.get_metrics(row)), axis=1
)
df_rule = df_rule.sort_values(by="revenue", ascending=False)

end_time = time.time()
execution_time = end_time - start_time

print(f"Execution time: {execution_time:.4f} seconds")
print("Size:", df_rule.shape)
display(df_rule)

Execution time: 0.0622 seconds
Size: (1, 22)


Unnamed: 0,rule_id,credential_level,credential_id,provider_level,provider_id,hotel_level,hotel_id,chain_id,city,country,destination_level,destination,last_updated,booking_from,booking_to,check_in_from,check_in_to,has_attributes,revenue,avg_revenue_per_week,period_covered,number_of_weeks_covered
0,509914,1,8550,1,BCONG,1,"[625423, 3758660, 3936090, 556770, 2512893, 18...",[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,8491.33,530.71,2024-02-21 to 2024-07-14,20


In [10]:
# Drill-down 3: a single rule, credential and provider, one row per hotel.
start_time = time.time()

stopsales_data_processor = StopSalesDataProcessor(org="lgt", tags=[-1, 1, 3])
df_rule = stopsales_data_processor.get_stopsales_as_df(
    rule_id="509914", credential_id="8550", provider_id="BCONG"
)

metric_columns = [
    "revenue",
    "avg_revenue_per_week",
    "period_covered",
    "number_of_weeks_covered",
]
df_rule[metric_columns] = df_rule.apply(
    lambda row: pd.Series(metrics_processor.get_metrics(row)), axis=1
)
df_rule = df_rule.sort_values(by="revenue", ascending=False)

end_time = time.time()
execution_time = end_time - start_time

print(f"Execution time: {execution_time:.4f} seconds")

display(df_rule)

Execution time: 0.0950 seconds


Unnamed: 0,rule_id,credential_level,credential_id,provider_level,provider_id,hotel_level,hotel_id,chain_id,city,country,destination_level,destination,last_updated,booking_from,booking_to,check_in_from,check_in_to,has_attributes,revenue,avg_revenue_per_week,period_covered,number_of_weeks_covered
8,509914,1,8550,1,BCONG,1,1870585,[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,6368.75,398.05,2024-02-21 to 2024-07-14,20
5,509914,1,8550,1,BCONG,1,1870757,[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,2122.58,707.53,2024-03-12 to 2024-07-13,17
0,509914,1,8550,1,BCONG,1,625423,[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,0.0,0.0,No sales during period,0
1,509914,1,8550,1,BCONG,1,3758660,[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,0.0,0.0,No sales during period,0
2,509914,1,8550,1,BCONG,1,3936090,[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,0.0,0.0,No sales during period,0
3,509914,1,8550,1,BCONG,1,556770,[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,0.0,0.0,No sales during period,0
4,509914,1,8550,1,BCONG,1,2512893,[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,0.0,0.0,No sales during period,0
6,509914,1,8550,1,BCONG,1,1877099,[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,0.0,0.0,No sales during period,0
7,509914,1,8550,1,BCONG,1,3660726,[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,0.0,0.0,No sales during period,0
9,509914,1,8550,1,BCONG,1,3898384,[],[All],[All],0,[],2024-12-11,NaT,NaT,NaT,NaT,False,0.0,0.0,No sales during period,0


In [11]:
# Export the per-rule metrics, without the index, to the current working directory.
df_stopsales.to_csv(path_or_buf="df_stopsales.csv", index=False)