In [None]:
# Fill "total_revenue" with the value get_revenue_per_rule returns for each
# row of df_stopsales (the function receives the row as a Series).
# NOTE(review): get_revenue_per_rule is defined in the cell that follows this
# one, so on a fresh kernel that cell has to be executed first.
df_stopsales["total_revenue"] = df_stopsales.apply(
    get_revenue_per_rule, axis="columns"
)

In [None]:
def get_revenue_per_rule(rule_row):
    """Sum the booking revenue that falls inside one rule's scope.

    The date window is the 365 days ending at ``rule_row["last_updated"]``
    (both ends inclusive, compared on the date part of ``booking_date``).
    Each scope column of the rule narrows the query unless its value is
    exactly ``["All"]`` or ``[]``, in which case that dimension is left
    unfiltered.

    Args:
        rule_row: Mapping-like row providing ``credential_id``,
            ``provider_id``, ``hotel_id``, ``chain_id``, ``city``,
            ``country`` and ``last_updated``.

    Returns:
        The aggregated ``revenue`` of the matching ``BookingsFinancialData``
        rows, or ``0`` when the aggregate is empty/falsy.
    """
    # (rule value, ORM lookup, convert ids to int before querying?)
    scope_filters = [
        (rule_row["credential_id"], "credential_id__in", False),
        (rule_row["provider_id"], "provider_id__in", False),
        (rule_row["hotel_id"], "hotel_id__in", True),
        (rule_row["chain_id"], "hotel__chain_id__in", True),
        (rule_row["city"], "hotel__city_id__in", True),
        (rule_row["country"], "hotel__country_id__in", False),
    ]

    window_end = rule_row["last_updated"]
    window_start = window_end - timedelta(days=365)

    criteria = Q(
        booking_date__date__gte=window_start,
        booking_date__date__lte=window_end,
    )

    for values, lookup, cast_to_int in scope_filters:
        # ["All"] and [] both mean "do not restrict this dimension".
        if values != ["All"] and values != []:
            if cast_to_int:
                values = [int(value) for value in values]
            criteria &= Q(**{lookup: values})

    matching_bookings = BookingsFinancialData.objects.filter(criteria)
    revenue_total = matching_bookings.aggregate(total_revenue=Sum("revenue"))[
        "total_revenue"
    ]

    # The aggregate can come back falsy (e.g. no matching rows); report 0 then.
    return revenue_total or 0

In [None]:
def remove_inactive_credentials(credential_ids, credentials=None):
    """Keep only the credential ids that are enabled and not in a "veci" region.

    The literal ``"All"`` is always kept. Any other id is kept only when its
    entry in the lookup is truthy, its ``"enabled"`` value is truthy, and its
    ``"region"`` is ``None`` or does not contain ``"veci"`` (case-insensitive).
    Ids missing from the lookup are dropped.

    Args:
        credential_ids: Iterable of credential ids (may contain ``"All"``).
        credentials: Optional mapping ``id -> {"enabled": ..., "region": ...}``.
            When omitted, the notebook-level ``credential_dict`` is used, which
            keeps ``Series.apply(remove_inactive_credentials)`` working as
            before.

    Returns:
        A new list with the surviving ids, in their original order.
    """
    lookup = credential_dict if credentials is None else credentials

    kept_ids = []
    for credential_id in credential_ids:
        # The wildcard is passed through untouched; it has no lookup entry.
        if credential_id == "All":
            kept_ids.append(credential_id)
            continue

        credential_info = lookup.get(credential_id)
        if not credential_info:
            continue

        region = credential_info["region"]
        if credential_info["enabled"] and (
            region is None or "veci" not in region.lower()
        ):
            kept_ids.append(credential_id)

    return kept_ids


# Run remove_inactive_credentials on every value of the "credential_id"
# column and store the results in the "filtered_credentials" column.
df_stopsales["filtered_credentials"] = df_stopsales["credential_id"].apply(
    remove_inactive_credentials
)

In [None]:
def remove_none_values(ids):
    """Return ``ids`` as a list without its ``None`` entries.

    A ``None`` input is passed straight through as ``None``.
    """
    if ids is not None:
        kept = []
        for value in ids:
            if value is not None:
                kept.append(value)
        return kept

    return None

In [None]:
def is_valid_hotel_ids(hotel_ids):
    """Tell whether every entry of ``hotel_ids`` is a string of decimal digits.

    ``str.isdecimal`` is used rather than ``str.isdigit``: ``isdigit`` also
    accepts characters such as superscript digits ("²") that ``int()`` cannot
    parse, and ``get_revenue_per_rule`` converts hotel ids with ``int()``.

    Args:
        hotel_ids: Iterable of candidate ids, or ``None``.

    Returns:
        ``True`` when ``hotel_ids`` is ``None`` or every entry is a non-empty
        ``str`` made only of decimal digits; ``False`` otherwise (including
        ``None`` entries and non-string entries).
    """
    if hotel_ids is None:
        return True

    # isinstance() already rejects None entries, so no separate None test.
    return all(
        isinstance(hotel_id, str) and hotel_id.isdecimal()
        for hotel_id in hotel_ids
    )


# Select the rows of df_stopsales whose "hotel_id" value is rejected by
# is_valid_hotel_ids (the lambda negates the check to build the row mask).
invalid_rows = df_stopsales[
    df_stopsales["hotel_id"].apply(
        lambda hotel_ids: not is_valid_hotel_ids(hotel_ids)
    )
]

# Render the rejected rows in the cell output for inspection.
display(invalid_rows)

In [None]:
# Explode "credential_id", then "provider_id", then "hotel_id" (one row per
# element of each list-like value) and renumber the index from 0.
# NOTE(review): the statement rebinds df_stopsales_hotel_0 to a transformation
# of itself, and the frame is not created in any cell visible here — confirm
# where it comes from before relying on a fresh-kernel run.
df_stopsales_hotel_0 = (
    df_stopsales_hotel_0.explode("credential_id")
    .explode("provider_id")
    .explode("hotel_id")
    .reset_index(drop=True)
)

In [None]:
def calculate_revenue_summary(bookings, start_date, end_date):
    """Summarise the revenue of bookings made between two dates (inclusive).

    Args:
        bookings: DataFrame with a datetime ``booking_date`` column and a
            ``revenue`` column. It is not modified; the work happens on a copy
            of the selected rows.
        start_date: Lower bound for ``booking_date`` (inclusive).
        end_date: Upper bound for ``booking_date`` (inclusive).

    Returns:
        dict with
        ``revenue_sum`` - total revenue in the window;
        ``avg_weekly_revenue`` - mean of the per-week totals, taken over the
        weeks that contain at least one booking (``0`` when there are none);
        ``period_covered`` - ``"<first date> to <last date>"`` of the selected
        bookings, or ``"No sales during period"``;
        ``num_weeks_covered`` - whole weeks between the first and last
        selected booking (``days // 7``), ``0`` when there are none.
    """
    in_window = (bookings["booking_date"] >= start_date) & (
        bookings["booking_date"] <= end_date
    )
    window = bookings[in_window].copy()
    total_revenue = window["revenue"].sum()

    # Bucket the selected bookings into weekly periods and total each bucket.
    window["week"] = window["booking_date"].dt.to_period("W")
    revenue_by_week = window.groupby("week")["revenue"].sum()
    if revenue_by_week.empty:
        mean_weekly_revenue = 0
    else:
        mean_weekly_revenue = revenue_by_week.mean()

    # First and last booking actually present in the window, if any.
    if window.empty:
        earliest = None
        latest = None
    else:
        earliest = window["booking_date"].min()
        latest = window["booking_date"].max()

    if earliest and latest:
        span_label = f"{earliest.date()} to {latest.date()}"
        whole_weeks = (latest - earliest).days // 7
    else:
        span_label = "No sales during period"
        whole_weeks = 0

    return {
        "revenue_sum": total_revenue,
        "avg_weekly_revenue": mean_weekly_revenue,
        "period_covered": span_label,
        "num_weeks_covered": whole_weeks,
    }

In [None]:
# Build one result row per stop-sale rule: revenue inside the rule's
# [last_year, last_updated] window ("TTM") and revenue booked after
# last_updated. df_bookings is looked up through its index levels
# (credential_id, provider_id, hotel_id); booking_date and revenue are columns.
avg_weekly_revenues = []

for _, rule in df_stopsales.iterrows():
    # Identifying columns shared by every result row, with or without data.
    summary = {
        "rule_id": rule["rule_id"],
        "credential_level": rule["credential_level"],
        "credential_id": rule["credential_id"],
        "provider_id": rule["provider_id"],
        "hotel_id": rule["hotel_id"],
        "last_updated": rule["last_updated"],
        "has_attributes": rule["has_attributes"],
    }

    # Only the index lookup is guarded. A KeyError raised anywhere else
    # (e.g. a missing column) is a real problem and must not be reported as
    # "No data available".
    try:
        if rule["credential_id"] == "All":
            # Wildcard credential: match on provider and hotel only.
            bookings = df_bookings.xs(
                (rule["provider_id"], rule["hotel_id"]),
                level=["provider_id", "hotel_id"],
            )
        else:
            booking_key = (
                rule["credential_id"],
                rule["provider_id"],
                rule["hotel_id"],
            )
            bookings = df_bookings.loc[booking_key]
            if isinstance(bookings, pd.Series):
                # A full-depth key that matches exactly one row makes .loc
                # return that row as a Series; the column-based filters below
                # need a DataFrame, so re-select it as a one-row frame.
                bookings = df_bookings.loc[[booking_key]]
    except KeyError:
        # The credential/provider/hotel combination is not in df_bookings.
        summary.update(
            period_covered="No data available",
            num_weeks_covered=0,
            avg_weekly_revenue_ttm=0,
            revenue_ttm=0,
            revenue_after=0,
            avg_weekly_revenue_after=0,
        )
        avg_weekly_revenues.append(summary)
        continue

    # TTM window: same computation as calculate_revenue_summary (total,
    # mean of weekly totals, first/last booking date, whole weeks covered).
    ttm = calculate_revenue_summary(
        bookings, rule["last_year"], rule["last_updated"]
    )

    # Bookings made strictly after the rule's last update.
    rule_bookings_after = bookings[
        bookings["booking_date"] > rule["last_updated"]
    ].copy()
    revenue_after = rule_bookings_after["revenue"].sum()

    rule_bookings_after["week"] = rule_bookings_after[
        "booking_date"
    ].dt.to_period("W")
    weekly_revenue_after = rule_bookings_after.groupby("week")[
        "revenue"
    ].sum()
    avg_weekly_revenue_after = (
        weekly_revenue_after.mean() if not weekly_revenue_after.empty else 0
    )

    # Keyword order here fixes the column order of the resulting DataFrame.
    summary.update(
        period_covered=ttm["period_covered"],
        num_weeks_covered=ttm["num_weeks_covered"],
        avg_weekly_revenue_ttm=ttm["avg_weekly_revenue"],
        revenue_ttm=ttm["revenue_sum"],
        revenue_after=revenue_after,
        avg_weekly_revenue_after=avg_weekly_revenue_after,
    )
    avg_weekly_revenues.append(summary)

# Convert the list of dictionaries to a DataFrame
df_avg_revenue = pd.DataFrame(avg_weekly_revenues)

In [None]:
# Average weekly revenue per rule, filtering df_bookings through its
# credential_id / provider_id / hotel_id / booking_date columns.
# NOTE(review): this cell rebinds avg_weekly_revenues and df_avg_revenue, the
# same names the index-based loop cell above assigns, and it expects the ids
# as columns of df_bookings rather than index levels — confirm which variant
# (and which df_bookings layout) is the intended one.
avg_weekly_revenues = []

for _, rule in df_stopsales.iterrows():
    in_rule_scope = (
        (df_bookings["credential_id"] == rule["credential_id"])
        & (df_bookings["provider_id"] == rule["provider_id"])
        & (df_bookings["hotel_id"] == rule["hotel_id"])
        & (df_bookings["booking_date"] >= rule["ttm_start_date"])
        & (df_bookings["booking_date"] <= rule["last_date"])
    )
    # Copy the selection so the "week" column is added to an independent
    # frame instead of a slice of df_bookings (avoids SettingWithCopyWarning).
    rule_bookings = df_bookings[in_rule_scope].copy()

    # Group by week to sum up weekly revenue
    rule_bookings["week"] = rule_bookings["booking_date"].dt.to_period("W")
    weekly_revenue = rule_bookings.groupby("week")["revenue"].sum()

    # Mean of the weekly totals; this is NaN when the rule has no bookings
    # in the window (no fallback to 0 is applied in this cell).
    avg_weekly_revenue = weekly_revenue.mean()

    # Append results with identifying information
    avg_weekly_revenues.append(
        {
            "rule_id": rule["rule_id"],
            "credential_id": rule["credential_id"],
            "provider_id": rule["provider_id"],
            "hotel_id": rule["hotel_id"],
            "avg_weekly_revenue": avg_weekly_revenue,
        }
    )

# Convert results to a DataFrame
df_avg_revenue = pd.DataFrame(avg_weekly_revenues)

In [None]:
# Join every booking to the rules sharing its credential/provider/hotel ids
# so the per-rule date filter can be applied in one vectorised step.
# NOTE(review): the rules frame merged here is `df`, which is not created in
# any visible cell, while the other cells use `df_stopsales` — confirm which
# frame is intended.
merged_df = pd.merge(
    df_bookings,
    df,
    on=["credential_id", "provider_id", "hotel_id"],
    suffixes=("_booking", "_rule"),
)

# Keep only bookings inside each rule's [ttm_start_date, last_date] window.
# The copy makes filtered_df independent of merged_df, so adding the "week"
# column below does not trigger SettingWithCopyWarning.
filtered_df = merged_df[
    (merged_df["booking_date"] >= merged_df["ttm_start_date"])
    & (merged_df["booking_date"] <= merged_df["last_date"])
].copy()

# Add a column to group by week for weekly revenue calculation
filtered_df["week"] = filtered_df["booking_date"].dt.to_period("W")

# Group by rule identifiers and week, and sum up weekly revenue
weekly_revenue = (
    filtered_df.groupby(
        ["rule_id", "credential_id", "provider_id", "hotel_id", "week"]
    )["revenue"]
    .sum()
    .reset_index()
)

# Average the weekly totals per rule. Rules with no booking in their window
# have no rows in weekly_revenue and therefore no row in the result.
df_avg_revenue = (
    weekly_revenue.groupby(
        ["rule_id", "credential_id", "provider_id", "hotel_id"]
    )["revenue"]
    .mean()
    .reset_index(name="avg_weekly_revenue")
)