In [None]:
#importing the necessary libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load every source table from its CSV file (paths are relative to the working directory).
# The generator yields one DataFrame per file name, in the same order as the
# target names on the left-hand side.
fact_bookings, fact_agg_bookings, dim_rooms, dim_hotels, dim_date = (
    pd.read_csv(csv_name)
    for csv_name in (
        'fact_bookings.csv',
        'fact_aggregated_bookings.csv',
        'dim_rooms.csv',
        'dim_hotels.csv',
        'dim_date.csv',
    )
)

In [None]:
# Parse the date columns (read from CSV as strings) into pandas datetimes
# so they can be compared, joined on and manipulated as dates.
for date_col in ('booking_date', 'check_in_date', 'checkout_date'):
    fact_bookings[date_col] = pd.to_datetime(fact_bookings[date_col])

# The date dimension carries a single date column, later used as a join key.
dim_date['date'] = pd.to_datetime(dim_date['date'])

In [None]:
# Assemble one wide, booking-level dataset by left-joining each dimension
# table onto the bookings (left joins keep every booking row).
df = (
    fact_bookings
    .merge(dim_rooms, how='left', left_on='room_category', right_on='room_id')
    .merge(dim_hotels, how='left', on='property_id')
    .merge(dim_date, how='left', left_on='check_in_date', right_on='date')
)

In [None]:


# Revenue leakage summary.
# `df` (bookings left-joined with the room, hotel and date dimensions) is built
# in the preceding cell; the identical merge is not repeated here.

# Keep only bookings where both revenue figures are present.
# `.copy()` makes df_valid an independent frame, so adding a column below does
# not write into a slice of `df` (which raises SettingWithCopyWarning).
has_both_revenues = df['revenue_generated'].notna() & df['revenue_realized'].notna()
df_valid = df.loc[has_both_revenues].copy()

# Per-booking revenue lost: generated minus realized
df_valid['revenue_lost'] = df_valid['revenue_generated'] - df_valid['revenue_realized']

# Total revenue metrics
total_generated = df_valid['revenue_generated'].sum()
total_realized = df_valid['revenue_realized'].sum()
total_lost = df_valid['revenue_lost'].sum()

# Leakage as a percentage of generated revenue; NaN when nothing was generated
# (e.g. no valid rows) instead of dividing by zero.
percent_leakage = (total_lost / total_generated) * 100 if total_generated else float('nan')

# Last expression of the cell: displayed as the cell output
total_generated, total_realized, total_lost, percent_leakage


In [None]:
# Inputs for the revenue-recovery forecast.
# The totals come from the leakage summary computed earlier in the notebook
# (`total_generated` / `total_realized`) rather than being typed in by hand, so
# the forecast cannot drift from the data. The previously hard-coded figures
# were 2007546215 (generated) and 1708771229 (realized).
revenue_generated = total_generated  # Total revenue generated (₹)
revenue_realized = total_realized    # Total revenue realized (₹)

# Revenue leakage: revenue that was generated but not realized
leakage_amount = revenue_generated - revenue_realized

# Fraction (not percent) of the leakage assumed to be recovered in each scenario
scenarios = {
    "Best Case": 0.50,        # Recover 50% of leakage
    "Most Likely": 0.30,      # Recover 30%
    "Worst Case": 0.10        # Recover 10%
}


In [None]:
# Project realized revenue under each recovery scenario.
# One record is collected per scenario and the table is built once at the end.
forecast_results = []

for scenario, recovery_rate in scenarios.items():
    # recovery_rate is the fraction of the leakage assumed to be recovered
    recovered_revenue = leakage_amount * recovery_rate
    projected_revenue = revenue_realized + recovered_revenue

    forecast_results.append({
        "Scenario": scenario,
        # The percent format rounds to the nearest whole percent.
        # int(rate * 100) truncates after float multiplication and can be off
        # by one (0.29 * 100 == 28.999999999999996 -> "28%").
        "Recovery %": f"{recovery_rate:.0%}",
        "Recovered Revenue (₹)": round(recovered_revenue, 2),
        "Projected Revenue (₹)": round(projected_revenue, 2)
    })

# pandas is already imported as `pd` in the notebook's first cell,
# so it is not re-imported here.
forecast_df = pd.DataFrame(forecast_results)
forecast_df

In [None]:
import matplotlib.pyplot as plt

# Bar chart of projected revenue per forecast scenario, drawn through the
# explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(
    forecast_df["Scenario"],
    forecast_df["Projected Revenue (₹)"],
    color=['green', 'orange', 'red'],  # one colour per bar, in row order
)
ax.set_title("Projected Revenue by Scenario")
ax.set_ylabel("Projected Revenue (₹)")
ax.set_xlabel("Scenario")
ax.grid(True, axis='y')  # horizontal guide lines only
fig.tight_layout()
plt.show()


In [None]:
# Business impact: ROI of reducing revenue leakage under three scenarios.

# Calculating total revenue leakage.
# Both totals are summed over the same bookings (rows where both revenue
# figures are present). Summing each column separately would skip missing
# values independently per column and could misstate the difference.
bookings_with_revenue = fact_bookings[['revenue_generated', 'revenue_realized']].dropna()
total_revenue_generated = bookings_with_revenue['revenue_generated'].sum()
total_revenue_realized = bookings_with_revenue['revenue_realized'].sum()
total_revenue_leakage = total_revenue_generated - total_revenue_realized

# Define implementation cost (assumed)
cost_of_implementation = 10_000_000  # in currency units INR

# Leakage-reduction scenarios, as whole percentages of the total leakage.
# Named `roi_scenarios` so it does not overwrite the `scenarios` dict of
# recovery fractions defined earlier in the notebook (different units).
roi_scenarios = {
    "Conservative (10%)": 10,
    "Moderate (25%)": 25,
    "Aggressive (50%)": 50,
}

results = []

# Loop through scenarios and compute ROI
for name, reduction_pct in roi_scenarios.items():
    revenue_saved = total_revenue_leakage * (reduction_pct / 100)
    new_realized = total_revenue_realized + revenue_saved
    # ROI % = net gain (saved revenue minus cost) relative to the cost
    roi = ((revenue_saved - cost_of_implementation) / cost_of_implementation) * 100

    results.append({
        "Scenario": name,
        "Leakage Reduction %": reduction_pct,
        "Revenue Saved": int(revenue_saved),        # truncated to whole currency units
        "New Revenue Realized": int(new_realized),  # truncated to whole currency units
        "Implementation Cost": cost_of_implementation,
        "ROI %": round(roi, 2)
    })


# Build the summary table once from the collected records
business_impact_df = pd.DataFrame(results)


display(business_impact_df)
