In [None]:
# ───────────────────────────────────────────────────────────────
#  EMPIRICAL‑BEST FIXED‑PRICE BASELINE
#  – Hindsight‑optimal single price using full dataset –
#  Produces:
#      ► optimal_price, optimal_profit   (stored in baseline_summary as 'optimal_total_profit')
#      ► baseline_results      (price–profit curve)
#      ► weekly_profit_opt     (profit per ISO‑week under baseline)
#      ► weekly_cum_profit_opt (cumulative profit series)
#      ► helper: compute_regret(model_total_profit)
#  Plots:
#      1. Total‑Profit vs Price (with optimal marker)
#      2. Empirical Demand Curve  (Price bin center vs Avg Quantity)
# ───────────────────────────────────────────────────────────────
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# ---------------------------------------------------------------
# ① LOAD  DATA
# ---------------------------------------------------------------
# Machine-specific absolute path to the single-product sales CSV.
file_path = r'D:\IITR DS Final Year Thesis\Dataset Superstore\archive\generated_single_product_dataset_with_seasonal_variation.csv'

# Read the CSV (parsing `date` as datetimes) and retain only the rows whose
# quantity is strictly positive; `.copy()` detaches the result from the
# unfiltered frame so later column assignments act on an independent frame.
df = (
    pd.read_csv(file_path, parse_dates=['date'])
      .loc[lambda frame: frame['quantity'] > 0]
      .copy()
)

# ---------------------------------------------------------------
# ② GRID OF CANDIDATE PRICES & PROFIT EVALUATION
#    Profit(P) = Σ_{i: price_paid_i ≥ P} (P - cost_i) * quantity_i
#    (we assume customers who originally paid ≥ P would still buy)
# ---------------------------------------------------------------
# Search range: the lowest and highest prices observed in the data.
min_price = df['price_paid'].min()
max_price = df['price_paid'].max()

# 300 evenly spaced candidate prices covering the observed range.
# NOTE(review): between two consecutive observed prices the buyer set is
# fixed, so profit only rises with the candidate price; the exact optimum
# therefore lies on an observed price, and this evenly spaced grid may land
# slightly below it. Confirm whether that approximation is acceptable.
price_grid = np.linspace(min_price, max_price, 300)
profits = np.empty_like(price_grid)

for slot, candidate in enumerate(price_grid):
    # Transactions assumed to still happen at this price: those whose
    # originally paid price is at least the candidate.
    buyers = df.loc[df['price_paid'] >= candidate, ['cost', 'quantity']]
    unit_margin = candidate - buyers['cost']
    profits[slot] = (unit_margin * buyers['quantity']).sum()

# The candidate with the highest total profit is the fixed-price baseline.
best_idx = int(profits.argmax())
optimal_price = float(price_grid[best_idx])
optimal_profit = float(profits[best_idx])

# Keep the whole price -> profit curve for later comparison and plotting.
baseline_results = pd.DataFrame(
    {
        'price_candidate': price_grid,
        'total_profit': profits,
    }
)

print(f"► Optimal fixed price (hindsight): {optimal_price:,.2f}")
print(f"► Total profit at optimal price  : {optimal_profit:,.2f}")

# ---------------------------------------------------------------
# ③ WEEK‑LEVEL PROFIT SERIES  (helps compute cumulative regret)
# ---------------------------------------------------------------
# ISO week number of each transaction date.
# NOTE(review): only the week number is kept, not the ISO year, so rows from
# different years that share a week number are summed into the same bucket
# below. Confirm the data covers a single ISO year, or key on (year, week).
df['iso_week'] = df['date'].dt.isocalendar()['week']

# Counterfactual profit per transaction under the optimal fixed price:
# a sale is assumed to occur only if the customer originally paid at least
# that price; every other row contributes zero.
would_have_bought = df['price_paid'] >= optimal_price
counterfactual_profit = (optimal_price - df['cost']) * df['quantity']
df['profit_optimal'] = counterfactual_profit.where(would_have_bought, 0.0)

# Weekly totals ordered by week number, and their running sum.
weekly_profit_opt = (
    df['profit_optimal']
    .groupby(df['iso_week'])
    .sum()
    .sort_index()
)
weekly_cum_profit_opt = weekly_profit_opt.cumsum()

# ---------------------------------------------------------------
# ④  HELPER: REGRET  vs baseline
# ---------------------------------------------------------------
def compute_regret(model_total_profit):
    """
    Return the shortfall of a model's total profit against the baseline.

    The value is ``optimal_profit - model_total_profit``, where
    ``optimal_profit`` is the module-level hindsight-best fixed-price profit.
    A positive result means the model earned less than the baseline; a
    negative result means it earned more.
    """
    shortfall = optimal_profit - model_total_profit
    return shortfall

# Example usage:
#   my_model_profit = 1_200_000
#   print("Regret:", compute_regret(my_model_profit))

# ---------------------------------------------------------------
# ⑤  VISUALISATIONS
# ---------------------------------------------------------------
fig, ax = plt.subplots(1, 2, figsize=(12, 4))

# (a) Total-profit curve over the candidate price grid, with the
#     hindsight-optimal price marked by a dashed vertical line.
ax[0].plot(price_grid, profits, lw=2, label='Total profit')
ax[0].axvline(optimal_price, color='red', ls='--',
              label=f'Optimal = {optimal_price:.2f}')
ax[0].set_xlabel('Price')
ax[0].set_ylabel('Total Profit')
ax[0].set_title('Price vs Total Profit (hindsight)')
ax[0].legend()

# (b) Empirical demand curve (avg qty in 20 price bins)
# 20 bins need 21 edges; passing 20 to linspace would yield only 19 bins.
bins          = np.linspace(min_price, max_price, 20 + 1)
df['price_bin'] = pd.cut(df['price_paid'], bins, labels=False, include_lowest=True)
bin_centers   = 0.5 * (bins[:-1] + bins[1:])

# groupby omits bins that received no transactions, which would leave
# avg_qty shorter than bin_centers and make the plot call fail on mismatched
# lengths. Reindexing onto every bin number keeps one entry per bin centre,
# with NaN (drawn as a gap) for empty bins.
avg_qty = (
    df.groupby('price_bin')['quantity']
      .mean()
      .reindex(np.arange(len(bin_centers)))
)

ax[1].plot(bin_centers, avg_qty, marker='o')
ax[1].set_xlabel('Price bin centre')
ax[1].set_ylabel('Avg Quantity')
ax[1].set_title('Empirical Demand Curve (binned)')

plt.tight_layout()
plt.show()

# ---------------------------------------------------------------
# ⑥  RESULTS OBJECTS  (ready for comparison)
# ---------------------------------------------------------------
# Bundle every baseline artefact computed above into one dictionary so that
# downstream comparisons can pull them from a single object.
baseline_summary = dict(
    optimal_price=optimal_price,
    optimal_total_profit=optimal_profit,
    price_grid=price_grid,
    profit_curve=profits,
    weekly_profit_series=weekly_profit_opt,
    weekly_cum_profit_series=weekly_cum_profit_opt,
)

# At this point you can:
#   • compare any model’s TOTAL profit to `optimal_profit` (also stored as `baseline_summary['optimal_total_profit']`)
#   • compute REGRET via `compute_regret( model_profit )`
#   • compare WEEK‑BY‑WEEK profits to `weekly_profit_opt`
#
# Example: evaluating a model that produced weekly_profit_model (pd.Series)
#   common_weeks = weekly_profit_model.index.intersection(weekly_profit_opt.index)
#   regret_series = weekly_profit_opt[common_weeks] - weekly_profit_model[common_weeks]
#   total_regret  = regret_series.sum()
#   cum_regret    = regret_series.cumsum()
#
# …then plot cum_regret for visual comparison.

# Save baseline curve if needed:
# baseline_results.to_csv("empirical_baseline_curve.csv", index=False)
