In [1]:
# CELL 1: Setup
import pandas as pd, numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import r2_score
import plotly.express as px

# CELL 2: Prepare data, build the preprocessing + ElasticNet pipeline,
# fit it on a train split and report R^2 on the holdout split.
df = pd.read_csv("Data1/cleaned_sales.csv", parse_dates=["orderdate"])
# Lower-case the headers so every later column lookup can use lowercase names.
df.columns = [c.lower() for c in df.columns]

# ✅ Use profit_est instead of profit
# Target: profit_est / sales, forced to 0 where sales is not positive
# (avoids dividing by zero) and clipped to [-1, 1] to bound outliers.
df["profit_margin"] = np.where(df["sales"] > 0, df["profit_est"] / df["sales"], 0).clip(-1, 1)

cat = ["productline"]
num = ["discount_pct", "priceeach", "inventory_age_days"]

# Transformer order matters: the fitted coefficient vector follows the
# column order cat -> poly -> num produced here.
pre = ColumnTransformer(
    [
        # One indicator column per product line; unseen lines encode as all zeros.
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat),
        # discount_pct and discount_pct^2 (no bias column), left unscaled.
        (
            "poly",
            Pipeline([
                ("sel", "passthrough"),
                ("poly", PolynomialFeatures(degree=2, include_bias=False)),
            ]),
            ["discount_pct"],
        ),
        # Remaining numeric features are standardised.
        ("num", StandardScaler(), ["priceeach", "inventory_age_days"]),
    ],
    remainder="drop",
)

# `alphas` is intentionally left at its default: passing `alphas=None`
# explicitly is deprecated in recent scikit-learn releases, while the default
# still produces an automatically generated alpha grid on old and new versions.
model = ElasticNetCV(l1_ratio=[0.1, 0.5, 0.9], cv=5, random_state=42, max_iter=20000)

pipe = Pipeline([("pre", pre), ("enet", model)])

X = df[cat + num]
# Margins that are NaN (e.g. missing profit_est) are treated as 0.
y = df["profit_margin"].fillna(0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42)

pipe.fit(X_tr, y_tr)
y_pred = pipe.predict(X_te)
print(f"Elasticity model R^2 (holdout): {r2_score(y_te, y_pred):.3f}")

# CELL 3: Interpreting discount effect
# Recover coefficients for discount terms by pairing each fitted coefficient
# with a feature name.
ohe = pipe.named_steps["pre"].named_transformers_["cat"]
# Names are listed in the ColumnTransformer's output order:
# one-hot columns, then discount_pct and its square, then the scaled numerics.
feature_names = (list(ohe.get_feature_names_out(cat)) +
                 ["discount_pct", "discount_pct^2", "priceeach", "inventory_age_days"])

coef = pipe.named_steps["enet"].coef_
# zip() silently truncates on a length mismatch, which would attach
# coefficients to the wrong names; fail loudly instead.
if len(feature_names) != len(coef):
    raise ValueError(
        f"Feature name count ({len(feature_names)}) does not match "
        f"coefficient count ({len(coef)}); update feature_names to mirror the preprocessor."
    )
coef_map = dict(zip(feature_names, coef))

# b1: linear discount coefficient, b2: quadratic discount coefficient.
b1 = coef_map.get("discount_pct", 0.0)
b2 = coef_map.get("discount_pct^2", 0.0)
# NOTE(review): the wording assumes discount_pct is stored as a fraction
# (0.10 == 10%) — confirm against the dataset.
print(f"Interpretation: Every +10% discount (~+0.10) changes margin by ~{b1*0.10:.3f} (linear term).")
if b2 != 0:
    print(f"Curvature present (quadratic={b2:.3f}): diminishing returns if b2 < 0.")

# CELL 4: Optimal discount per product line (solve quadratic for max margin)
ohe_names = list(ohe.get_feature_names_out(cat))
line_effects = {n.replace("productline_", ""): coef_map.get(n, 0.0) for n in ohe_names}

# Practical discount range considered: 0–60%.
MIN_DISCOUNT, MAX_DISCOUNT = 0.0, 0.6


def _discount_contribution(d):
    """Return the modelled margin contribution b1*d + b2*d^2 of discount d."""
    return b1 * d + b2 * d * d


# b1 and b2 are single global coefficients, so the optimum is computed once
# and is the same for every product line (the product-line coefficients only
# shift the margin level, they do not move the maximising discount).
if b2 < 0:
    # Concave curve: the vertex is the maximum; clip it to the practical range.
    d_star = max(MIN_DISCOUNT, min(MAX_DISCOUNT, -b1 / (2 * b2)))
elif b1 == 0 and b2 == 0:
    # Discount has no modelled effect, so there is no meaningful optimum.
    d_star = None
else:
    # Convex (b2 > 0) or purely linear (b2 == 0): the vertex, if any, is a
    # MINIMUM, so the maximum over the range sits at one of the endpoints.
    # Ties resolve to the smaller discount (first candidate).
    d_star = max((MIN_DISCOUNT, MAX_DISCOUNT), key=_discount_contribution)

rows = [
    {
        "productline": line,
        "opt_discount_pct": d_star,
        "linear_effect_per_10pp": b1 * 0.10,
    }
    for line in sorted(line_effects)
]

opt_df = pd.DataFrame(rows)
opt_df.to_csv("elasticity_optimal_discount_by_productline.csv", index=False)
print(opt_df.head(10))

# CELL 5: Monthly Sales & Profit Trends
# Bucket each order into its calendar month, stored back as a timestamp column.
order_periods = df["orderdate"].dt.to_period("M")
df["order_month"] = order_periods.dt.to_timestamp()

# Sum both metrics per month; reset_index turns order_month back into a column.
trend_metrics = ["sales", "profit_est"]
monthly_totals = df.groupby("order_month")[trend_metrics].sum()
monthly = monthly_totals.reset_index()

# One line per metric over time.
fig = px.line(monthly, x="order_month", y=trend_metrics, title="Monthly Sales & Profit Trends")

layout_options = {
    "xaxis_title": "Order Month",
    "yaxis_title": "Amount (Sales & Profit)",
    "legend_title": "Metrics",
    "template": "plotly_white",
}
fig.update_layout(**layout_options)
fig.show()


Elasticity model R^2 (holdout): 0.873
Interpretation: Every +10% discount (~+0.10) changes margin by ~-0.080 (linear term).
Curvature present (quadratic=-0.490): diminishing returns if b2 < 0.
        productline  opt_discount_pct  linear_effect_per_10pp
0      Classic Cars               0.0               -0.080381
1       Motorcycles               0.0               -0.080381
2            Planes               0.0               -0.080381
3             Ships               0.0               -0.080381
4            Trains               0.0               -0.080381
5  Trucks and Buses               0.0               -0.080381
6      Vintage Cars               0.0               -0.080381
