In [5]:
#We will build a model that answers:

#What happens if marketing increases 10%?
#What if discount increases?
#What if price changes?
#How does profit react?

In [7]:
import pandas as pd
import numpy as np

from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [9]:
# Read the transaction-level dataset into a DataFrame. The path is relative,
# so it resolves against the notebook's current working directory.
transactions_csv = "../data/transactions.csv"
df = pd.read_csv(transactions_csv)

# Preview the first five rows as a quick sanity check of columns and values.
df.head(5)

Unnamed: 0,transaction_id,customer_id,product_category,price,discount,marketing_spend,quantity,date,cost,revenue,profit
0,1,2,Fashion,3285.598778,0.278717,163.837457,1,2023-05-20,1971.359267,2369.847401,234.650676
1,2,4832,Home,3338.188546,0.234976,174.348861,2,2023-11-22,2002.913127,5107.587848,927.412732
2,3,8452,Beauty,1164.112366,0.285472,138.930571,3,2024-08-17,698.46742,2495.371358,261.038527
3,4,2341,Home,3089.20734,0.068202,184.37836,1,2023-07-24,1853.524404,2878.517729,840.614965
4,5,3192,Electronics,2350.408189,0.08979,167.970229,2,2023-04-20,1410.244913,4278.731204,1290.271149


In [10]:
# Modelling setup: profit is the regression target, predicted from the four
# transaction columns listed below. The order of this list fixes the column
# order of X.
feature_columns = ["price", "discount", "marketing_spend", "quantity"]
target_column = "profit"

X = df[feature_columns]
y = df[target_column]

In [13]:
# Hold out 20% of the rows for evaluation. The fixed random_state keeps the
# split identical from run to run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [15]:
# Hyperparameters for the gradient-boosted regressor, kept together in one
# place so they are easy to read and tune.
xgb_params = {
    "n_estimators": 200,     # number of boosting rounds (trees)
    "learning_rate": 0.05,   # shrinkage applied to each tree's contribution
    "max_depth": 4,          # maximum depth of each tree
    "random_state": 42,      # fixed seed for reproducibility
}
model = XGBRegressor(**xgb_params)

# Fit on the training split only; the test split stays unseen for evaluation.
model.fit(X_train, y_train)

In [16]:
# Score the held-out rows with the fitted model.
predictions = model.predict(X_test)

# RMSE is the square root of the mean squared error, so it is expressed in
# the same unit as the target (profit).
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
print("RMSE:", rmse)

RMSE: 28.294405987731793


In [17]:
# Baseline scenario: a single synthetic row holding the column-wise mean of
# every feature, shaped (1, n_features) so it can be passed to predict().
feature_means = X.mean()
base_input = feature_means.to_numpy().reshape(1, -1)

# Note: this is the model's prediction AT the mean feature vector, which is
# not the same thing as the mean of the per-row predictions.
base_profit = model.predict(base_input)[0]
print("Base Profit:", base_profit)

Base Profit: 1145.9196


In [18]:
# Scenario 1: increase marketing spend by 10% relative to the baseline row,
# keeping every other feature at its baseline value.

# Look the column position up by name instead of hard-coding it, so the
# scenario keeps targeting marketing_spend even if the feature list in X is
# reordered or extended.
marketing_idx = X.columns.get_loc("marketing_spend")

scenario1 = base_input.copy()  # copy so the baseline row itself is not modified
scenario1[0, marketing_idx] *= 1.10

profit_scenario1 = model.predict(scenario1)[0]

# Report the predicted profit and its difference from the baseline prediction.
print("Profit after 10% marketing increase:", profit_scenario1)
print("Profit Change:", profit_scenario1 - base_profit)

Profit after 10% marketing increase: 1122.0094
Profit Change: -23.910156


In [23]:
# Scenario 2: increase the discount by 0.05, keeping every other feature at
# its baseline value.
# NOTE: this is an ABSOLUTE change (+0.05 added to the discount value), not a
# relative 5% increase. The previewed rows show discount values such as
# 0.278717, so this presumably means +5 percentage points — confirm the unit.

# Look the column position up by name instead of hard-coding it, so the
# scenario keeps targeting discount even if the feature list in X is
# reordered or extended.
discount_idx = X.columns.get_loc("discount")

scenario2 = base_input.copy()  # copy so the baseline row itself is not modified
scenario2[0, discount_idx] += 0.05

profit_scenario2 = model.predict(scenario2)[0]

# Report the predicted profit and its difference from the baseline prediction.
print("Profit after 5% discount increase:", profit_scenario2)
print("Profit Change:", profit_scenario2 - base_profit)

Profit after 5% discount increase: 824.1919
Profit Change: -321.72766


In [25]:
# ============================================================
# DAY 4 - PROFIT SENSITIVITY & WHAT-IF SIMULATION
# PROJECT: E-Commerce Executive AI Decision Agent
# ============================================================

# ------------------------------------------------------------
# OBJECTIVE:
# Build a profit prediction model using XGBoost Regression.
# Use it to simulate business scenarios such as:
#   - Increasing marketing spend
#   - Increasing discount
#   - Changing price
#
# This transforms the project from prediction to decision intelligence.
# ------------------------------------------------------------


# ============================================================
# BUSINESS PROBLEM
# ============================================================

# We want to model:
#
#     Profit = f(price, discount, marketing_spend, quantity)
#
# Instead of just predicting revenue,
# we simulate strategic decisions BEFORE implementing them.
#
# This allows executives to:
# - Estimate ROI impact
# - Avoid risky pricing decisions
# - Optimize marketing budgets


# ============================================================
# IMPORTANT LIBRARIES
# ============================================================

# pandas
# - Data loading and manipulation

# numpy
# - Mathematical operations

# XGBRegressor (from xgboost)
# - Gradient Boosting Regression model
# - Strong for tabular structured data

# train_test_split
# - Splits dataset into training and testing

# mean_squared_error
# - Used to evaluate regression performance


# ============================================================
# MACHINE LEARNING CONCEPTS USED
# ============================================================

# 1. REGRESSION
#    Predicting a continuous variable (profit).

# 2. FEATURES (Independent Variables)
#    price
#    discount
#    marketing_spend
#    quantity

# 3. TARGET (Dependent Variable)
#    profit

# 4. TRAIN-TEST SPLIT
#    80% Training
#    20% Testing
#    Used to evaluate model performance on unseen data.

# 5. XGBOOST REGRESSION
#    Uses Gradient Boosting:
#    - Builds trees sequentially
#    - Each tree reduces previous error
#    - Minimizes loss function


# ============================================================
# MODEL PARAMETERS EXPLANATION
# ============================================================

# n_estimators:
#    Number of boosting trees.
#    More trees = stronger learning capability.

# learning_rate:
#    Controls how fast the model learns.
#    Smaller = slower but more stable learning.

# max_depth:
#    Controls tree complexity.
#    Higher = more complex model (risk of overfitting).

# random_state:
#    Ensures reproducibility.


# ============================================================
# EVALUATION METRIC
# ============================================================

# RMSE (Root Mean Squared Error)
#
# Formula:
#     RMSE = sqrt( mean( (Actual - Predicted)^2 ) )
#
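# Interpretation:
#     Typical size of the prediction error, in the same unit as profit.
#     Because errors are squared before averaging, large misses are
#     weighted more heavily than in a plain average error (MAE).
#
# Example:
#     RMSE = 15000
#     Means the typical prediction error is roughly ₹15,000 per transaction.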


# ============================================================
# WHAT-IF SIMULATION (CORE STRATEGIC FEATURE)
# ============================================================

# After training model, we simulate business changes.

# Step 1:
#   Take average input values as baseline scenario.

# Step 2:
#   Modify one variable (e.g., increase marketing by 10%).

# Step 3:
#   Use model.predict() to estimate new profit.

# Step 4:
#   Compare new profit vs baseline profit.

# Example Scenarios:
#
# 1. Increase marketing spend by 10%
#    -> Check profit change
#
# 2. Increase discount by 5%
#    -> Check profit impact
#
# 3. Change price
#    -> Observe elasticity effect


# ============================================================
# BUSINESS INTERPRETATION
# ============================================================

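# If marketing increase leads to higher predicted profit:
#     Strategy is profitable.
#
# If marketing increase leads to lower predicted profit:
#     The extra spend is not paying for itself at the current level.
#
# If discount increase reduces profit:
#     Discount may increase revenue but reduce margin.
#
# This model quantifies the estimated financial impact BEFORE decision execution.
# Note: the estimates reflect patterns learned from historical data, not
# guaranteed cause-and-effect, so treat them as directional guidance.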


# ============================================================
# STRATEGIC VALUE IN PROJECT
# ============================================================

# This transforms project from:
#     "Predictive Model"
#
# To:
#     "Decision Intelligence System"
#
# Because now system can:
# - Recommend optimal marketing level
# - Estimate profit risk
# - Support CFO-level planning


# ============================================================
# INTERVIEW-READY EXPLANATION
# ============================================================

# "I built a profit sensitivity model using XGBoost regression
# to simulate strategic scenarios such as marketing increases
# and discount changes. This allowed quantitative estimation
# of ROI impact, transforming predictive analytics into
# executive decision intelligence."

# ============================================================
# END OF DAY 4 NOTES
# ============================================================

In [28]:
import joblib
from pathlib import Path

# Persist the fitted profit model so other notebooks/services can load it.
model_path = Path("../models/profit_model.pkl")

# Create the target directory if it is missing (e.g. on a fresh checkout);
# without this, joblib.dump raises FileNotFoundError.
model_path.parent.mkdir(parents=True, exist_ok=True)

joblib.dump(model, model_path)

print("Profit model saved successfully ✅")

Profit model saved successfully ✅


In [30]:
import os

# List the contents of the models directory to confirm the file was written.
model_files = os.listdir("../models")
print(model_files)

['churn_model.pkl', 'profit_model.pkl', 'revenue_model.pkl']
