In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error


# 1. Dataset
# Five paired observations; the column labels carry the units.

ads_spend = [1, 2, 3, 1, 2]
books_sold = [100, 130, 160, 110, 140]

data = {"GoogleAds_(₹1000s)": ads_spend, "BooksSold": books_sold}
df = pd.DataFrame(data)

In [None]:
# 2. Baseline value
# The baseline is the arithmetic mean of the observed BooksSold column.

observed_sales = df["BooksSold"]
baseline = observed_sales.mean()
print(f"Baseline value: {baseline}")

Baseline value: 128.0


In [None]:
# 3. Linear Regression
# Fit BooksSold on the single ad-spend feature.

X = df[["GoogleAds_(₹1000s)"]]  # double brackets keep X as a 2-D DataFrame
y = df["BooksSold"]

# fit() returns the fitted estimator, so construction and fitting can be chained.
model = LinearRegression().fit(X, y)

# Single feature, so the slope is the first (only) entry of coef_.
intercept, coef = model.intercept_, model.coef_[0]

In [None]:
# 4. Predictions, baseline and SHAP-style contribution
# SHAP here is the (rounded) prediction minus the baseline; adding it back to
# the Baseline column is written to Baseline_plus_SHAP.

predicted_rounded = model.predict(X).round(4)
df["Predicted_BooksSold"] = predicted_rounded
df["Baseline"] = round(baseline, 4)

contribution = (df["Predicted_BooksSold"] - baseline).round(4)
df["SHAP"] = contribution
df["Baseline_plus_SHAP"] = (df["Baseline"] + contribution).round(4)

In [None]:
# 5. Residuals & Over/Under

def _describe_residual(residual):
    """Label a residual (actual - predicted) by the direction of the model's miss."""
    if residual > 0:
        # Actual exceeded the prediction.
        return "Underprediction (model too low)"
    if residual < 0:
        # Prediction exceeded the actual.
        return "Overprediction (model too high)"
    return "Exact"


residuals = (df["BooksSold"] - df["Predicted_BooksSold"]).round(4)
df["Residual_(Actual-Predicted)"] = residuals
df["Over_Under"] = residuals.apply(_describe_residual)

In [None]:
# 6. Model performance
# Score against the model's raw predictions rather than the
# "Predicted_BooksSold" column: that column was rounded to 4 decimals for
# display, and scoring it would fold rounding error into the metrics.

y_pred = model.predict(X)

r2 = r2_score(y, y_pred)              # coefficient of determination
mse = mean_squared_error(y, y_pred)   # mean squared error
mae = mean_absolute_error(y, y_pred)  # mean absolute error

In [None]:
# 7. Output results
# Assemble the report as an ordered list of lines, then emit them one by one.
# Entries starting with "\n" open a new section with a blank line above it.

report_lines = [
    "Linear Regression Model",
    f"Predicted_BooksSold = {intercept:.4f} + {coef:.4f} × GoogleAds_(₹1000s)",
    f"Intercept: {intercept:.4f}",
    f"Coefficient: {coef:.4f} (books per ₹1000 Google Ads)",
    "\nBaseline",
    f"Baseline (mean BooksSold): {baseline:.4f}",
    "\nModel Performance",
    f"R-squared: {r2:.4f}",
    f"MSE: {mse:.4f}",
    f"MAE: {mae:.4f}",
    "\nDetailed Table",
    df.to_string(index=False),  # full table as plain text, without the index
]

for line in report_lines:
    print(line)

Linear Regression Model
Predicted_BooksSold = 77.8571 + 27.8571 × GoogleAds_(₹1000s)
Intercept: 77.8571
Coefficient: 27.8571 (books per ₹1000 Google Ads)

Baseline
Baseline (mean BooksSold): 128.0000

Model Performance
R-squared: 0.9530
MSE: 21.4286
MAE: 4.2857

Detailed Table
 GoogleAds_(₹1000s)  BooksSold  Predicted_BooksSold  Baseline     SHAP  Baseline_plus_SHAP  Residual_(Actual-Predicted)                      Over_Under
                  1        100             105.7143     128.0 -22.2857            105.7143                      -5.7143 Overprediction (model too high)
                  2        130             133.5714     128.0   5.5714            133.5714                      -3.5714 Overprediction (model too high)
                  3        160             161.4286     128.0  33.4286            161.4286                      -1.4286 Overprediction (model too high)
                  1        110             105.7143     128.0 -22.2857            105.7143                       4