In [1]:
import numpy as np

In [2]:
# Dataset
data = {
    'ad_budget': [200, 300, 150, 400, 250],
    'social_media': [50, 70, 30, 80, 60],
    'sales': [1000, 1500, 800, 2000, 1200]
}

In [3]:
# Features (X) and Target (y)
X = np.array([data['ad_budget'], data['social_media']]).T  # Shape: (n_samples, n_features)
y = np.array(data['sales'])                                # Shape: (n_samples,)

In [4]:
# Add a column of ones for the intercept term
X = np.c_[np.ones(X.shape[0]), X]  # Shape: (n_samples, n_features + 1)

In [5]:
# Split into training and testing sets (80% train, 20% test)
split_index = int(0.8 * X.shape[0])
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

In [6]:
def rmse(y_true, y_pred):
    return np.sqrt(np.mean((y_true - y_pred) ** 2))

In [7]:
def r2_score(y_true, y_pred):
    ss_total = np.sum((y_true - np.mean(y_true)) ** 2)
    ss_residual = np.sum((y_true - y_pred) ** 2)
    return 1 - (ss_residual / ss_total)

In [8]:
# Task 1: Linear Regression (Closed-form solution)
def linear_regression(X, y):
    # Solve for coefficients using the normal equation: theta = (X.T @ X)^(-1) @ X.T @ y
    theta = np.linalg.inv(X.T @ X) @ X.T @ y
    return theta

In [9]:
# Train Linear Regression
theta_linear = linear_regression(X_train, y_train)
c, m1, m2 = theta_linear  # Intercept (c), slope for ad_budget (m1), slope for social_media (m2)

In [10]:
# Predictions using y = m1*x1 + m2*x2 + c
y_pred_linear = m1 * X_test[:, 1] + m2 * X_test[:, 2] + c

In [11]:
# Evaluate Linear Regression
rmse_linear = rmse(y_test, y_pred_linear)
r2_linear = r2_score(y_test, y_pred_linear)

  return 1 - (ss_residual / ss_total)


In [12]:
# Task 2: Lasso Regression (Gradient Descent with L1 Penalty)
def lasso_regression(X, y, alpha=0.1, learning_rate=0.01, epochs=1000):
    n_samples, n_features = X.shape
    theta = np.zeros(n_features)  # Initialize coefficients to zero

    for _ in range(epochs):
        y_pred = X @ theta
        error = y_pred - y

        # Gradient of the loss function with L1 penalty
        gradient = (X.T @ error) / n_samples + alpha * np.sign(theta)

        # Update coefficients
        theta -= learning_rate * gradient

    return theta

In [13]:
# Train Lasso Regression
alpha = 0.1
theta_lasso = lasso_regression(X_train, y_train, alpha=alpha)
c_lasso, m1_lasso, m2_lasso = theta_lasso  # Intercept (c), slope for ad_budget (m1), slope for social_media (m2)

  gradient = (X.T @ error) / n_samples + alpha * np.sign(theta)
  theta -= learning_rate * gradient


In [14]:
# Predictions using y = m1*x1 + m2*x2 + c
y_pred_lasso = m1_lasso * X_test[:, 1] + m2_lasso * X_test[:, 2] + c_lasso

In [15]:
# Evaluate Lasso Regression
rmse_lasso = rmse(y_test, y_pred_lasso)
r2_lasso = r2_score(y_test, y_pred_lasso)

In [16]:
# Task 3: Compare Model Performance
print("Linear Regression:")
print(f"Intercept (c): {c:.2f}")
print(f"Slope for ad_budget (m1): {m1:.2f}")
print(f"Slope for social_media (m2): {m2:.2f}")
print(f"RMSE: {rmse_linear:.2f}")
print(f"R² Score: {r2_linear:.2f}\n")

Linear Regression:
Intercept (c): 72.41
Slope for ad_budget (m1): 5.41
Slope for social_media (m2): -2.93
RMSE: 50.00
R² Score: -inf



In [17]:
print("Lasso Regression (α = 0.1):")
print(f"Intercept (c): {c_lasso:.2f}")
print(f"Slope for ad_budget (m1): {m1_lasso:.2f}")
print(f"Slope for social_media (m2): {m2_lasso:.2f}")
print(f"RMSE: {rmse_lasso:.2f}")
print(f"R² Score: {r2_lasso:.2f}")

Lasso Regression (α = 0.1):
Intercept (c): nan
Slope for ad_budget (m1): nan
Slope for social_media (m2): nan
RMSE: nan
R² Score: nan
