- x: Feature (Size of the house in square feet)
- y: Target variable (Price of the house in dollars)
- F(x): The current prediction model
- r: Residuals (errors) between actual and predicted values, r = y − F(x)
- h_t(x): Weak learner (Decision Stump) at iteration t
- γ_t: Step size at iteration t, chosen by a least-squares line search along h_t(x) (it plays the role of a learning rate)
- T: Total number of boosting iterations


In [5]:
import numpy as np
import pandas as pd

# Toy housing dataset: one entry per house in every column.
data = dict(
    Size=[1500, 1600, 1700, 1800, 1900],
    Bedrooms=[3, 3, 3, 2, 4],
    Price=[300000, 320000, 340000, 360000, 380000],
)

df = pd.DataFrame(data)

# Single input feature (Size) and regression target (Price);
# the Bedrooms column is kept in the frame but not selected here.
X = df['Size'].to_numpy()
y = df['Price'].to_numpy()

print(df)

   Size  Bedrooms   Price
0  1500         3  300000
1  1600         3  320000
2  1700         3  340000
3  1800         2  360000
4  1900         4  380000


## Init the model

In [49]:
num_iters = 10  # Number of boosting iterations

# F0(x): the initial constant prediction, taken as the mean of the Price column
F0_x = float(df['Price'].mean())

## Calculate Residuals

In [50]:
# Two parallel, initially empty lists: the fitted weak learners and the
# step size that goes with each of them.
learners, gammas = [], []

In [51]:
# Predictions of the current model: one float64 entry per target value,
# every entry starting at the initial constant F0_x.
F_current = np.full(y.shape, F0_x, dtype=np.float64)

F_current

array([340000., 340000., 340000., 340000., 340000.])

In [52]:
print(df)

# NOTE: this cell updates F_current, learners and gammas in place, so running
# it a second time continues boosting from the current state; re-run the
# cells that initialise them to start over.
for t in range(1, num_iters + 1):
  print(f"\n--- Iteration {t} ---")

  # Compute residuals: the part of y the current model does not explain yet
  residuals = y - F_current
  print(f"Residuals: {residuals}")

  # Fit a decision stump to residuals:
  # find the threshold that minimizes the summed squared error when each
  # side of the split predicts the mean of its own residuals
  best_threshold = None
  best_error = float('inf')
  best_c_left = None
  best_c_right = None

  thresholds = np.unique(X)

  for s in thresholds:
    left = X < s
    right = X >= s

    # If either side has no data, skip (the smallest value of X always
    # leaves the left side empty)
    if not left.any() or not right.any():
      continue

    print(f"Threshold: {s}")
    # Calculate mean residuals for each split
    c_left = residuals[left].mean()
    c_right = residuals[right].mean()

    error = np.sum((residuals[left] - c_left) ** 2) + np.sum((residuals[right] - c_right) ** 2)

    # Strict '<' keeps the first (smallest) threshold when errors tie
    if error < best_error:
      best_error = error
      best_threshold = s
      best_c_left = c_left
      best_c_right = c_right

  # No threshold separates the data when X has fewer than two distinct
  # values; comparing against best_threshold=None below would raise.
  if best_threshold is None:
    print("No valid split found (X needs at least two distinct values); stopping early.")
    break

  print(f"Best Threshold: {best_threshold}")
  print(f"c_left: {best_c_left}, c_right: {best_c_right}")
  print(f"Best Error: {best_error}")

  def weak_learner(x):
    """Stump fitted in this iteration: c_left where x < threshold, else c_right.

    Reads best_threshold / best_c_left / best_c_right when called, so it
    always reflects the most recently fitted stump.
    """
    return np.where(x < best_threshold, best_c_left, best_c_right)

  # Predict residuals using the weak learner
  h_t = weak_learner(X)
  print(f"Weak Learner Predictions: {h_t}")

  # Step size: the gamma that minimizes sum((residuals - gamma * h_t) ** 2).
  # When the stump predicts 0 everywhere (the residuals are already fitted)
  # the ratio is 0/0 = nan, which would turn every entry of F_current into
  # nan, so stop boosting instead.
  denominator = np.sum(h_t ** 2)
  if denominator == 0:
    print("Weak learner predicts 0 everywhere (nothing left to fit); stopping early.")
    break
  gamma = np.sum(residuals * h_t) / denominator

  # Move the model along the weak learner and remember both pieces so the
  # same update can be replayed on new inputs
  F_current += gamma * h_t

  learners.append({
    'threshold': best_threshold,
    'c_left': best_c_left,
    'c_right': best_c_right
  })
  gammas.append(gamma)

# Final Model Prediction
print("\n--- Final Model ---")
print(f"F(x): {F_current}")

   Size   Price
0  1500  300000
1  1600  320000
2  1700  340000
3  1800  360000
4  1900  380000

--- Iteration 1 ---
Residuals: [-40000. -20000.      0.  20000.  40000.]
Threshold: 1600
Threshold: 1700
Threshold: 1800
Threshold: 1900
Best Threshold: 1700
c_left: -30000.0, c_right: 20000.0
Best Error: 1000000000.0
Weak Learner Predictions: [-30000. -30000.  20000.  20000.  20000.]

--- Iteration 2 ---
Residuals: [-10000.  10000. -20000.      0.  20000.]
Threshold: 1600
Threshold: 1700
Threshold: 1800
Threshold: 1900
Best Threshold: 1900
c_left: -5000.0, c_right: 20000.0
Best Error: 500000000.0
Weak Learner Predictions: [-5000. -5000. -5000. -5000. 20000.]

--- Iteration 3 ---
Residuals: [ -5000.  15000. -15000.   5000.      0.]
Threshold: 1600
Threshold: 1700
Threshold: 1800
Threshold: 1900
Best Threshold: 1700
c_left: 5000.0, c_right: -3333.3333333333335
Best Error: 416666666.6666666
Weak Learner Predictions: [ 5000.          5000.         -3333.33333333 -3333.33333333
 -3333.33333333]

In [53]:
# Dump the fitted ensemble: all step sizes on one line, all stumps on the next.
print(f"Gamma: {gammas}", f"H: {learners}", sep="\n")

Gamma: [np.float64(1.0), np.float64(1.0), np.float64(0.9999999999999998), np.float64(1.0000000000000002), np.float64(1.0000000000000002), np.float64(1.0), np.float64(0.9999999999999999), np.float64(1.0), np.float64(1.0), np.float64(1.0)]
H: [{'threshold': np.int64(1700), 'c_left': np.float64(-30000.0), 'c_right': np.float64(20000.0)}, {'threshold': np.int64(1900), 'c_left': np.float64(-5000.0), 'c_right': np.float64(20000.0)}, {'threshold': np.int64(1700), 'c_left': np.float64(5000.0), 'c_right': np.float64(-3333.3333333333335)}, {'threshold': np.int64(1600), 'c_left': np.float64(-10000.0), 'c_right': np.float64(2499.9999999999854)}, {'threshold': np.int64(1700), 'c_left': np.float64(3750.0), 'c_right': np.float64(-2500.0000000000196)}, {'threshold': np.int64(1800), 'c_left': np.float64(-3888.8888888888955), 'c_right': np.float64(5833.333333333314)}, {'threshold': np.int64(1700), 'c_left': np.float64(3888.888888888876), 'c_right': np.float64(-2592.5925925926035)}, {'threshold': np.int6

In [59]:
def predict(x):
  """Evaluate the boosted model on x, printing every intermediate step.

  Starts from the constant F0_x and adds gamma * stump(x) for each
  (learner, gamma) pair stored in the module-level `learners` / `gammas`
  lists, in the order they were appended.

  x is expected to be a NumPy array (the example passes a 1-element array of
  house sizes). Returns a float64 array shaped like x.
  """
  F = np.full_like(x, F0_x, dtype=np.float64)
  print(f"Starting F: {F}")

  for learner, gamma in zip(learners, gammas):
    # Inputs strictly below the threshold take the left value, the rest the right one
    goes_left = x < learner['threshold']
    h = np.where(goes_left, learner['c_left'], learner['c_right'])
    print(f"x: {x}. h: {h}. Gamma: {gamma}. Learner: {learner}")
    F += gamma * h
    print(f"F: {F} \n")

  return F

# Example Prediction: run the model on a single house size
test_size = 1750
test_input = np.array([test_size])
predicted_price = predict(test_input)
print(f"\nPredicted price for house size {test_size} sq ft: ${predicted_price[0]:,.2f}")

Starting F: [340000.]
x: [1750]. h: [20000.]. Gamma: 1.0. Learner: {'threshold': np.int64(1700), 'c_left': np.float64(-30000.0), 'c_right': np.float64(20000.0)}
F: [360000.] 

x: [1750]. h: [-5000.]. Gamma: 1.0. Learner: {'threshold': np.int64(1900), 'c_left': np.float64(-5000.0), 'c_right': np.float64(20000.0)}
F: [355000.] 

x: [1750]. h: [-3333.33333333]. Gamma: 0.9999999999999998. Learner: {'threshold': np.int64(1700), 'c_left': np.float64(5000.0), 'c_right': np.float64(-3333.3333333333335)}
F: [351666.66666667] 

x: [1750]. h: [2500.]. Gamma: 1.0000000000000002. Learner: {'threshold': np.int64(1600), 'c_left': np.float64(-10000.0), 'c_right': np.float64(2499.9999999999854)}
F: [354166.66666667] 

x: [1750]. h: [-2500.]. Gamma: 1.0000000000000002. Learner: {'threshold': np.int64(1700), 'c_left': np.float64(3750.0), 'c_right': np.float64(-2500.0000000000196)}
F: [351666.66666667] 

x: [1750]. h: [-3888.88888889]. Gamma: 1.0. Learner: {'threshold': np.int64(1800), 'c_left': np.float6

In [34]:
import numpy as np
import pandas as pd

# Sample Data
data = {
    'Size': [1500, 1600, 1700, 1800, 1900],
    'Price': [300000, 320000, 340000, 360000, 380000]
}

df = pd.DataFrame(data)

# Feature and Target
X = df['Size'].values
y = df['Price'].values

# Number of Boosting Iterations
T = 2

# Initialize the model with the mean of y
F_0 = np.mean(y)
print(f"Initial model F0(x): {F_0}")

# Initialize list to store weak learners and their step sizes
learners = []
gammas = []

# Current model predictions
F_current = np.full_like(y, F_0, dtype=np.float64)


def fit_stump(X, residuals):
    """Fit a one-split regression stump to `residuals` by least squares.

    Every distinct value s of X is tried as a threshold: entries with X < s
    go left, entries with X >= s go right, and each side predicts the mean of
    its own residuals.

    Args:
        X: 1-D NumPy array of feature values.
        residuals: NumPy array of the same shape holding the current residuals.

    Returns:
        (threshold, c_left, c_right, error) for the split with the smallest
        summed squared error (the first such threshold wins ties), or
        (None, None, None, inf) when no threshold leaves data on both sides,
        i.e. X has fewer than two distinct values.
    """
    best = (None, None, None, float('inf'))

    # np.unique returns the candidate thresholds sorted in ascending order
    for s in np.unique(X):
        left_mask = X < s
        right_mask = X >= s

        # If either side has no data, skip
        if not left_mask.any() or not right_mask.any():
            continue

        # Calculate mean residuals for each split
        c_left = residuals[left_mask].mean()
        c_right = residuals[right_mask].mean()

        # Calculate squared error
        error = np.sum((residuals[left_mask] - c_left) ** 2) + \
                np.sum((residuals[right_mask] - c_right) ** 2)

        if error < best[3]:
            best = (s, c_left, c_right, error)

    return best


def line_search_step(residuals, h_t):
    """Return the gamma that minimizes sum((residuals - gamma * h_t) ** 2).

    That is sum(residuals * h_t) / sum(h_t ** 2). When h_t is zero everywhere
    the ratio would be 0/0 = nan, so 0.0 is returned instead (adding
    0 * h_t leaves the model unchanged).
    """
    denominator = np.sum(h_t ** 2)
    if denominator == 0:
        return 0.0
    return np.sum(residuals * h_t) / denominator


for t in range(1, T + 1):
    print(f"\n--- Iteration {t} ---")

    # Compute residuals
    residuals = y - F_current
    print(f"Residuals: {residuals}")

    # Fit a decision stump to residuals
    best_threshold, best_c_left, best_c_right, best_error = fit_stump(X, residuals)

    # Without a valid split there is no weak learner to add
    if best_threshold is None:
        print("No valid split found (X needs at least two distinct values); stopping early.")
        break

    print(f"Best Threshold: {best_threshold}")
    print(f"c_left: {best_c_left}, c_right: {best_c_right}")
    print(f"Best Error: {best_error}")

    # Define the weak learner function
    def weak_learner(x):
        """Stump fitted in this iteration: c_left where x < threshold, else c_right."""
        return np.where(x < best_threshold, best_c_left, best_c_right)

    # Predict residuals using the weak learner
    h_t = weak_learner(X)
    print(f"Weak Learner Predictions: {h_t}")

    # Calculate gamma (step size)
    gamma_t = line_search_step(residuals, h_t)

    # A zero step means the stump predicts 0 everywhere: the residuals are
    # already fitted and further iterations cannot change the model
    if gamma_t == 0:
        print("Weak learner predicts 0 everywhere (nothing left to fit); stopping early.")
        break
    print(f"Gamma_t (Step Size): {gamma_t}")

    # Update the model
    F_current += gamma_t * h_t
    print(f"Updated F(x): {F_current}")

    # Store the weak learner and gamma
    learners.append({
        'threshold': best_threshold,
        'c_left': best_c_left,
        'c_right': best_c_right
    })
    gammas.append(gamma_t)

# Final Model Prediction
print("\n--- Final Model ---")
print(f"F(x): {F_current}")

def predict(x):
    """Evaluate the trained boosted model on x.

    Starts from the constant F_0 and adds gamma * stump(x) for each
    (learner, gamma) pair stored in the module-level `learners` / `gammas`
    lists, in the order they were appended.

    x is expected to be a NumPy array (the example passes a 1-element array of
    house sizes). Returns a float64 array shaped like x.
    """
    total = np.full_like(x, F_0, dtype=np.float64)
    for stump, step in zip(learners, gammas):
        # Inputs strictly below the threshold take the left value, the rest the right one
        goes_left = x < stump['threshold']
        contribution = np.where(goes_left, stump['c_left'], stump['c_right'])
        total += step * contribution
    return total

# Example Prediction: run the model on a single house size
test_size = 1750
test_input = np.array([test_size])
predicted_price = predict(test_input)
print(f"\nPredicted price for house size {test_size} sq ft: ${predicted_price[0]:,.2f}")


Initial model F0(x): 340000.0

--- Iteration 1 ---
Residuals: [-40000. -20000.      0.  20000.  40000.]
Best Threshold: 1700
c_left: -30000.0, c_right: 20000.0
Best Error: 1000000000.0
Weak Learner Predictions: [-30000. -30000.  20000.  20000.  20000.]
Gamma_t (Step Size): 1.0
Updated F(x): [310000. 310000. 360000. 360000. 360000.]

--- Iteration 2 ---
Residuals: [-10000.  10000. -20000.      0.  20000.]
Best Threshold: 1900
c_left: -5000.0, c_right: 20000.0
Best Error: 500000000.0
Weak Learner Predictions: [-5000. -5000. -5000. -5000. 20000.]
Gamma_t (Step Size): 1.0
Updated F(x): [305000. 305000. 355000. 355000. 380000.]

--- Final Model ---
F(x): [305000. 305000. 355000. 355000. 380000.]

Predicted price for house size 1750 sq ft: $355,000.00
