In [1]:
import torch
import statsmodels.api as sm
from torch.nn import MSELoss

In [2]:
# Data generation: synthetic linear-regression data with known parameters
random_seed = 0
torch.manual_seed(random_seed)  # fix the RNG so a fresh Run All reproduces the same data
num_features = 5
num_data_points = 5000
# Features drawn uniformly from [0, 1)
x = torch.rand(num_data_points, num_features)
# Integer-valued ground truth in {5, ..., 9} (randint's upper bound is exclusive)
true_weights = torch.randint(5, 10, size=(1, num_features)).float()
true_bias = torch.randint(5, 10, size=(1,)).float()
# Standard-normal noise, one value per data point
error = torch.randn((num_data_points, 1))
# Linear model plus noise; the result has shape (num_data_points, 1)
y_target = true_bias + x @ true_weights.T + error


In [3]:
# statsmodels works on NumPy arrays, so export the tensors first
import pandas as pd
x_np = x.detach().numpy()
y_target_np = y_target.detach().numpy().squeeze()

# Add a column of ones so OLS also estimates an intercept (bias)
x_np_with_intercept = sm.add_constant(x_np)

# Ordinary least squares fit of y on [1, x]
ols_model = sm.OLS(endog=y_target_np, exog=x_np_with_intercept).fit()

# params[0] is read as the intercept, the remaining entries as the feature weights
ols_bias = ols_model.params[0]
ols_weights = ols_model.params[1:]

# Side-by-side table of the true weights and the OLS estimates
params_df = pd.DataFrame(
    dict(
        actual=true_weights.detach().numpy().squeeze(),
        ols_weights=ols_weights,
    )
)
params_df

Unnamed: 0,actual,ols_weights
0,6.0,5.905957
1,8.0,8.019606
2,7.0,7.005886
3,5.0,4.896109
4,7.0,7.034547


In [11]:
## Now fit the same model with backpropagation.
## Start from random weights and bias that autograd tracks (requires_grad=True).
weight_initial = torch.randn((1, num_features), requires_grad=True)
bias_initial = torch.randn(1, requires_grad=True)
# Print the starting values together with their shapes so the labels match what is shown
print(f"Initial weights {weight_initial} with shape {tuple(weight_initial.shape)}")
print(f"Initial bias {bias_initial} with shape {tuple(bias_initial.shape)}")
batch_size = 128
lr = 0.001
# Number of mini-batch updates: the training loop draws one batch per iteration,
# so this is a step count rather than a count of full passes over the data
epochs = 100000

Weight shape tensor([[ 0.5978, -0.5340, -0.7392,  0.4005,  0.4270]], requires_grad=True)
Bias Shape tensor([0.4175], requires_grad=True)


In [13]:
# Uniform categorical distribution over all num_data_points row indices;
# each draw below picks the rows of one mini-batch (with replacement).
uniform_probs = torch.ones(num_data_points) / num_data_points
dist = torch.distributions.Categorical(probs=uniform_probs)
mse_loss_f = MSELoss()

for cur_epoch in range(epochs):
    # Forward pass on a freshly sampled mini-batch
    cur_batch_index = dist.sample((batch_size,))
    cur_batch_x, cur_batch_y = x[cur_batch_index], y_target[cur_batch_index]
    cur_batch_predict_y = cur_batch_x @ weight_initial.T + bias_initial
    cur_batch_loss = mse_loss_f(cur_batch_predict_y, cur_batch_y)
    if not cur_epoch % 500:
        print(f"Loss of current batch: {cur_batch_loss}")

    # Backward pass, then a plain gradient-descent step taken outside autograd tracking
    cur_batch_loss.backward()
    with torch.no_grad():
        for param in (weight_initial, bias_initial):
            param.sub_(lr * param.grad)
            # Reset the gradient so the next backward() does not accumulate into it
            param.grad.zero_()
    

Loss of current batch: 1.1736273765563965
Loss of current batch: 1.0449539422988892
Loss of current batch: 1.0110138654708862
Loss of current batch: 0.918136715888977
Loss of current batch: 0.9421952366828918
Loss of current batch: 0.9346218705177307
Loss of current batch: 1.1740509271621704
Loss of current batch: 0.9785818457603455
Loss of current batch: 0.7621504068374634
Loss of current batch: 0.9516388177871704
Loss of current batch: 0.979264497756958
Loss of current batch: 1.201424241065979
Loss of current batch: 0.9267152547836304
Loss of current batch: 0.9184979200363159
Loss of current batch: 0.8523565530776978
Loss of current batch: 0.9455530047416687
Loss of current batch: 0.8586906790733337
Loss of current batch: 1.1558098793029785
Loss of current batch: 1.1044906377792358
Loss of current batch: 1.0794744491577148
Loss of current batch: 1.093479871749878
Loss of current batch: 0.9180593490600586
Loss of current batch: 0.8200418949127197
Loss of current batch: 1.0174242258071

In [17]:
# Show the gradient-descent estimates; both tensors were updated in place by the training loop
weight_initial, bias_initial

(tensor([[5.9048, 8.0205, 7.0086, 4.8951, 7.0362]], requires_grad=True),
 tensor([9.0788], requires_grad=True))

In [18]:
# Re-display the true weights next to the OLS estimates for comparison
params_df

Unnamed: 0,actual,ols_weights
0,6.0,5.905957
1,8.0,8.019606
2,7.0,7.005886
3,5.0,4.896109
4,7.0,7.034547


In [19]:
# True intercept used to generate y_target; compare with the learned bias
true_bias

tensor([9.])