In [1]:
import numpy as np

X = np.random.rand(10000,5)
m = np.random.randint(low = 1, high = 20,size = (5,1))  #parametri random tra low e high
q = np.random.rand(1)
y = (X @ m) + q 

noise = np.random.randn(y.shape[0], y.shape[1])
y = y + noise

X.shape, m.shape, q.shape, y.shape

((10000, 5), (5, 1), (1,), (10000, 1))

In [2]:
X = np.concatenate([X , np.ones((X.shape[0],1))], axis = 1)
m = np.concatenate([m,q.reshape(1,-1)],axis = 0)

In [3]:
def partial_derivative(X_batch, y_batch, m_stat):

  y_pred = X_batch @ m_stat
  n = len(X_batch)

  df_dm =  (-2/n) * (X_batch.T @ (y_batch - y_pred))
  df_dm = df_dm.reshape(len(df_dm),-1)

  return df_dm

In [4]:
def mean_squared_error(X,y,m_stat):
  y_pred = X @ m_stat
  mse = np.sum(((y_pred - y)**2),axis = 0) / len(X)
  
  return mse

In [5]:
def training(X, y, batch_size, lr, epochs):
  
  for epoch in range(epochs):

    # random initial statistics
    if epoch == 0:
      m_stat = np.random.rand(X.shape[1],1)

    # shuffle X and y using same permutation
    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)

    X = X[indices]
    y = y[indices]

    # store comulative derivative
    cumulative_derivative = np.zeros((X.shape[1],1))

    for batch in range(len(X)//batch_size):
      start = batch*batch_size
      stop = (batch*batch_size) + batch_size

      X_batch = X[start:stop]
      y_batch = y[start:stop]
      
      # derivative
      cumulative_derivative = cumulative_derivative + partial_derivative(X_batch, y_batch, m_stat)

      # updating rule
      m_stat = m_stat - (lr*cumulative_derivative)
    
    print(f"epoch: {epoch} ----> MSE: {mean_squared_error(X,y,m_stat)}")
      
  return m_stat

In [6]:
batch_size = 1024
lr = 0.01
epochs = 500

m_stat = training(X,y, batch_size,lr,epochs)

epoch: 0 ----> MSE: [123.26104414]
epoch: 1 ----> MSE: [37.05482752]
epoch: 2 ----> MSE: [18.36066627]
epoch: 3 ----> MSE: [13.65401493]
epoch: 4 ----> MSE: [11.61837811]
epoch: 5 ----> MSE: [10.35606028]
epoch: 6 ----> MSE: [9.30584033]
epoch: 7 ----> MSE: [8.40373839]
epoch: 8 ----> MSE: [7.61179216]
epoch: 9 ----> MSE: [6.90993347]
epoch: 10 ----> MSE: [6.29273157]
epoch: 11 ----> MSE: [5.75447944]
epoch: 12 ----> MSE: [5.27106568]
epoch: 13 ----> MSE: [4.84793968]
epoch: 14 ----> MSE: [4.47216597]
epoch: 15 ----> MSE: [4.14566923]
epoch: 16 ----> MSE: [3.85200668]
epoch: 17 ----> MSE: [3.58564773]
epoch: 18 ----> MSE: [3.35230104]
epoch: 19 ----> MSE: [3.14680877]
epoch: 20 ----> MSE: [2.96112546]
epoch: 21 ----> MSE: [2.79341993]
epoch: 22 ----> MSE: [2.64098053]
epoch: 23 ----> MSE: [2.50459222]
epoch: 24 ----> MSE: [2.38150373]
epoch: 25 ----> MSE: [2.26769014]
epoch: 26 ----> MSE: [2.16732809]
epoch: 27 ----> MSE: [2.07278018]
epoch: 28 ----> MSE: [1.98778401]
epoch: 29 ----> M

In [7]:
print(m_stat,"\n")
print(m)

X_test = np.random.rand(500,5)
X_test = np.concatenate([X_test,np.ones(shape = (500,1))] , axis = 1)
y_test = X_test @ m

y_preds = X_test @ m_stat
mse = mean_squared_error(X_test, y_test, m_stat)

print("mse" , mse)

print(y_test[:5])
print(y_preds[:5])

[[13.00639727]
 [ 1.99181068]
 [ 9.98358572]
 [18.01930283]
 [ 4.00991681]
 [ 0.43980524]] 

[[13.        ]
 [ 2.        ]
 [10.        ]
 [18.        ]
 [ 4.        ]
 [ 0.43492067]]
mse [0.00016199]
[[28.71518731]
 [33.78538628]
 [16.60378544]
 [34.80625448]
 [15.34419154]]
[[28.72092683]
 [33.79828508]
 [16.60602936]
 [34.82223876]
 [15.34569742]]
