In [15]:
import numpy as np
np.random.seed(11)
class DenseLayer:
    def __init__(self, input_size, output_size, activation='sigmoid'):
        self.weights = np.random.randn(input_size, output_size) * 0.01
        self.biases = np.zeros(output_size)
        self.activation = activation
        
        
        
    def forward(self, inputs):
        self.inputs = inputs
        self.output = np.dot(inputs, self.weights) + self.biases
        
        if self.weights.shape[1] != 1:
            if self.activation == 'sigmoid':
                self.output = DenseLayer.sigmoid(self.output)

            elif self.activation == 'relu':
                self.output = DenseLayer.relu(self.output)
            
        return self.output
        
    
    def backward(self, grad_output, learning_rate):
        if self.weights.shape[1] == 1:
            
            grad_weights = np.dot(self.inputs.T, grad_output)
            grad_biases = np.sum(grad_output, axis=0)

            grad_input = np.dot(grad_output, self.weights.T)
            
        else:
            if self.activation == 'sigmoid':
                sigma = DenseLayer.sigmoid(self.inputs @ self.weights + self.biases)
                d_activation = sigma * (1 - sigma)
                
            elif self.activation == 'relu':
                d_activation = (self.inputs @ self.weights + self.biases)> 0
                d_activation = d_activation *  1
            elif self.activation == 'linear':
                d_activation = np.ones(grad_output.shape)
                
            
            grad_weights = self.inputs.T @ ( d_activation * grad_output)
            grad_biases = np.sum(grad_output * d_activation, axis=0)
            grad_input = (grad_output * d_activation) @ self.weights.T
        
            
        self.weights -= learning_rate * grad_weights
        self.biases -= learning_rate * grad_biases
        
        return grad_input
    @staticmethod
    def sigmoid(x):
        return (1 + np.exp(-x))**(-1)
    @staticmethod
    def relu(x):
        return x * (x > 0)
    
class DenseNetwork:
    def __init__(self):
        self.layers = []
    
    def add_layer(self, layer):
        self.layers.append(layer)
    
    def forward(self, inputs):
        for layer in self.layers:
            inputs = layer.forward(inputs)
        return inputs
    
    def backward(self, grad_output, learning_rate):
        for layer in reversed(self.layers):
            grad_output = layer.backward(grad_output, learning_rate)

In [19]:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import numpy as np

# Generate synthetic dataset
X, y = make_regression(n_samples=100, n_features=10, noise=0.5, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
y_train = y_train.reshape(-1,1)
y_test = y_test.reshape(-1,1)

# Standardize the input features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train scikit-learn's LinearRegression model
lr_model = LinearRegression()
lr_model.fit(X_train_scaled, y_train)

# Predict with scikit-learn's LinearRegression model
y_pred_lr = lr_model.predict(X_test_scaled)

# Train the DenseNetwork implemented from scratch
dense_net = DenseNetwork()
dense_net.add_layer(DenseLayer(10, 10,'relu'))

dense_net.add_layer(DenseLayer(10, 1,'linear'))



# Train the DenseNetwork using gradient descent
learning_rate = 0.001
num_epochs = 1000

for epoch in range(num_epochs):
    # Forward pass
    y_pred = dense_net.forward(X_train_scaled)
#     print(y_pred)
    # Compute loss (mean squared error)
    loss = np.mean((y_pred - y_train) ** 2)
    print(f'epoch {epoch}:{loss}')
    # Backward pass
    grad_output = 2 * (y_pred - y_train) / len(X_train_scaled)
    dense_net.backward(grad_output, learning_rate)

# Predict with the DenseNetwork
y_pred_dense = dense_net.forward(X_test_scaled)

# Compare the results
print("Mean Squared Error (sklearn LinearRegression):", mean_squared_error(y_test, y_pred_lr))
print("Mean Squared Error (DenseNetwork implemented from scratch):", mean_squared_error(y_test, y_pred_dense))

epoch 0:30787.15566275043
epoch 1:30786.588231319107
epoch 2:30786.027948961797
epoch 3:30785.444733019824
epoch 4:30784.814038936405
epoch 5:30784.09811301881
epoch 6:30783.231248070428
epoch 7:30782.128599256208
epoch 8:30780.658575285674
epoch 9:30778.620772753056
epoch 10:30775.650964860593
epoch 11:30771.26624951918
epoch 12:30764.70416756614
epoch 13:30754.781965035712
epoch 14:30739.63300614045
epoch 15:30716.41361639782
epoch 16:30680.728472326045
epoch 17:30625.828104594053
epoch 18:30541.357537768083
epoch 19:30411.713440929663
epoch 20:30213.45289474911
epoch 21:29912.165383049054
epoch 22:29458.916311816873
epoch 23:28788.136771595844
epoch 24:27818.74418376004
epoch 25:26468.93304322972
epoch 26:24688.443013048785
epoch 27:22514.186070671672
epoch 28:20122.95208830665
epoch 29:17811.49820097028
epoch 30:15851.238575266256
epoch 31:14310.204829563523
epoch 32:13033.535112496873
epoch 33:11793.19782423758
epoch 34:10434.207687919412
epoch 35:8890.960040253884
epoch 36:7179.5