In [1]:
import numpy as np

class DenseLayer:
    """Fully connected layer trained with plain gradient descent.

    The forward pass computes ``activation(inputs @ weights + biases)``; the
    backward pass updates ``weights`` and ``biases`` in place and returns the
    gradient with respect to the layer's inputs.

    Parameters
    ----------
    input_size : int
        Number of input features (rows of ``weights``).
    output_size : int
        Number of units (columns of ``weights``).
    activation : str or None
        ``'relu'`` or ``'sigmoid'``. Any other value, including ``None``,
        leaves the pre-activation unchanged (identity).
    """

    # Activation names that actually transform the pre-activation.
    _NONLINEAR = ('relu', 'sigmoid')

    def __init__(self, input_size, output_size, activation = None):
        # Small Gaussian initial weights, zero biases.
        self.weights = np.random.randn(input_size, output_size) * 0.01
        self.biases = np.zeros(output_size)
        self.activation_ = activation

    def activation(self, x, derivative = False):
        """Apply the configured activation to ``x``, or its derivative at ``x``.

        With ``derivative=True`` the element-wise derivative evaluated at the
        pre-activation ``x`` is returned. For the identity case this is an
        array of ones shaped like ``x``.
        """
        if self.activation_ == 'relu':
            if derivative:
                return 1 * (x > 0)
            return np.maximum(0, x)

        if self.activation_ == 'sigmoid':
            # exp(-log(1 + exp(-x))) equals 1 / (1 + exp(-x)) but does not
            # overflow when x is very negative.
            value = np.exp(-np.logaddexp(0, -x))
            if derivative:
                # Reuse the sigmoid value instead of evaluating it twice.
                return value * (1 - value)
            return value

        # Identity: the derivative is 1 everywhere, not x itself.
        if derivative:
            return np.ones_like(x)
        return x

    def forward(self, inputs):
        """Return the layer output for ``inputs`` and cache what ``backward`` needs."""
        # Coerce to an array so that ``backward`` can transpose the cached inputs.
        self.inputs = np.asarray(inputs)
        self.output = np.dot(self.inputs, self.weights) + self.biases

        return self.activation(self.output)

    def backward(self, grad_output, learning_rate):
        """Run one gradient-descent step and return the gradient w.r.t. the inputs.

        ``grad_output`` is the gradient of the loss with respect to this
        layer's activated output. ``forward`` must have been called first,
        because the cached ``inputs`` and ``output`` are used here.
        """
        # Only real non-linearities rescale the incoming gradient; identity
        # (None or an unrecognised name) passes it through unchanged.
        if self.activation_ in self._NONLINEAR:
            grad_output = self.activation(self.output, derivative = True) * grad_output
        grad_weights = np.dot(self.inputs.T, grad_output)
        grad_biases = np.sum(grad_output, axis=0)

        # The input gradient must use the weights from before the update.
        grad_input = np.dot(grad_output, self.weights.T)
        self.weights -= learning_rate * grad_weights
        self.biases -= learning_rate * grad_biases

        return grad_input
    
class DenseNetwork:
    """Sequential stack of layers trained with full-batch gradient descent.

    Each layer must provide ``forward(inputs)`` and
    ``backward(grad_output, learning_rate)``; layers are applied in the order
    in which they were added.
    """

    def __init__(self):
        self.layers = []

    def add_layer(self, layer):
        """Append ``layer`` to the end of the stack."""
        self.layers.append(layer)

    def forward(self, inputs):
        """Feed ``inputs`` through every layer in order and return the result."""
        for layer in self.layers:
            inputs = layer.forward(inputs)
        return inputs

    def backward(self, grad_output, learning_rate):
        """Propagate ``grad_output`` from the last layer back to the first.

        Each layer's ``backward`` is called with the gradient returned by the
        layer after it.
        """
        for layer in reversed(self.layers):
            grad_output = layer.backward(grad_output, learning_rate)

    def fit(self, x_train, y_train, epochs, learning_rate, verbose = False):
        """Train for ``epochs`` full-batch steps on a squared-error objective.

        Parameters
        ----------
        x_train : array-like
            Training inputs, passed unchanged to ``forward``.
        y_train : array-like
            Training targets. A 1-D vector of length ``n`` is accepted when
            the network output has shape ``(n, 1)``; it is reshaped to a
            column so the residual is not broadcast to ``(n, n)``.
        epochs : int
            Number of gradient-descent steps.
        learning_rate : float
            Step size handed to every layer's ``backward``.
        verbose : bool
            When true, print the mean squared error before each step.
        """
        y_train = np.asarray(y_train)
        for epoch in range(epochs):
            y_pred = self.forward(x_train)
            # Guard against silent (n, 1) - (n,) -> (n, n) broadcasting.
            if y_train.ndim == 1 and np.shape(y_pred) == (y_train.shape[0], 1):
                y_train = y_train.reshape(-1, 1)
            error = y_pred - y_train
            if verbose:
                loss = np.mean(error ** 2)
                print(f'epoch {epoch}:{loss}')
            # Gradient of the squared error, averaged over the samples.
            grad_output = 2 * error / len(x_train)
            self.backward(grad_output, learning_rate)

    def predict(self, x):
        """Return the network output for ``x`` (a plain forward pass)."""
        return self.forward(x)

In [2]:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import numpy as np

# Experiment configuration. A fixed seed makes the synthetic data, the
# train/test split and the network's initial weights identical on every
# Restart & Run All.
SEED = 42
N_SAMPLES = 100
N_FEATURES = 10
HIDDEN_UNITS = 10
EPOCHS = 1000
LEARNING_RATE = 0.001

# DenseLayer draws its initial weights from the global NumPy RNG.
np.random.seed(SEED)

X, y = make_regression(n_samples=N_SAMPLES, n_features=N_FEATURES, noise=0.5, random_state=SEED)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)
# Column-vector targets so they line up with the network's (n, 1) output.
y_train = y_train.reshape(-1,1)
y_test = y_test.reshape(-1,1)

# The scaler is fitted on the training split only and then applied to both splits.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Baseline: scikit-learn's linear regression.
lr_model = LinearRegression()
lr_model.fit(X_train_scaled, y_train)
y_pred_lr = lr_model.predict(X_test_scaled)

# From-scratch network: one ReLU hidden layer followed by a linear output unit.
dense_net = DenseNetwork()
dense_net.add_layer(DenseLayer(N_FEATURES, HIDDEN_UNITS, activation = 'relu'))
dense_net.add_layer(DenseLayer(HIDDEN_UNITS, 1))

dense_net.fit(X_train_scaled, y_train, EPOCHS, LEARNING_RATE)
y_pred_dense = dense_net.predict(X_test_scaled)

print("Mean Squared Error (sklearn LinearRegression):", mean_squared_error(y_test, y_pred_lr))
print("Mean Squared Error (DenseNetwork implemented from scratch):", mean_squared_error(y_test, y_pred_dense))

Mean Squared Error (sklearn LinearRegression): 0.3139436176757503
Mean Squared Error (DenseNetwork implemented from scratch): 178.57665027993505
