In [1]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import numpy as np


2023-06-03 03:26:03.085389: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [100]:
class DenseLayer(tf.Module):
    def __init__(self, input_size, output_size, activation='linear'):
        super().__init__()
        self.weights = tf.Variable(tf.random.normal((input_size, output_size)) * 0.01) 
        self.biases = tf.Variable(tf.zeros(output_size))
        
        self.activation = activation
        self.activation_map = { 'sigmoid': tf.sigmoid,
                                'relu' : tf.nn.relu,
                                'linear' : tf.identity
                              }
    
                            
class DenseNetwork(tf.Module):
    def __init__(self):
        super().__init__()
        self.layers = []
        
    def add_layer(self, layer):
        self.layers.append(layer)
    
                                  
    def fit(self, inputs,output, lr,max_iter = 1000,dropout = 1):
        
        p = dropout
        b1 = 0.9
        b2 = 0.999
        eps = 1e-6
        #initing moments for Adam
        v_w = [0] * len(self.layers)
        r_w = [0] *  len(self.layers)
        v_b = [0] *  len(self.layers)
        r_b = [0] *  len(self.layers)
        
        
                                  
        for epoch in range(1,max_iter + 1):
            #performing forward prop
            with tf.GradientTape(persistent = False) as tape:
                L = inputs
                for layer in self.layers:
                    if layer != self.layers[-1]:
                        L = L @ layer.weights + layer.biases
                        #dropout
                        L = layer.activation_map[layer.activation](L)
                        L = L * tf.cast((tf.random.uniform(L.shape) < p),tf.float32)/p
                    else:
                        #skipping dropout for outputs
                        L = L @ layer.weights + layer.biases
                        L = layer.activation_map[layer.activation](L)
                mse = tf.math.reduce_mean((L - output) * (L - output))
                print(f"epoch {epoch}:{mse}")
            grad = tape.gradient(mse, self.trainable_variables)
            self.grad = grad
            #grad update
            if epoch % 200 == 0:
                lr /= 10
            for i, layer in enumerate(self.layers):
                #Adam
                v_w[i] = b1 * v_w[i] + (1-b1) * grad[2*i + 1]
                v_b[i] = b1 * v_b[i] + (1-b1) * grad[2*i]
                
                
                r_w[i] = b1 * r_w[i] + (1 - b1) * grad[2*i + 1]* grad[2*i + 1]
                r_b[i] = b1 * r_b[i] + (1 - b1) * grad[2*i] * grad[2*i]
                
                
                v_w_hat = v_w[i]/(1 - b1**epoch)
                v_b_hat = v_b[i]/(1 - b1**epoch)
                
                r_w_hat = r_w[i]/(1 - b2**epoch)
                r_b_hat = r_b[i]/(1 - b2**epoch)

                
                grad_w = lr * v_w_hat/(tf.sqrt(r_w_hat) + eps)
                grad_b = lr * v_b_hat/(tf.sqrt(r_b_hat) + eps)
                
                layer.biases.assign(layer.biases - tf.Variable(grad_b))
                layer.weights.assign(layer.weights - tf.Variable(grad_w))

        
    def predict(self,inputs):
        L = inputs
        for layer in self.layers:

            L = L @ layer.weights + layer.biases
            L = layer.activation_map[layer.activation](L)
    
        return L

In [102]:
# Generate synthetic dataset
X, y = make_regression(n_samples=100, n_features=10, noise=0.5, random_state=78)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=78
)

y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

# Standardize the input features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train sklearn's LinearRegression model
lr_model = LinearRegression()
lr_model.fit(X_train_scaled, y_train)

# Predict with sklearn's LinearRegression model
y_pred_lr = lr_model.predict(X_test_scaled)

net = DenseNetwork()
net.add_layer(DenseLayer(10,10,'relu'))
net.add_layer(DenseLayer(10,10,'relu'))
net.add_layer(DenseLayer(10,1))
net.fit(X_train_scaled,y_train,1,dropout = 1)

y_pred_dense = net.predict(X_test_scaled)

# Compare the results
print("Mean Squared Error (sklearn LinearRegression):", mean_squared_error(y_test, y_pred_lr))
print("Mean Squared Error (DenseNetwork implemented from scratch):", mean_squared_error(y_test, y_pred_dense))

epoch 1:18553.91796875
epoch 2:18555.73046875
epoch 3:18545.955078125
epoch 4:18500.76953125
epoch 5:18393.666015625
epoch 6:18169.900390625
epoch 7:17787.77734375
epoch 8:17190.251953125
epoch 9:16328.197265625
epoch 10:15169.515625
epoch 11:13725.013671875
epoch 12:12093.681640625
epoch 13:10506.9609375
epoch 14:9266.0419921875
epoch 15:8502.7763671875
epoch 16:7569.2373046875
epoch 17:6014.56494140625
epoch 18:4243.4111328125
epoch 19:2764.162353515625
epoch 20:1874.014892578125
epoch 21:1514.642333984375
epoch 22:1659.8033447265625
epoch 23:2279.35595703125
epoch 24:2856.760498046875
epoch 25:2919.78515625
epoch 26:2532.23388671875
epoch 27:1890.426025390625
epoch 28:1332.5596923828125
epoch 29:1043.4388427734375
epoch 30:911.4248046875
epoch 31:815.6580810546875
epoch 32:722.8958740234375
epoch 33:625.5436401367188
epoch 34:568.0466918945312
epoch 35:616.103271484375
epoch 36:762.9039306640625
epoch 37:836.3033447265625
epoch 38:733.5245971679688
epoch 39:573.3450317382812
epoch 4