<a href="https://colab.research.google.com/github/szn5400/CO2_all/blob/main/simple_network_to_get_square_grad.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Training inputs: each sample repeats one integer (2..7) in both features.
X = [[v, v] for v in range(2, 8)]
# Targets are the element-wise squares of the inputs.
# (An earlier experiment used doubled targets instead: [[2 * v, 2 * v] for v in range(2, 8)].)
Y = [[v * v, v * v] for v in range(2, 8)]

In [2]:
# Layer widths of the network: input, hidden and output layers all have two units.
size_input = size_hidden = size_output = 2
# Six training examples, no held-out test examples.
number_of_train_examples, number_of_test_examples = 6, 0

In [3]:
import os
import numpy as np
import time
import tensorflow as tf

In [4]:
# Fix both random number generators (TensorFlow and NumPy) to the same seed
# so that the randomly initialised weights are repeatable between runs.
tf.random.set_seed(5)
np.random.seed(5)

In [8]:
class MLP(object):
  """
  Two-layer perceptron (ReLU hidden layer, softmax output) trained in eager mode.

  Besides the usual forward/backward passes, `loss` also prints the first and
  second derivatives of `output * input` with respect to the input.
  """

  def __init__(self, size_input, size_hidden, size_output, device=None):
    """
    size_input: int, size of input layer
    size_hidden: int, size of hidden layer
    size_output: int, size of output layer
    device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
    """
    self.size_input = size_input
    self.size_hidden = size_hidden
    self.size_output = size_output
    self.device = device

    # Initialize weights between input layer and hidden layer
    self.W1 = tf.Variable(tf.random.normal([self.size_input, self.size_hidden]))
    # Initialize biases for hidden layer
    self.b1 = tf.Variable(tf.random.normal([1, self.size_hidden]))
    # Initialize weights between hidden layer and output layer
    self.W2 = tf.Variable(tf.random.normal([self.size_hidden, self.size_output]))
    # Initialize biases for output layer
    self.b2 = tf.Variable(tf.random.normal([1, self.size_output]))

    # Define variables to be updated during backpropagation
    self.variables = [self.W1, self.W2, self.b1, self.b2]

    # Create the optimizer once: Adam keeps running moment estimates per
    # variable, and rebuilding it on every step would throw that state away.
    self.optimizer = tf.keras.optimizers.Adam(learning_rate=1e-1)

    # Input of the most recent forward pass (set by compute_output); stays
    # None until the first forward pass has been run.
    self.X = None

  def forward(self, X):
    """
    forward pass
    X: Tensor, inputs

    Runs compute_output (inside the requested device scope when a device was
    given), stores the result in self.y and returns it.
    """
    if self.device is not None:
      with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
        self.y = self.compute_output(X)
    else:
      self.y = self.compute_output(X)

    return self.y

  def loss(self, y_pred, y_true):
    '''
    Mean squared error between predictions and targets.

    y_pred - Tensor of shape (batch_size, size_output)
    y_true - Tensor of shape (batch_size, size_output)

    Returns the result of tf.losses.mean_squared_error on the reshaped,
    float32 inputs (one value per example, averaged over the output axis).

    Side effect: prints gradient diagnostics for the input of the most recent
    forward pass (see _print_input_gradients); nothing is printed if no
    forward pass has been run yet.
    '''
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(tf.reshape(y_pred, (-1, self.size_output)), dtype=tf.float32)
    loss1 = tf.losses.mean_squared_error(y_true_tf, y_pred_tf)

    self._print_input_gradients()

    return loss1

  def _print_input_gradients(self):
    """
    Print the network output, d(output * x)/dx and its second derivative for
    the input of the most recent forward pass.
    """
    if self.X is None:
      # No forward pass yet, so there is no input to differentiate against.
      return

    x_in = tf.convert_to_tensor(self.X, dtype=tf.float32)
    # Nested tapes: the outer tape records the inner gradient computation so
    # that it can be differentiated a second time.
    with tf.GradientTape() as outer_tape:
      outer_tape.watch(x_in)
      with tf.GradientTape() as inner_tape:
        inner_tape.watch(x_in)
        predicted_out = self.forward(x_in)
        print('predicted output', predicted_out)
        var_any = predicted_out*x_in
      grad_gas_with_time = inner_tape.gradient(var_any, x_in)
    grd_grad_gas_with_time = outer_tape.gradient(grad_gas_with_time, x_in)
    print('grad values',grad_gas_with_time)
    print('grad_grad values',grd_grad_gas_with_time)

  def accuracy(self, y_pred, y_true):
    """
    Despite the name, returns the summed absolute error between the
    predictions and the targets (a scalar float32 tensor; lower is better).
    """
    y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
    y_pred_tf = tf.cast(tf.reshape(y_pred, (-1, self.size_output)), dtype=tf.float32)
    return tf.reduce_sum(tf.abs(y_true_tf-y_pred_tf))

  def backward(self, X_train, y_train):
    """
    backward pass

    Runs a forward pass on X_train, evaluates the loss against y_train and
    applies one Adam update to self.variables using the optimizer created in
    __init__.
    """
    with tf.GradientTape() as tape:
      predicted = self.forward(X_train)
      current_loss = self.loss(predicted, y_train)
    grads = tape.gradient(current_loss, self.variables)
    self.optimizer.apply_gradients(zip(grads, self.variables))

  def compute_output(self, X):
    """
    Custom method to obtain output tensor during forward pass

    Also remembers the (float32) input in self.X so that loss() can compute
    gradients with respect to it.
    """
    # Cast X to float32
    X_tf = tf.cast(X, dtype=tf.float32)
    self.X = tf.Variable(X_tf)
    # Compute values in hidden layer1
    what1 = tf.matmul(X_tf, self.W1) + self.b1
    hhat1 = tf.nn.relu(what1)

    # Compute output
    output = tf.matmul(hhat1, self.W2) + self.b2
    # NOTE(review): softmax makes every output row sum to 1, which suits
    # classification; combined with the MSE loss and regression-style targets
    # larger than 1 the network cannot reach the targets. Confirm this
    # activation is intended.
    output = tf.nn.softmax(output)
    return output

  def print_val(self):
    """Print the current trainable variables (W1, W2, b1, b2)."""
    print("weights", self.variables)

In [9]:
# How many full passes over the training data the training loop performs.
NUM_EPOCHS: int = 5

In [10]:
# Initialize the model. Note that device='gpu' requests GPU placement even
# though the variable is called mlp_on_cpu; the name is kept unchanged so that
# any other cell referring to it keeps working.
mlp_on_cpu = MLP(size_input, size_hidden, size_output, device='gpu')

# Per-epoch history of the loss and of the value returned by MLP.accuracy
loss_with_epoch = []
acc_with_epoch = []

time_start = time.time()
for epoch in range(NUM_EPOCHS):
  ac = 0
  count = 0
  loss_total = tf.zeros([1,1], dtype=tf.float32)
  lt = 0
  train_ds = tf.data.Dataset.from_tensor_slices((X, Y)).shuffle(25, seed=epoch*(5)).batch(6)
  for inputs, outputs in train_ds:
    preds = mlp_on_cpu.forward(inputs)
    # Evaluate the loss once per batch and reuse it: MLP.loss also runs and
    # prints gradient diagnostics, so calling it twice doubled both the work
    # and the printed output without changing the accumulated values.
    batch_loss = mlp_on_cpu.loss(preds, outputs)
    loss_total = loss_total + batch_loss
    lt = lt + batch_loss
    mlp_on_cpu.backward(inputs, outputs)
    ac = ac+mlp_on_cpu.accuracy(preds, outputs)
    count += 1
    if(epoch%1000 == 0):
      mlp_on_cpu.print_val()
      print("output", outputs, "preds", preds)
  loss_with_epoch.append(np.sum(loss_total) / len(X))
  acc_with_epoch.append(ac/count)
time_taken = time.time() - time_start

print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))
# Time per epoch = total time / number of epochs

predicted output tf.Tensor(
[[9.1865581e-01 8.1344254e-02]
 [9.9999464e-01 5.3592635e-06]
 [1.0000000e+00 8.2615923e-09]
 [9.9986351e-01 1.3648056e-04]
 [9.9653542e-01 3.4645328e-03]
 [9.9999976e-01 2.1041905e-07]], shape=(6, 2), dtype=float32)
grad values tf.Tensor(
[[9.1865617e-01 8.1343569e-02]
 [9.9999464e-01 5.3592635e-06]
 [1.0000000e+00 8.2615923e-09]
 [9.9986351e-01 1.3648056e-04]
 [9.9653542e-01 3.4645328e-03]
 [9.9999976e-01 2.1041905e-07]], shape=(6, 2), dtype=float32)
grad_grad values tf.Tensor(
[[ 2.4192858e-01 -2.4192877e-01]
 [ 1.7404556e-05 -1.7350445e-05]
 [ 0.0000000e+00 -2.6746786e-08]
 [ 4.4178963e-04 -4.4179356e-04]
 [ 1.1177890e-02 -1.1178254e-02]
 [ 7.1525574e-07 -6.8122847e-07]], shape=(6, 2), dtype=float32)
predicted output tf.Tensor(
[[9.1865581e-01 8.1344254e-02]
 [9.9999464e-01 5.3592635e-06]
 [1.0000000e+00 8.2615923e-09]
 [9.9986351e-01 1.3648056e-04]
 [9.9653542e-01 3.4645328e-03]
 [9.9999976e-01 2.1041905e-07]], shape=(6, 2), dtype=float32)
grad values t