<div>
<img src="https://cdn-images-1.medium.com/max/1024/1*-QTg-_71YF0SVshMEaKZ_g.png" 
   width="400" style="margin: 50px auto; display: block; position: relative; left: -30px;" />
</div>

<!--NAVIGATION-->
<!--NAVIGATION-->
# [PyTorch Primer](1-pytorch.ipynb) | TensorFlow Primer | [PyT vs TF](3-pytorch_vs_tf.ipynb) 

## Submodule -1.4 : A Primer on PyTorch and Tensorflow

## Table of Contents

#### 1. [Installation](#Building-TensorFlow)
#### 2. [Basics of TensorFlow](#Basics-of-TensorFlow)
#### 3. [Automatic Differentiation](#Automatic-Differentiation)



# Building TensorFlow

### Install TensorFlow2

In [None]:
!pip3 install tensorflow

# Basics of TensorFlow

#### Tensors and Properties

#### Methods on Tensors

###### Sum

In [39]:
# torch is used by every cell in this section; import it here so the
# notebook runs top-to-bottom on a fresh kernel
import torch

# Set seed for reproducibility
torch.manual_seed(0)


# Random matrix of shape=(3,2)
x = torch.rand(3,2)
print(f"x: {x}")

# Method 1: functional form; dim=1 adds up the entries of each row
xsum = torch.sum(x, dim=1)
print(f"xsum using mthod1: {xsum}")

# Method 2: the equivalent tensor method. The result must be stored,
# otherwise the print below only repeats the method-1 value.
xsum = x.sum(dim=1)
print(f"xsum using mthod2: {xsum}")



x: tensor([[0.4963, 0.7682],
        [0.0885, 0.1320],
        [0.3074, 0.6341]])
xsum using mthod1: tensor([1.2645, 0.2205, 0.9415])
xsum using mthod2: tensor([1.2645, 0.2205, 0.9415])


###### Reshape : `view` and  `reshape`  methods

In [62]:
#### Reshaping with `view` (no copy: the result shares data with x)

# A vector of length N=12
x = torch.tensor([1,2,3,4,5,6,7,8,9,10, 11, 12])
# Reshape into a matrix of shape=(3,4); keep the result, `view` does not
# modify x itself
y1 = x.view(3,4)

# Reshape with an inferred number of rows (-1) and 4 columns
y2 = x.view(-1, 4)

#### Reshaping with `reshape` (a view when possible, otherwise a copy)

# A vector of length N=12
x = torch.tensor([1,2,3,4,5,6,7,8,9,10,11,12])

# Reshape into a matrix of shape=(3,4)
y3 = x.reshape(3,4)

# Reshape with an inferred number of rows (-1) and 4 columns
y4 = x.reshape(-1,4)

###### Computing Norm of a vector using `norm` method

$$
L_p~\text{norm}:
||{\mathbf{x}}||_p = \left(\sum_i |x_i|^p\right)^{\frac{1}{p}}
$$


$$
L^1~\text{norm}:
|| {\mathbf{x}} ||_1 = |x_1| + |x_2| + \ldots + |x_n|
$$

$$
L^2~\text{norm}:
|| {\mathbf{x}} ||_2 = \sqrt{x_1^2 + x_2^2 + \ldots + x_n^2}
$$



In [90]:
# Fixed seed so the random vector (and therefore both norms) is reproducible
torch.manual_seed(0)
x = torch.rand(3)

# Compute each norm once and reuse it; the earlier bare `x.norm(...)`
# statements were evaluated and thrown away
l1_norm = x.norm(p=1)
l2_norm = x.norm(p=2)
print(f"L1 Norm of x is:{l1_norm}")
print(f"L2 Norm of x is:{l2_norm}")

L1 Norm of x is:1.3529558181762695
L2 Norm of x is:0.9188381433486938


###### Computing Norm of a vector without using the method

In [89]:
# Functional spelling. L1 norm: sum of the absolute values of x
n1 = torch.sum(torch.abs(x))
print(f"L1 norm: is: {n1}")
# L2 norm: square root of the sum of squared entries of x
n2 = torch.sqrt(torch.sum(x**2))
print(f"L2 norm: is: {n2}")

## Or calling the methods directly on the tensor (same two computations,
## written as method chains)
n1 = x.abs().sum()
print(f"L1 norm: is: {n1}")
n2 = (x**2).sum().sqrt() 
print(f"L2 norm: is: {n2}")

L1 norm: is: 1.3529558181762695
L2 norm: is: 0.9188381433486938
L1 norm: is: 1.3529558181762695
L2 norm: is: 0.9188381433486938


### Tensors on GPU

In [61]:
import torch
torch.cuda.is_available()  # Check if we can use GPUs

False

In [65]:
x = torch.Tensor([[4,5,8], [3,8,9]])

### Mapping tensors to GPU

In [66]:
# Device handles for the CPU and for the first CUDA device
dev_cpu = torch.device("cpu")
dev_gpu = torch.device("cuda:0")

# Send the tensor to the CPU device; dev_gpu is created above but is not
# used in this cell
x.to(dev_cpu)


tensor([[4., 5., 8.],
        [3., 8., 9.]])

In [67]:
# Choose the device once, up front: CUDA when it is available, CPU otherwise
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Dispatch the tensor to whichever device was chosen; the returned tensor
# is the cell's displayed value
x.to(device)

tensor([[4., 5., 8.],
        [3., 8., 9.]])

### NumPy ----> PyTorch ----> NumPy

In [94]:
import numpy as np

X = np.random.random((4,4))
#print(X)

In [93]:
# NumPy to PyTorch
Y = torch.from_numpy(X)
#print(Y)

In [70]:
# PyTorch ---> NumPy

In [92]:
X = Y.numpy()
#print(X)

### Timing GPU Operations

In [82]:
A = torch.rand(100, 400, 400)
#B = A.cuda()
A.size()

torch.Size([100, 400, 400])

In [77]:
%timeit -n 3 torch.bmm(A, A)
%timeit -n 3 torch.bmm(B, B)

torch.Size([1000, 1000])


$$
a = 8\\
b = 6\\
c = a + b\\
d = a * c\\
$$
Compute $\frac{\partial d}{\partial a}$.

High-school approach:

1. $\qquad d = a*c$

2. $\qquad  \frac{\partial d}{\partial a} = c * \frac{\partial a}{\partial a} + a * \frac{\partial c}{\partial a}$

3. $\qquad  \frac{\partial d}{\partial a} = c + a* \frac{\partial c}{\partial a}$
4. $\qquad  \frac{\partial d}{\partial a} = (a + b) + a*\frac{\partial a}{\partial a} + a * \frac{\partial b}{\partial a} $
5. $\qquad  \frac{\partial d}{\partial a} = a + b + a*(1 + 0)$
6. $\qquad  \frac{\partial d}{\partial a} = 2a + b $
7. $\qquad  \frac{\partial d}{\partial a} = 2*8+ 6 = 22 $

If you also need $\frac{\partial d}{\partial b}$, steps 1-7 need to be carried out again.

# Automatic Differentiation

In [53]:
from collections import defaultdict

class Var:
    """A scalar node of a computation graph that records local derivatives.

    Attributes:
        val: the numeric value held by this node.
        local_grad: tuple of ``(operand, d(self)/d(operand))`` pairs, one per
            operand of the operation that produced this node. Empty for
            nodes created directly from a value.

    The right-hand operand of ``+``, ``-`` and ``*`` may be another ``Var``
    or a plain number; plain numbers are wrapped as constant leaf nodes.
    """

    def __init__(self, val, local_grad=()):
        self.val = val
        self.local_grad = local_grad

    @staticmethod
    def _lift(operand):
        """Return ``operand`` unchanged if it is a Var, else wrap it in one."""
        return operand if isinstance(operand, Var) else Var(operand)

    def __add__(self, other):
        """Return ``self + other``; both partial derivatives are 1."""
        other = Var._lift(other)
        y = self.val + other.val
        local_grad = ((self, 1), (other, 1))
        return Var(y, local_grad)

    def __radd__(self, other):
        """Support ``number + Var``."""
        return Var._lift(other) + self

    def __mul__(self, other):
        """Return ``self * other``; each partial is the other factor's value."""
        other = Var._lift(other)
        y = self.val*other.val
        local_grad = ((self, other.val), (other, self.val))
        return Var(y, local_grad)

    def __rmul__(self, other):
        """Support ``number * Var``."""
        return Var._lift(other) * self

    def __sub__(self, other):
        """Return ``self - other``; partials are 1 and -1 respectively."""
        other = Var._lift(other)
        y = self.val - other.val
        local_grad = ((self, 1), (other, -1))
        return Var(y, local_grad)

    def __rsub__(self, other):
        """Support ``number - Var``."""
        return Var._lift(other) - self
    

        
    
    
def get_grads(var):
    """Accumulate the derivative of ``var`` with respect to every node below it.

    Walks the graph depth-first through each node's ``local_grad`` pairs,
    multiplying the local derivatives along the way and summing the
    products of all paths that reach the same node.

    Inputs:
        var: the output node; it and every node reachable from it must
            expose a ``local_grad`` iterable of (node, derivative) pairs.
    Returns:
        A defaultdict mapping each reached node to its accumulated
        derivative; nodes that were never reached read as 0.
    """
    totals = defaultdict(int)

    def _walk(node, upstream):
        # `upstream` is the product of local derivatives from `var` to `node`
        for operand, local in node.local_grad:
            contribution = upstream * local
            totals[operand] += contribution
            _walk(operand, contribution)

    _walk(var, 1)
    return totals
    
    
    
    
   
        
        

In [57]:
# Leaf variables of the computation graph
a = Var(8)
b = Var(6)

## AD for addition: d = a * (a + b)

c = a + b
d = a*c

# Accumulated derivatives of d with respect to every node it depends on
grad = get_grads(d)

print(f"AD of addition: {grad[a]}")

## AD for subtraction: d = a * (a - b); c, d and grad are rebound

c = a - b
d = a*c

grad = get_grads(d)

print(f"AD of subtraction: {grad[a]}")



AD of addition: 22
AD of subtraction: 10


In [3]:
import tensorflow.compat.v1 as tf
import numpy as np
tf.disable_v2_behavior()


def f_x(x, A):
    """
        Inputs:
            x (a Tensorflow tensor): the trial eigenvector (i.e. the output
                of the neural network), as a column of shape (n, 1)
            A (a 2D Numpy array): the (n, n) matrix to find eigenvectors of
        Returns:
            f (a Tensorflow tensor): the value of
                f(x) = (xTx*A + (1 - xTAx)*I) x.  When x(t) is converged,
                f(x(t)) = x(t)
        Returns the value of f(x) at a given value of x.
    """
    # Take the identity size from A itself rather than from a notebook-level
    # `matrix_size` global, so the function works regardless of cell order
    n = np.shape(A)[0]
    # xTx*A
    xTxA = (tf.tensordot(tf.transpose(x), x, axes=1)*A)
    # (1- xTAx)*I
    xTAxI = (1- tf.tensordot(tf.transpose(x), tf.tensordot(A, x, axes=1), axes=1))*np.eye(n)
    # (xTx*A + (1- xTAx)*I)*x
    f = tf.tensordot((xTxA + xTAxI), x, axes=1)

    return f

##############################
# NN EIGENVALUE
##############################
def NN_Eigenvalue(matrix_size, A, max_iterations, nn_structure, eigen_guess, 
                    eigen_lr, delta_threshold, verbose):
    """
        Inputs:
            matrix_size (an int): the dimension of the matrix
            A (a 2D Numpy array): A square, symmetric matrix to find 
                an eigenvector and eigenvalue of.
            max_iterations (an int): the maximum number of training iterations 
                to be used by the neural network; must be at least 1
            nn_structure (a list): the number of neurons in each layer of the
                neural network
            eigen_guess (an int): to find the lowest eigenvalue, a number smaller
                than the predicted eigenvalue.  To find the largest eigenvalue,
                a number larger than the predicted eigenvalue.
            eigen_lr (a float): the learning rate for the portion of the loss
                function that controls which eigenvalue is found.  Set to 0.0
                to find a random eigenvalue.
            delta_threshold (a float): the minimum value desired between two
                sequentially calculated eigenvalues
            verbose (a boolean): True case prints the state of the eigenvalue for
                every 100th training iteration.
        Returns:
            eigenvalue (a float): the predicted eigenvalue of matrix A
            eigenvector (a numpy array): the predicted eigenvector of the matrix 
                A, as a row of shape (1, matrix_size)
        Raises:
            ValueError: if max_iterations is smaller than 1 (no estimate
                could be produced).
        Computes a prediction for an eigenvalue and an eigenvector of a given
        matrix using a neural network.  Parameters are given to control which
        eigenvalue and eigenvector are found.
    """
    # Without at least one iteration there is nothing to return
    if max_iterations < 1:
        raise ValueError("max_iterations must be at least 1")

    # Create a vector of random numbers and then normalize it
    # This is the beginning trial solution eigenvector
    x0 = np.random.rand(matrix_size)
    x0 = x0/np.sqrt(np.sum(x0*x0))
    # Reshape the trial eigenvector into the (1, matrix_size) row format
    # expected by the dense layers
    x0 = np.reshape(x0, (1, matrix_size))

    # Convert the trial vector into a tensor that can be used by Tensorflow
    x0_tf = tf.convert_to_tensor(x0, dtype=tf.float64)

    # Set up the neural network with the specified architecture
    with tf.variable_scope('dnn'):
        # x0 is the input to the neural network
        previous_layer = x0_tf
        # Hidden layers, one per entry of nn_structure
        for n_units in nn_structure:
            previous_layer = tf.layers.dense(previous_layer, n_units, activation=tf.nn.relu)

        # Output layer
        dnn_output = tf.layers.dense(previous_layer, matrix_size)
      
    # Define the loss function
    with tf.name_scope('loss'):
        # trial eigenvector is the output of the neural network, as a column
        x_trial = tf.transpose(dnn_output) 
        # f(x), transposed back to a row
        f_trial = tf.transpose(f_x(x_trial, A))
        # eigenvalue calculated using the trial eigenvector using the 
        # Rayleigh quotient formula
        eigenvalue_trial = tf.transpose(x_trial)@A@x_trial/(tf.transpose(x_trial)@x_trial)
        
        # From here on x_trial is the row form of the trial eigenvector
        x_trial = tf.transpose(x_trial) 

        # Fixed-point residual |f(x) - x|^2 plus a weighted pull of the
        # eigenvalue towards eigen_guess
        loss = tf.losses.mean_squared_error(f_trial, x_trial) + \
                eigen_lr*tf.losses.mean_squared_error([[eigen_guess]], eigenvalue_trial)
                                                                                                        
    # Define the training algorithm and loss function
    with tf.name_scope('train'):
        optimizer = tf.train.AdamOptimizer()
        training_op = optimizer.minimize(loss)

    ## Execute the Tensorflow session
    with tf.Session() as sess:  
        # Initialize the Tensorflow variables
        sess.run(tf.global_variables_initializer())

        # Define for calculating the change between successively calculated
        # eigenvalues
        old_eig = 0

        for i in range(max_iterations):
            sess.run(training_op)
            # Evaluate the trial eigenvector once per iteration and reuse it
            # (each evaluation is a full forward pass through the network)
            x_val = sess.run(x_trial)
            # Compute the eigenvalue using the Rayleigh quotient
            eigenvalue = (x_val @ (A @ x_val.T)
                        /(x_val @ x_val.T))[0,0]
            eigenvector = x_val

            # Calculate the change between the currently calculated eigenvalue
            # and the previous one
            delta = np.abs(eigenvalue-old_eig)
            old_eig = eigenvalue
            
            # Print useful information every 100 steps
            if verbose:
                if i % 100 == 0:
                    loss_val = sess.run(loss)
                    print("Step:", i, "/",max_iterations, "loss: ", loss_val,
                          "Eigenvalue:" , eigenvalue)
            # Kill the loop if the change in eigenvalues is less than the 
            # given threshold
            if delta < delta_threshold: 
                break

    # Return the converged eigenvalue and eigenvector
    return eigenvalue, eigenvector


def random_symmetric (matrix_size):
    """
        Inputs:
            matrix_size (an int): number of rows (and columns) of the result
        Returns:
            A (a 2D Numpy array): a symmetric matrix
        Draws a square matrix of random numbers with np.random.rand and
        symmetrizes it by averaging it with its own transpose.
    """
    noise = np.random.rand(matrix_size, matrix_size)

    # Averaging with the transpose makes entry (i, j) equal to entry (j, i)
    return (noise + noise.T) / 2

Instructions for updating:
non-resource variables are not supported in the long term


In [5]:
# Defining variables
matrix_size = 6 # Size of the matrix
max_iterations = 5000 # Maximum number of iterations
nn_structure = [100,100] # Number of hidden neurons in each layer
eigen_guess =  70 # Guess for the eigenvalue (see the header of NN_Eigenvalue)
eigen_lr = 0.01 # Eigenvalue learning rate (see the header of NN_Eigenvalue)
delta_threshold = 1e-16 # Kill condition
verbose = True # True to display state of neural network every 100th iteration

# Create the matrix to be used
A = random_symmetric (matrix_size)


# Find the eigenvalues and the eigenvectors using Numpy, to compare to the
# neural-network estimate below
numpy_eigenvalues, numpy_eigenvectors = np.linalg.eig(A)

# Reset the Tensorflow graph, causes an error if this is not here
# Since the above cells are not re-run every time this one is, they are not 
# reset.  This line is needed to reset the Tensorflow computational graph to
# avoid variable redefinition errors. 
tf.reset_default_graph()

# Calculate the estimate of the eigenvalue and the eigenvector
eigenvalue, eigenvector = NN_Eigenvalue(matrix_size, A, max_iterations,
                                        nn_structure, eigen_guess, eigen_lr, 
                                        delta_threshold, verbose)

## Compare with the Numpy solution: report the distance from the estimate
## to the closest Numpy eigenvalue
print("\n Numpy Eigenvalues: \n", numpy_eigenvalues)
print("\n Final Numerical Eigenvalue \n", eigenvalue)
diff = np.min(abs(numpy_eigenvalues - eigenvalue))
print("\n")
print('Absolute difference between Numerical Eigenvalue and TensorFlow DNN = ',diff)



Step: 0 / 5000 loss:  48.65819 Eigenvalue: 0.24466103886133028
Step: 100 / 5000 loss:  44.49423 Eigenvalue: 3.2960032212392214
Step: 200 / 5000 loss:  44.494217 Eigenvalue: 3.2960097205956993
Step: 300 / 5000 loss:  44.494217 Eigenvalue: 3.2960097207113894

 Numpy Eigenvalues: 
 [ 3.29600972 -0.9247771   0.14777154 -0.43953424 -0.2140425   0.64863175]

 Final Numerical Eigenvalue 
 3.2960097207113903


Absolute difference between Numerical Eigenvalue and TensorFlow DNN =  7.549516567451064e-15
