# Introduction to Tensorflow

from the Udacity Deep Learning [Nanodegree](http://udacity.com)

In [1]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt

Coding the Linear Function in TensorFlow

In [2]:
# Declare a TensorFlow variable with initial value 5; it must be initialized in a session before use
x = tf.Variable(5)

Now we have to initialize the Variable in Tensorflow

In [3]:
# Build the op that initializes every variable currently defined in the graph
init = tf.global_variables_initializer()
# Execute the initializer inside a session; the session is closed when the block exits
with tf.Session() as sess:
    sess.run(init)

The tf.global_variables_initializer() call returns an operation that will initialize all TensorFlow variables from the graph. You call the operation using a session to initialize all the variables as shown above. Using the tf.Variable class allows us to change the weights and bias, but an initial value needs to be chosen.

Initializing the weights with random numbers from a normal distribution is good practice. Randomizing the weights helps keep the model from becoming stuck in the same place every time you train it. You'll learn more about this in the next lesson, when you study gradient descent.

In [4]:
# Dimensions of the weight matrix: one row per input feature, one column per label
n_features = 120
n_labels = 5
# Start the weights at random values drawn from a truncated normal distribution
weights = tf.Variable(tf.truncated_normal((n_features, n_labels)))

The tf.truncated_normal() function returns a tensor with random values from a normal distribution whose magnitude is no more than 2 standard deviations from the mean.

In [5]:
# Initialize the bias to 0: one zero-valued entry per label
bias = tf.Variable(tf.zeros(n_labels))

### Testing the Linear Classifier on the DIGITS dataset

In [6]:
# Define the necessary class
class linear_classifier(object):
    """Single-layer softmax classifier built with the TensorFlow 1.x graph API.

    ``input`` is indexed as ``input[:, 0]`` for the feature vectors and
    ``input[:, 1]`` for the label vectors, so it must support 2-D NumPy-style
    indexing (for example an object array of shape ``(n_samples, 2)``).
    NOTE(review): the label vectors are presumably one-hot encoded; confirm
    against the caller.
    """

    def __init__(self, input):
        """Store the features/labels and derive the layer dimensions from them."""
        self.features = input[:, 0]
        self.labels = input[:, 1]
        # The width of the first sample fixes the width of the whole layer.
        self.n_features = len(self.features[0])
        self.n_labels = len(self.labels[0])

    def get_weights(self):
        """Create the weight variable, randomly initialized (truncated normal)."""
        self.weights = tf.Variable(
            tf.truncated_normal((self.n_features, self.n_labels)))

    def get_biases(self):
        """Create the bias variable, initialized to zero."""
        # Use the instance's label count; the original read a notebook-level
        # global that is unrelated to the data passed to this classifier.
        self.biases = tf.Variable(tf.zeros(self.n_labels))

    def calculate_linear(self):
        """Define the input placeholders and the linear model ``xW + b``."""
        # Placeholders let the training batch be supplied through feed_dict.
        self.features_in = tf.placeholder(tf.float32, [None, self.n_features])
        self.labels_in = tf.placeholder(tf.float32, [None, self.n_labels])
        self.logits = tf.add(
            tf.matmul(self.features_in, self.weights), self.biases)

    def fit(self):
        """Run one gradient-descent step on a training split.

        Returns:
            The mean cross-entropy loss on the training split, evaluated in
            the same ``sess.run`` call that applies the update.
        """
        # Build the graph: variables, placeholders and logits.
        self.get_weights()
        self.get_biases()
        self.calculate_linear()

        # Softmax and cross entropy are computed by the fused op, which avoids
        # log(0) when the softmax saturates.
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=self.logits, labels=self.labels_in)
        loss = tf.reduce_mean(cross_entropy)

        learning_rate = 0.08
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=learning_rate).minimize(loss)

        # Stack the per-sample vectors into dense float32 matrices so they can
        # be fed to the float32 placeholders.
        dense_features = np.array(list(self.features), dtype=np.float32)
        dense_labels = np.array(list(self.labels), dtype=np.float32)

        # train_test_split returns (X_train, X_test, y_train, y_test); only
        # the training part is used here.
        train_features, _, train_labels, _ = train_test_split(
            dense_features, dense_labels)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            # Run optimizer and get loss
            _, train_loss = sess.run(
                [optimizer, loss],
                feed_dict={
                    self.features_in: train_features,
                    self.labels_in: train_labels,
                })

        return train_loss
    
    

### TensorFlow Softmax Implementation

In [7]:
def run(logit_data):
    """Return the softmax of ``logit_data``, evaluated in a fresh session."""
    # Placeholder through which the raw logits are fed
    logits_in = tf.placeholder(tf.float32)

    # Op that turns the logits into probabilities
    probabilities = tf.nn.softmax(logits_in)

    # Evaluate the op and hand the result straight back
    with tf.Session() as sess:
        return sess.run(probabilities, feed_dict={logits_in: logit_data})

print(run([2.0, 1.0, 0.1]))

[ 0.65900117  0.24243298  0.09856589]


### One Hot Encoding

Since softmax represents a prediction as a vector of class probabilities, we can design our input to represent the correct answer for our training examples in the same way.

We set the correct class to have a probability of 1 and all others to have a probability of 0.

The resulting encoding is called a **one-hot vector**

In [8]:
# Implementing one-hot encoding using scikit-learn
import numpy as np
from sklearn.preprocessing import LabelBinarizer

# Example labels
labels = np.array([1,5,3,2,4,3,2,1])

# Creating the encoder
lb = LabelBinarizer()

# Fit the encoder so it learns the set of distinct classes
lb.fit(labels)

# Transform labels to one-hot encoded vectors
lb.transform(labels)

array([[1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 1, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 0, 1, 0],
       [0, 0, 1, 0, 0],
       [0, 1, 0, 0, 0],
       [1, 0, 0, 0, 0]])

## Loss Function for multiple categorical classification

Where we previously used the sum-of-squares-error function to calculate the cost function, we now have to calculate the error for an output vector (softmax non-linearity)

We want the error to be proportional to the distance between the one-hot vector (label) and our softmax vector.
This distance is calculated using cross-entropy. 

It is calculated as the **negative sum, over all classes, of the label entry times the natural log of the predicted probability**: $D(\hat{y}, y) = -\sum_j y_j \ln \hat{y}_j$.

Because every term except the one for the correct class is multiplied by a 0 in the one-hot vector, the sum reduces to the negative natural log of the predicted probability of the correct class (times 1).

### Cross Entropy in Tensorflow

In [9]:
# Core TensorFlow functions needed for cross entropy
# Sum of all elements of the list
x = tf.reduce_sum([1,2,3,4,5])
# Natural logarithm; note that this rebinds x, replacing the op above
x = tf.log(100.0)

In [10]:
# Implementing the Cross Entropy
# Example prediction (softmax output) and its one-hot label
softmax_data = [0.7, 0.2, 0.1]
one_hot_data = [1.0, 0.0, 0.0]

# Placeholders through which the two vectors are fed at run time
softmax = tf.placeholder(tf.float32)
one_hot = tf.placeholder(tf.float32)

# Calculating the Cross Entropy function: -sum(label * ln(prediction))
cross_entropy = -tf.reduce_sum(tf.multiply(one_hot, tf.log(softmax)))

# Running it
with tf.Session() as sess:
    print(sess.run(cross_entropy, feed_dict={
        softmax: softmax_data,
        one_hot: one_hot_data
    }))

0.356675


## "Mini-Batching" to train on subsets of data in large Models

In [9]:
def batches(batch_size, features, labels):
    """Split features and labels into consecutive mini-batches.

    Returns a list of ``[feature_slice, label_slice]`` pairs, each holding up
    to ``batch_size`` samples; the final pair holds whatever remains.
    """
    # Every sample needs exactly one label
    assert len(features) == len(labels)

    total = len(features)
    return [
        [features[lo:lo + batch_size], labels[lo:lo + batch_size]]
        for lo in range(0, total, batch_size)
    ]
    

In [12]:
# 4 Samples of features
example_features = [
    ['F11','F12','F13','F14'],
    ['F21','F22','F23','F24'],
    ['F31','F32','F33','F34'],
    ['F41','F42','F43','F44']]
# 4 Samples of labels
example_labels = [
    ['L11','L12'],
    ['L21','L22'],
    ['L31','L32'],
    ['L41','L42']]



# Batch size 3 over 4 samples: one full batch plus one batch holding the last sample
example_batches = batches(3, example_features, example_labels)

In [13]:
# Display the generated batches
example_batches

[[[['F11', 'F12', 'F13', 'F14'],
   ['F21', 'F22', 'F23', 'F24'],
   ['F31', 'F32', 'F33', 'F34']],
  [['L11', 'L12'], ['L21', 'L22'], ['L31', 'L32']]],
 [[['F41', 'F42', 'F43', 'F44']], [['L41', 'L42']]]]

## Stochastic Gradient Descent

Implementing a scalable and performant way to apply gradient descent optimization to large models with large amounts of data

In [1]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import numpy as np

# Load the MNIST data sets from the given directory, with labels one-hot encoded
mnist = input_data.read_data_sets('/datasets/ud730/mnist', one_hot=True)

Extracting /datasets/ud730/mnist\train-images-idx3-ubyte.gz
Extracting /datasets/ud730/mnist\train-labels-idx1-ubyte.gz
Extracting /datasets/ud730/mnist\t10k-images-idx3-ubyte.gz
Extracting /datasets/ud730/mnist\t10k-labels-idx1-ubyte.gz


In [2]:
# Splitting the data into training and test sets
train_features = mnist.train.images
test_features = mnist.test.images

# Labels are cast to float32
train_labels = mnist.train.labels.astype(np.float32)
test_labels = mnist.test.labels.astype(np.float32)

# Hyperparameters for mini-batch stochastic gradient descent
learn_rate = 0.001
batch_size = 128

# Model dimensions read from the data: second axis of the features and of the labels
n_input = train_features.shape[1]
n_classes = train_labels.shape[1]


In [3]:
# Features and labels: placeholders fed one batch at a time (None leaves the batch size open)
features = tf.placeholder(tf.float32, [None, n_input])
labels = tf.placeholder(tf.float32, [None, n_classes])

# Weights and biases, initialized with random values from a normal distribution
weights = tf.Variable(tf.random_normal([n_input, n_classes]))
bias = tf.Variable(tf.random_normal([n_classes]))

In [4]:
# Logits - xW + b
logits = tf.add(tf.matmul(features, weights), bias)

# Define loss and optimizer
# The learning rate is a placeholder, so its value is supplied through feed_dict at run time
learning_rate = tf.placeholder(tf.float32)
# Cost is the mean softmax cross entropy between logits and labels
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
# Op that performs one gradient-descent update minimizing the cost
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

In [5]:
# Calculate accuracy
# A prediction is correct when the arg-max of the logits equals the arg-max of the label
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
# Cast the booleans to floats and average them to get the fraction of correct predictions
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

Running stochastic gradient descent on mini-batches

### Training the Model on Epochs

An epoch is a single forward and backward pass through the whole training data

In [6]:
def print_epoch_stats(epch_i, sess, last_features, last_labels):
    """Print the cost on the given batch and the accuracy on the validation set.

    Args:
        epch_i: Index of the epoch being reported.
        sess: Active ``tf.Session`` in which ``cost`` and ``accuracy`` are run.
        last_features: Features of the batch the cost is evaluated on.
        last_labels: Labels belonging to ``last_features``.
    """
    current_cost = sess.run(
        cost,
        feed_dict={features: last_features, labels: last_labels})
    # The notebook never defines a separate validation split, so use the one
    # that read_data_sets() attaches to the loaded MNIST object.
    valid_accuracy = sess.run(
        accuracy,
        feed_dict={
            features: mnist.validation.images,
            labels: mnist.validation.labels.astype(np.float32)})
    # Report the function's own epoch argument (the original read a global),
    # and give the accuracy a format field so it is actually printed.
    print('Epoch: {:<4} - Cost: {:<8.3} Valid Accuracy: {:<5.3}'.format(
        epch_i,
        current_cost,
        valid_accuracy))

In [7]:
# Set the number of epochs, i.e. passes over the training batches in the training loop
epochs = 10

In [10]:
init = tf.global_variables_initializer()

# The batch split does not change between epochs, so compute it once.
train_batches = batches(batch_size, train_features, train_labels)

with tf.Session() as sess:
    sess.run(init)

    # Training cycle through the epochs
    for epoch_i in range(epochs):

        # Train optimizer on all batches
        for batch_features, batch_labels in train_batches:
            sess.run(optimizer, feed_dict={features: batch_features, labels: batch_labels, learning_rate: learn_rate})

        # Report once per epoch, using the last batch of that epoch
        # (previously this ran only once, after the final epoch).
        print_epoch_stats(epoch_i, sess, batch_features, batch_labels)

    # Calculate accuracy for test dataset
    test_accuracy = sess.run(
        accuracy,
        feed_dict={features: test_features, labels: test_labels})

print('Test Accuracy: {}'.format(test_accuracy))

InternalError: Blas GEMM launch failed : a.shape=(128, 784), b.shape=(784, 10), m=128, n=10, k=784
	 [[Node: MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/gpu:0"](_recv_Placeholder_0/_7, Variable/read)]]

Caused by op 'MatMul', defined at:
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\ipykernel\__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\ipykernel\kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\ipykernel\kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\IPython\core\interactiveshell.py", line 2683, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\IPython\core\interactiveshell.py", line 2787, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\IPython\core\interactiveshell.py", line 2847, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-4-1a3f7655897e>", line 2, in <module>
    logits = tf.add(tf.matmul(features, weights), bias)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1801, in matmul
    a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 1263, in _mat_mul
    transpose_b=transpose_b, name=name)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "C:\Users\frank\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

InternalError (see above for traceback): Blas GEMM launch failed : a.shape=(128, 784), b.shape=(784, 10), m=128, n=10, k=784
	 [[Node: MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/gpu:0"](_recv_Placeholder_0/_7, Variable/read)]]


#### Implement "S.G.D"

#### Implementing "Learning Rate Decay"

#### Implementing "Momentum"