[View in Colaboratory](https://colab.research.google.com/github/mahmoudSalim/stanford-tensorflow-tutorials/blob/master/Stanford_TF_Course_Assi_1_LogReg_MNIST.ipynb)

In [0]:
import os
import gzip
import shutil
import struct
import urllib

os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

from matplotlib import pyplot as plt
import numpy as np
import tensorflow as tf

def huber_loss(labels, predictions, delta=14.0):
    """Compute the element-wise Huber loss between labels and predictions.

    With r = |labels - predictions| the loss is
        0.5 * r^2                     where r <  delta
        delta * r - 0.5 * delta^2     where r >= delta

    Args:
        labels: target values (tensor or value convertible to one).
        predictions: predicted values; must be compatible with `labels`
            under subtraction.
        delta: threshold at which the loss switches from quadratic to
            linear.

    Returns:
        A tensor shaped like the residual holding the per-element loss.
    """
    residual = tf.abs(labels - predictions)
    quadratic = 0.5 * tf.square(residual)
    linear = delta * residual - 0.5 * tf.square(delta)
    # tf.cond only accepts a scalar predicate, so it breaks as soon as a
    # batch of residuals is passed in. tf.where picks per element and still
    # handles the scalar case.
    return tf.where(residual < delta, quadratic, linear)

def safe_mkdir(path):
    """Create the directory `path` if there isn't one already.

    Missing parent directories are created as well, so nested paths such
    as 'data/mnist' work on a fresh checkout. An already existing
    directory is left untouched.

    Args:
        path: directory to create.

    Raises:
        OSError: if the directory cannot be created for a reason other
            than already existing (e.g. insufficient permissions, or the
            path exists but is not a directory).
    """
    # exist_ok=True limits the "ignore" behavior to the already-exists case
    # instead of swallowing every OSError.
    os.makedirs(path, exist_ok=True)

def read_birth_life_data(filename):
    """Read a tab-separated birth-rate / life-expectancy table.

    The first line is treated as a header and skipped. For every
    remaining non-blank line, column 1 (birth rate) and column 2 (life
    expectancy) are parsed as floats; column 0 is ignored.

    Args:
        filename: path to the text file (e.g. birth_life_2010.txt).

    Returns:
        (data, n_samples) where `data` is a float32 NumPy array with one
        (birth, life) row per sample and `n_samples` is the row count.
    """
    samples = []
    # The context manager closes the handle even if parsing fails.
    with open(filename, 'r') as handle:
        next(handle, None)  # skip the header line
        for row in handle:
            if not row.strip():
                continue  # tolerate blank (e.g. trailing) lines
            # Strip only line terminators; slicing off the last character
            # would eat a digit when the final line has no newline.
            fields = row.rstrip('\r\n').split('\t')
            samples.append((float(fields[1]), float(fields[2])))
    n_samples = len(samples)
    data = np.asarray(samples, dtype=np.float32)
    return data, n_samples

def download_one_file(download_url,
                      local_dest,
                      expected_byte=None,
                      unzip_and_remove=False):
    """Download `download_url` into `local_dest` unless it is already there.

    The download is skipped when `local_dest` exists, or when `local_dest`
    ends in '.gz' and its unzipped counterpart (the name without '.gz')
    exists.

    Args:
        download_url: URL to fetch.
        local_dest: path the downloaded file is written to.
        expected_byte: if given, the size in bytes the downloaded file
            must have; on mismatch a message is printed and the file is
            not unzipped.
        unzip_and_remove: if True, gunzip the download to `local_dest`
            minus its last three characters and delete the archive. This
            is honored whether or not `expected_byte` is given.

    Returns:
        None. Progress is reported via print.
    """
    # A bare `import urllib` does not guarantee that the `request`
    # submodule is loaded, so import it explicitly where it is used.
    import urllib.request

    unzipped_dest = local_dest[:-3]
    already_unzipped = (local_dest.endswith('.gz')
                        and os.path.exists(unzipped_dest))
    if os.path.exists(local_dest) or already_unzipped:
        print('%s already exists' %local_dest)
        return

    print('Downloading %s' %download_url)
    urllib.request.urlretrieve(download_url, local_dest)

    if expected_byte and os.stat(local_dest).st_size != expected_byte:
        print('The downloaded file has unexpected number of bytes')
        return

    print('Successfully downloaded %s' %local_dest)
    if unzip_and_remove:
        with gzip.open(local_dest, 'rb') as f_in, open(unzipped_dest, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
        os.remove(local_dest)

def download_mnist(path):
    """Download and unzip the MNIST dataset into `path` if not present.

    The four idx archives are fetched from
    http://yann.lecun.com/exdb/mnist, checked against their expected
    sizes, gunzipped and the archives removed. Files that already exist
    locally are skipped by download_one_file.

    Args:
        path: directory the files are stored in; created if missing.
    """
    safe_mkdir(path)
    url = 'http://yann.lecun.com/exdb/mnist'
    filenames = ['train-images-idx3-ubyte.gz',
                 'train-labels-idx1-ubyte.gz',
                 't10k-images-idx3-ubyte.gz',
                 't10k-labels-idx1-ubyte.gz']
    expected_bytes = [9912422, 28881, 1648877, 4542]

    for filename, byte in zip(filenames, expected_bytes):
        # URLs always use '/', whereas os.path.join would insert the
        # platform's path separator.
        download_url = '{}/{}'.format(url, filename)
        local_dest = os.path.join(path, filename)
        download_one_file(download_url, local_dest, byte, True)

def parse_data(path, dataset, flatten):
    """Load one MNIST split from the unzipped idx files found in `path`.

    Args:
        path: directory holding '<dataset>-labels-idx1-ubyte' and
            '<dataset>-images-idx3-ubyte'.
        dataset: split prefix, either 'train' or 't10k'.
        flatten: if truthy, each image is reshaped to a single row.

    Returns:
        (imgs, new_labels): `imgs` is a float32 array scaled by 1/255,
        shaped (num, rows, cols) or (num, rows * cols) when flattened;
        `new_labels` is a (num, 10) one-hot array.

    Raises:
        NameError: if `dataset` is neither 'train' nor 't10k'.
    """
    if dataset not in ('train', 't10k'):
        raise NameError('dataset must be train or t10k')

    # Labels: 8-byte big-endian header (magic number, count), then one
    # byte per label.
    labels_path = os.path.join(path, dataset + '-labels-idx1-ubyte')
    with open(labels_path, 'rb') as fh:
        header = struct.unpack(">II", fh.read(8))
        num = header[1]
        labels = np.fromfile(fh, dtype=np.int8)
    new_labels = np.zeros((num, 10))
    new_labels[np.arange(num), labels] = 1

    # Images: 16-byte big-endian header (magic number, count, rows, cols),
    # then one unsigned byte per pixel.
    images_path = os.path.join(path, dataset + '-images-idx3-ubyte')
    with open(images_path, 'rb') as fh:
        _, num, rows, cols = struct.unpack(">IIII", fh.read(16))
        pixels = np.fromfile(fh, dtype=np.uint8)
    imgs = pixels.reshape(num, rows, cols).astype(np.float32) / 255.0
    if flatten:
        imgs = imgs.reshape([num, -1])

    return imgs, new_labels

def read_mnist(path, flatten=True, num_train=55000):
    """Read MNIST from `path` and split it into train/validation/test.

    The 'train' files are shuffled with a random permutation; the first
    `num_train` samples form the training set and the remainder the
    validation set. The 't10k' files are returned unchanged as the test
    set.

    Args:
        path: directory containing the unzipped MNIST idx files.
        flatten: forwarded to parse_data.
        num_train: number of shuffled samples assigned to training.

    Returns:
        Three (images, labels) tuples of NumPy arrays:
        (train_imgs, train_labels), (val_imgs, val_labels),
        (test_imgs, test_labels)
    """
    all_imgs, all_labels = parse_data(path, 'train', flatten)

    shuffled = np.random.permutation(all_labels.shape[0])
    train_part = shuffled[:num_train]
    val_part = shuffled[num_train:]

    train_split = (all_imgs[train_part, :], all_labels[train_part, :])
    val_split = (all_imgs[val_part, :], all_labels[val_part, :])
    test_split = parse_data(path, 't10k', flatten)
    return train_split, val_split, test_split

def get_mnist_dataset(batch_size):
    """Build batched tf.data Datasets for MNIST training and testing.

    Downloads MNIST into 'data/mnist' if needed and reads it with
    flatten=False. The validation split returned by read_mnist is not
    used here.

    Args:
        batch_size: number of samples per batch for both datasets.

    Returns:
        (train_data, test_data): the training dataset is shuffled with a
        buffer of 10000 and then batched; the test dataset is only
        batched.
    """
    # Step 1: make sure the raw files are present, then load them
    mnist_folder = 'data/mnist'
    download_mnist(mnist_folder)
    train_split, _, test_split = read_mnist(mnist_folder, flatten=False)

    # Step 2: wrap the NumPy arrays in Datasets
    train_data = (tf.data.Dataset.from_tensor_slices(train_split)
                  .shuffle(10000)
                  .batch(batch_size))
    test_data = tf.data.Dataset.from_tensor_slices(test_split).batch(batch_size)

    return train_data, test_data
    
def show(image):
    """Display `image` in grayscale with matplotlib.

    Args:
        image: 2D array of pixel data to draw.
    """
    colormap = 'gray'
    plt.imshow(image, cmap=colormap)
    plt.show()

In [0]:
!mkdir -p /home/data/mnist/

In [18]:
#@title Default title text
""" Starter code for simple logistic regression model for MNIST
with tf.data module
MNIST dataset: yann.lecun.com/exdb/mnist/
Created by Chip Huyen (chiphuyen@cs.stanford.edu)
CS20: "TensorFlow for Deep Learning Research"
cs20.stanford.edu
Lecture 03
"""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import numpy as np
import tensorflow as tf
import time

#import utils

# Define parameters for the model
learning_rate = 0.01   # step size handed to the optimizer
batch_size = 128       # samples per batch for the train and test datasets
n_epochs = 100         # number of full passes over the training dataset
# NOTE(review): n_train is not referenced in the visible cells; read_mnist
# is called without num_train, so its default of 55000 training samples
# applies.
n_train = 60000
n_test = 10000         # denominator used when reporting test accuracy

# Step 1: Read in data
# Download MNIST if it is not on disk yet, then load it with every image
# flattened into a single row.


mnist_folder = '/home/data/mnist'
download_mnist(mnist_folder)
# `val` (the validation split) is not used in the visible cells.
train, val, test = read_mnist(mnist_folder, flatten=True)

# Step 2: Create datasets and iterator
# create training Dataset and batch it
train_data = tf.data.Dataset.from_tensor_slices(train)
train_data = train_data.shuffle(10000) # if you want to shuffle your data
train_data = train_data.batch(batch_size)



Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Successfully downloaded /home/data/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Successfully downloaded /home/data/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Successfully downloaded /home/data/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Successfully downloaded /home/data/mnist/t10k-labels-idx1-ubyte.gz


In [0]:
# create testing Dataset and batch it
test_data = tf.data.Dataset.from_tensor_slices(test)
# NOTE(review): shuffling the test set looks unnecessary - the evaluation
# cell only sums correct predictions over all batches; confirm before
# removing.
test_data = test_data.shuffle(1000)
test_data = test_data.batch(batch_size)

#############################
########## TO DO ############
#############################




In [0]:
# create one iterator and initialize it with different datasets
# The iterator is defined by structure only (types and shapes taken from
# train_data), so the same `img` / `label` tensors can be fed from either
# dataset depending on which initializer was run last.
iterator = tf.data.Iterator.from_structure(train_data.output_types, 
                                           train_data.output_shapes)
img, label = iterator.get_next()

train_init = iterator.make_initializer(train_data)	# initializer for train_data
test_init = iterator.make_initializer(test_data)	# initializer for test_data

# Step 3: create weights and bias
# w is initialized to random variables with mean of 0, stddev of 0.01
# b is initialized to 0
# shape of w depends on the dimension of X and Y so that Y = tf.matmul(X, w)
# shape of b depends on Y
# Here: 784 input features per flattened image (presumably 28x28 MNIST
# digits - confirm) and 10 outputs matching the 10-column one-hot labels.

#X = tf.placeholder(dtype=tf.float32, shape=[batch_size, 784])
#Y = tf.placeholder(dtype=tf.float32, shape=[batch_size, 10])

w = tf.Variable(tf.random_normal(mean=0, stddev=0.01, shape=[784, 10]))
b = tf.Variable(tf.zeros(shape=[1, 10]))

#w, b = tf.Variable(tf.random_normal(mean=0, stddev=0.01, shape=[img.shape[1], label.shape[1]])), tf.Variable(tf.zeros(shape=label.shape))
#############################
########## TO DO ############
#############################



In [0]:
# Step 4: build model
# The model returns the logits: a linear map of each image batch.
# These logits are later passed through a softmax layer.
logits = tf.matmul(img, w) + b
#############################
########## TO DO ############
#############################




In [0]:
# Step 5: define loss function
# use cross entropy of softmax of logits as the loss function
# `entropy` is computed from the raw logits and the labels of the current
# batch; `loss` is its mean.
entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=label)
loss = tf.reduce_mean(entropy)
#############################
########## TO DO ############
#############################




In [0]:
# Step 6: define optimizer
# using the Adam optimizer with the pre-defined learning rate to minimize loss
# `optimizer` is the training op returned by minimize(); running it
# performs one update step on the current batch.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
#############################
########## TO DO ############
#############################




In [40]:
# Step 7: calculate accuracy with test set
# NOTE: despite its name, `accuracy` is the NUMBER of correct predictions
# in the current batch (reduce_sum, not reduce_mean); the counts are summed
# over all test batches below and divided by n_test at the end.
preds = tf.nn.softmax(logits)
correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(label, 1))
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))

# Write the graph definition to ./graphs/logreg
writer = tf.summary.FileWriter('./graphs/logreg', tf.get_default_graph())
with tf.Session() as sess:

    start_time = time.time()
    sess.run(tf.global_variables_initializer())

    # train the model n_epochs times
    for i in range(n_epochs):
        sess.run(train_init)	# drawing samples from train_data
        total_loss = 0
        n_batches = 0
        try:
            # Run update steps until the iterator signals the end of the
            # dataset by raising OutOfRangeError.
            while True:
                _, l = sess.run([optimizer, loss])
                total_loss += l
                n_batches += 1
        except tf.errors.OutOfRangeError:
            pass
        print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))
    print('Total time: {0} seconds'.format(time.time() - start_time))

    # test the model
    sess.run(test_init)			# drawing samples from test_data
    total_correct_preds = 0
    try:
        # Accumulate the per-batch correct counts over the whole test set.
        while True:
            accuracy_batch = sess.run(accuracy)
            total_correct_preds += accuracy_batch
    except tf.errors.OutOfRangeError:
        pass

    # n_test is a hard-coded sample count; it must match the size of the
    # test split for this ratio to be the true accuracy.
    print('Accuracy {0}'.format(total_correct_preds/n_test))
writer.close()

Average loss epoch 0: 0.36414208065631776
Average loss epoch 1: 0.2940820986794871
Average loss epoch 2: 0.2816020430867062
Average loss epoch 3: 0.2768041499305603
Average loss epoch 4: 0.2728691224268703
Average loss epoch 5: 0.2688182164070218
Average loss epoch 6: 0.26921509020896844
Average loss epoch 7: 0.2662141303510167
Average loss epoch 8: 0.2641593280573224
Average loss epoch 9: 0.26568590788993723
Average loss epoch 10: 0.2647259741848291
Average loss epoch 11: 0.26152696957768395
Average loss epoch 12: 0.258115794319053
Average loss epoch 13: 0.25720747068177824
Average loss epoch 14: 0.2598081780900789
Average loss epoch 15: 0.2561981923011846
Average loss epoch 16: 0.2573577952246333
Average loss epoch 17: 0.2539565304164277
Average loss epoch 18: 0.25674585048542464
Average loss epoch 19: 0.2519130958720695
Average loss epoch 20: 0.2538898273782675
Average loss epoch 21: 0.2541149542601996
Average loss epoch 22: 0.25211830914540345
Average loss epoch 23: 0.2521684065461

Average loss epoch 68: 0.2453836320704499
Average loss epoch 69: 0.24318492315536322
Average loss epoch 70: 0.24137048636411512
Average loss epoch 71: 0.24390938876326693
Average loss epoch 72: 0.24379069870987605
Average loss epoch 73: 0.24334337666284206
Average loss epoch 74: 0.24501731593248455
Average loss epoch 75: 0.24432663149958433
Average loss epoch 76: 0.2448380328541578
Average loss epoch 77: 0.2457387316538844
Average loss epoch 78: 0.2456878859289857
Average loss epoch 79: 0.2428288457005523
Average loss epoch 80: 0.2448053181517956
Average loss epoch 81: 0.243366892202649
Average loss epoch 82: 0.2417978424319001
Average loss epoch 83: 0.24268014074064964
Average loss epoch 84: 0.24279898483046267
Average loss epoch 85: 0.24648421273328538
Average loss epoch 86: 0.24227125681070394
Average loss epoch 87: 0.2446879244821016
Average loss epoch 88: 0.24434675274546755
Average loss epoch 89: 0.24185810456442278
Average loss epoch 90: 0.2429957128541414
Average loss epoch 91:

In [56]:
!ls

logreg


In [0]:
# Start TensorBoard in the background, listening on port 6006.
# NOTE(review): the training cell writes its summaries to the relative path
# './graphs/logreg'; LOG_DIR must resolve to that same directory for
# TensorBoard to find them - verify against the notebook's working
# directory.
LOG_DIR = '/content/datalab/graphs/logreg'
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)

In [58]:
! wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
! unzip ngrok-stable-linux-amd64.zip

--2018-04-15 16:59:39--  https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
Resolving bin.equinox.io (bin.equinox.io)... 23.23.215.144, 23.21.132.31, 23.21.140.88, ...
Connecting to bin.equinox.io (bin.equinox.io)|23.23.215.144|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5363700 (5.1M) [application/octet-stream]
Saving to: ‘ngrok-stable-linux-amd64.zip’


2018-04-15 16:59:40 (40.9 MB/s) - ‘ngrok-stable-linux-amd64.zip’ saved [5363700/5363700]

Archive:  ngrok-stable-linux-amd64.zip
  inflating: ngrok                   


In [0]:
get_ipython().system_raw('./ngrok http 6006 &')


In [60]:
! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

http://4844db15.ngrok.io
