# Neural Networks with NumPy and TensorFlow (cont'd)
# Hardcore Pandas

In [75]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import sklearn.metrics


import pandas as pd
import numpy as np
import os, os.path, gzip, tempfile, urllib.request

def load_mnist(kind='train', dataset='zalando'): # 'train' or 't10k'
    """Download (on first use) and load one split of an MNIST-format dataset.

    Based on https://github.com/zalandoresearch/fashion-mnist/blob/master/utils/mnist_reader.py

    Parameters
    ----------
    kind : str
        Split prefix used in the remote file names: 'train' or 't10k'.
    dataset : str
        'zalando' selects Fashion-MNIST; any other value selects the
        original MNIST hosted at yann.lecun.com.

    Returns
    -------
    images : np.ndarray, dtype uint8, shape (n, 784)
        One flattened 28x28 image per row.
    labels : np.ndarray, dtype uint8, shape (n,)
        Class label of each image.

    Notes
    -----
    The gzip files are cached in the system temp directory. The cache file
    name includes the dataset, so the two datasets never share (and silently
    reuse) each other's files.
    """
    if dataset=='zalando':
        url_base = 'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/'
        cache_tag = 'zalando'
    else:
        url_base = 'http://yann.lecun.com/exdb/mnist/'
        cache_tag = 'mnist'

    def fetch(name):
        """Return the local cache path of remote file `name`, downloading if absent."""
        path = os.path.join(tempfile.gettempdir(), '%s-%s' % (cache_tag, name))
        if not os.path.exists(path):
            # Download under a temporary name and rename afterwards, so an
            # interrupted transfer never leaves a truncated file in the cache.
            partial = path + '.part'
            urllib.request.urlretrieve(url_base + name, partial)
            os.replace(partial, path)
        return path

    file_labels = fetch('%s-labels-idx1-ubyte.gz' % kind)
    file_images = fetch('%s-images-idx3-ubyte.gz' % kind)

    # The label file has an 8-byte header in front of the data.
    with gzip.open(file_labels, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8,
                               offset=8)

    # The image file has a 16-byte header; each image is 784 bytes.
    with gzip.open(file_images, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)

    assert len(images.shape)==2
    assert len(labels.shape)==1
    assert images.shape[0] == labels.shape[0]
    assert images.shape[1] == 28*28
    return images, labels

# Load both splits (training split first, then the 't10k' test split).
(X_train, Y_train), (X_test, Y_test) = load_mnist('train'), load_mnist('t10k')

# Rescale the uint8 pixel values from 0..255 to floats in [0, 1].
X_train, X_test = X_train / 255, X_test / 255

def one_hot_encode(Y, k=None):
    """One-hot encode a vector of integer class labels.

    Parameters
    ----------
    Y : array-like of non-negative ints, shape (n,)
        Class labels.
    k : int, optional
        Number of classes (columns of the result). When omitted it is
        inferred as ``max(Y) + 1``. Pass it explicitly when encoding several
        label arrays that must share the same width (e.g. train and test),
        since a split that lacks the highest label would otherwise come out
        narrower.

    Returns
    -------
    np.ndarray, dtype float64, shape (n, k)
        Row i is all zeros except for a 1 in column Y[i].
    """
    Y = np.asarray(Y)
    if k is None:
        # Cast to a Python int first: adding 1 to a small unsigned NumPy
        # scalar (e.g. uint8 255) can wrap around instead of growing.
        k = int(Y.max()) + 1
    return np.eye(k)[Y,:]

# One-hot targets for both splits; the class count is inferred from each
# label array separately.
Y_train2, Y_test2 = one_hot_encode(Y_train), one_hot_encode(Y_test)

def one_hot_decode(Y2):
    """Turn rows of class scores (or one-hot rows) back into class labels.

    Returns, for every row of `Y2`, the column index of its largest entry.
    """
    labels = np.argmax(Y2, axis=1)
    return labels

def mode(Y):
    """Return the most frequent value of `Y` as a length-1 array.

    When several values tie for the highest count, one of them is picked
    at random (via ``np.random.choice``).
    """
    values, counts = np.unique(Y, return_counts=True)
    most_frequent = values[counts == counts.max()]
    return np.random.choice(most_frequent, 1)

In [76]:
# Prepend a constant column of ones to each design matrix so the first row of
# the weight matrix acts as the intercept (784 pixels + 1 = 785 features).
X_train2 = np.hstack([np.ones((X_train.shape[0], 1)), X_train])
X_test2  = np.hstack([np.ones((X_test.shape[0], 1)), X_test])

In [77]:
def softmax(r):
    """Numerically stable softmax along the last axis.

    Parameters
    ----------
    r : array-like
        Scores. A 2-D array is treated as one score vector per row;
        a 1-D array is treated as a single score vector.

    Returns
    -------
    np.ndarray
        Same shape as `r`; entries are positive and sum to 1 along the
        last axis.
    """
    r = np.asarray(r)
    if not np.issubdtype(r.dtype, np.inexact):
        # Integer scores: switch to float before subtracting, otherwise
        # unsigned types would wrap around.
        r = r.astype(float)
    # Subtracting the per-row maximum leaves the result unchanged
    # mathematically but keeps np.exp from overflowing to inf (-> NaN).
    shifted = r - np.max(r, axis=-1, keepdims=True)
    r2 = np.exp(shifted)
    return r2 / np.sum(r2, axis=-1, keepdims=True)

# Exercise (solved above): softmax must be computed separately for each row of the matrix r

In [78]:
# Untrained baseline: draw a random 785 x 10 weight matrix (seeded for
# repeatability) and turn the linear scores into class probabilities.
np.random.seed(123)
C = np.random.randn(785, 10)
Y_pred = softmax(np.matmul(X_train2, C))

In [79]:
# Peek at the predicted class probabilities of the first five samples.
Y_pred[:5].round(2)

array([[0.  , 0.03, 0.89, 0.  , 0.  , 0.08, 0.  , 0.  , 0.  , 0.  ],
       [0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.07, 0.  , 0.  , 0.  , 0.78, 0.  , 0.  , 0.15, 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.85, 0.14, 0.  , 0.01, 0.  ],
       [0.  , 0.05, 0.  , 0.  , 0.  , 0.95, 0.  , 0.  , 0.  , 0.  ]])

In [80]:
def accuracy(Y_pred, Y_train):
    """Fraction of samples whose predicted class matches the true label.

    `Y_pred` holds one row of class scores per sample; `Y_train` holds the
    integer labels (not one-hot encoded).
    """
    predicted_labels = one_hot_decode(Y_pred)
    is_correct = predicted_labels == Y_train
    return np.mean(is_correct)

# Training accuracy of the random, untrained weights (baseline to beat).
accuracy(Y_pred, Y_train)

0.04795

In [81]:
# Class balance check: distinct labels and how often each occurs.
np.unique(Y_train, return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8),
 array([6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000]))

In [82]:
def cross_entropy(C, X_train2, Y_train2):
    """Mean cross-entropy of the softmax-regression model with weights `C`.

    Parameters
    ----------
    C : np.ndarray, shape (d, k)
        Weight matrix.
    X_train2 : np.ndarray, shape (n, d)
        Design matrix (one sample per row).
    Y_train2 : np.ndarray, shape (n, k)
        One-hot-encoded targets.

    Returns
    -------
    float
        ``-sum(Y * log(softmax(X @ C))) / n``.
    """
    logits = X_train2 @ C
    # Compute log(softmax) directly via log-sum-exp. Taking np.log of an
    # already-computed softmax breaks down when a probability underflows to 0:
    # log(0) = -inf and 0 * -inf = NaN poison the whole sum.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    return -np.sum(Y_train2*log_probs)/X_train2.shape[0]
    

In [83]:
# Naive baseline: pure random search. Draw 1000 random weight matrices and
# remember the one with the lowest cross-entropy, reporting each improvement.
best_C, best_error = None, np.inf
for i in range(1000):
    C = np.random.randn(785, 10)
    err = cross_entropy(C, X_train2, Y_train2)
    if not (err < best_error):
        continue  # no improvement (this also skips a NaN loss)
    best_error, best_C = err, C
    print(best_error, accuracy(softmax(X_train2@C),Y_train))
        

14.716710024471196 0.11415
14.623067445282864 0.16771666666666665
10.716627642887518 0.07833333333333334
10.489890116429194 0.1546
9.702626155724984 0.12291666666666666


In [84]:
def grad_cross_entropy(C, X_train2, Y_train2):
    """Gradient of the mean cross-entropy with respect to the weights `C`.

    Parameters
    ----------
    C : np.ndarray, shape (d, k)
        Weight matrix.
    X_train2 : np.ndarray, shape (n, d)
        Design matrix (one sample per row).
    Y_train2 : np.ndarray, shape (n, k)
        One-hot-encoded targets.

    Returns
    -------
    np.ndarray, shape (d, k)
        ``X.T @ (softmax(X @ C) - Y) / n``.
    """
    Y_pred = softmax(X_train2 @ C)
    # Flip the sign of the (n, k) residual rather than writing `-X.T @ ...`:
    # unary minus binds tighter than `@`, so the latter first builds a negated
    # copy of the entire design matrix. The resulting values are the same.
    return X_train2.T @ (Y_pred - Y_train2) / X_train2.shape[0]

In [85]:
# Full-batch gradient descent: 100 steps from a seeded random start, each step
# using the gradient over the whole training set. Metrics are reported after
# every 10th step.
np.random.seed(123)
C = np.random.randn(785, 10)
eta = 0.1 # "learning rate"
for i in range(100):
    C = C - eta * grad_cross_entropy(C, X_train2, Y_train2)

    if (i + 1) % 10 != 0:
        continue
    print("%4d: cross_entropy=%10.7f, acc_train=%.3f, acc_test=%.3f"%(
        i,
        cross_entropy(C, X_train2, Y_train2),
        accuracy(softmax(X_train2 @ C), Y_train),
        accuracy(softmax(X_test2 @ C), Y_test),
    ))

   9: cross_entropy= 8.0049560, acc_train=0.106, acc_test=0.109
  19: cross_entropy= 5.9949248, acc_train=0.199, acc_test=0.206
  29: cross_entropy= 4.9223162, acc_train=0.280, acc_test=0.285
  39: cross_entropy= 4.2912178, acc_train=0.337, acc_test=0.343
  49: cross_entropy= 3.8827793, acc_train=0.380, acc_test=0.384
  59: cross_entropy= 3.5953002, acc_train=0.413, acc_test=0.415
  69: cross_entropy= 3.3792933, acc_train=0.438, acc_test=0.441
  79: cross_entropy= 3.2084810, acc_train=0.456, acc_test=0.458
  89: cross_entropy= 3.0680396, acc_train=0.474, acc_test=0.475
  99: cross_entropy= 2.9491259, acc_train=0.488, acc_test=0.487


In [49]:
# Mini-batch / stochastic gradient descent: same model and seeded start as the
# full-batch run, but every update uses a random batch of `batch_size` samples.
# One outer iteration performs as many updates as there are batches in the
# training set; each batch is drawn independently, without repeats inside it.
np.random.seed(123)
C = np.random.randn(785, 10)
eta = 0.1 # "learning rate"
batch_size = 600
for i in range(100):
    for j in range(X_train2.shape[0] // batch_size):
        # An integer population is sampled as if it were np.arange(n).
        subset = np.random.choice(X_train2.shape[0], batch_size, replace=False)
        C = C - eta * grad_cross_entropy(C, X_train2[subset, :], Y_train2[subset, :])

    if (i + 1) % 10 != 0:
        continue
    print("%4d: cross_entropy=%10.7f, acc_train=%.3f, acc_test=%.3f"%(
        i,
        cross_entropy(C, X_train2, Y_train2),
        accuracy(softmax(X_train2 @ C), Y_train),
        accuracy(softmax(X_test2 @ C), Y_test),
    ))

   9: cross_entropy= 1.2649534, acc_train=0.713, acc_test=0.702
  19: cross_entropy= 1.0132985, acc_train=0.757, acc_test=0.748
  29: cross_entropy= 0.8956163, acc_train=0.777, acc_test=0.767
  39: cross_entropy= 0.8199046, acc_train=0.789, acc_test=0.777
  49: cross_entropy= 0.7656090, acc_train=0.797, acc_test=0.783
  59: cross_entropy= 0.7270596, acc_train=0.803, acc_test=0.787
  69: cross_entropy= 0.6935507, acc_train=0.809, acc_test=0.794
  79: cross_entropy= 0.6654487, acc_train=0.813, acc_test=0.798
  89: cross_entropy= 0.6437228, acc_train=0.817, acc_test=0.802
  99: cross_entropy= 0.6220315, acc_train=0.820, acc_test=0.803


# TensorFlow Intro

In other words: we have learned a lot so far.

In [58]:
import tensorflow as tf
# Symbolic graph inputs; None leaves the batch dimension open.
# x: rows of 785 features, y: the matching rows of 10 one-hot targets.
x = tf.placeholder(tf.float64, [None, 785])
y = tf.placeholder(tf.float64, [None, 10])
# Trainable 785 x 10 weight matrix, randomly initialised.
# NOTE(review): this rebinds `C`, which the NumPy cells use for their weight
# matrix; re-running those cells afterwards overwrites this variable again.
C = tf.Variable(tf.random_normal([785, 10], dtype=tf.float64))

In [59]:
# Show the symbolic description of the input placeholder (it holds no data yet).
x

<tf.Tensor 'Placeholder_4:0' shape=(?, 785) dtype=float64>

In [60]:
# Show the symbolic description of the target placeholder.
y

<tf.Tensor 'Placeholder_5:0' shape=(?, 10) dtype=float64>

In [61]:
# Show the weight variable's description (name, shape, dtype).
C

<tf.Variable 'Variable_1:0' shape=(785, 10) dtype=float64_ref>

In [62]:
# Model output node: softmax of the linear scores x @ C, i.e. the graph
# counterpart of the NumPy expression softmax(X_train2 @ C).
y_pred = tf.nn.softmax( tf.matmul(x,C) )
y_pred

<tf.Tensor 'Softmax:0' shape=(?, 10) dtype=float64>

In [67]:
# NumPy reference implementation (defined in an earlier cell):
#     Y_pred = softmax(X_train2 @ C)
#     -np.sum(Y_train2*np.log(Y_pred))/X_train2.shape[0]
#
# Graph version of the same quantity: the mean over the batch of the per-sample
# cross-entropy. The fused op works on the raw scores (logits) instead of
# taking tf.log of an already-computed softmax, which yields -inf/NaN as soon
# as a predicted probability underflows to 0.
#
# NOTE(review): this rebinds the name `cross_entropy`, which the NumPy cells
# use for a function; re-run that definition before re-running those cells.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=tf.matmul(x, C))
)

In [69]:
# Graph version of np.mean(one_hot_decode(y_pred) == one_hot_decode(y)):
# the fraction of rows where the arg-max of the prediction equals the arg-max
# of the one-hot target.
# NOTE(review): this rebinds the name `accuracy`, which the NumPy cells use
# for a function; re-run that definition before re-running those cells.
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1)), tf.float64)
)

In [71]:
# Graph counterpart of the manual update C = C-eta*grad_cross_entropy(C, X_train2, Y_train2):
# one gradient-descent step on `cross_entropy` with learning rate 0.1.
# Running `train_step` in a session applies a single update.
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(cross_entropy)

In [74]:
# Create a session and give every graph variable its initial value.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# Same mini-batch schedule as the NumPy version: 100 outer iterations, each
# running one optimiser step per batch of `batch_size` randomly drawn samples.
batch_size = 600
for i in range(100):
    for j in range(X_train2.shape[0] // batch_size):
        # An integer population is sampled as if it were np.arange(n).
        subset = np.random.choice(X_train2.shape[0], batch_size, replace=False)
        sess.run(
            train_step,
            feed_dict={x: X_train2[subset, :], y: Y_train2[subset, :]},
        )

    if (i + 1) % 10 != 0:
        continue
    # Every 10th iteration: loss over the full training set.
    print("%4d: cross_entropy=%10.7f"%(
        i,
        sess.run(cross_entropy, feed_dict={x: X_train2, y: Y_train2}),
    ))

   9: cross_entropy= 1.3601004
  19: cross_entropy= 1.0728300
  29: cross_entropy= 0.9284891
  39: cross_entropy= 0.8422055
  49: cross_entropy= 0.7865233
  59: cross_entropy= 0.7414442
  69: cross_entropy= 0.7040721
  79: cross_entropy= 0.6770457
  89: cross_entropy= 0.6529887
  99: cross_entropy= 0.6327627


In [87]:
import keras

Using TensorFlow backend.


In [92]:
from keras.models import Sequential
from keras.layers import Dense,Activation

# Softmax regression in Keras, mirroring the NumPy model:
#   Dense(10)           ~ the linear map X @ C (Keras adds its own bias term by default)
#   Activation(softmax) ~ softmax over the 10 class scores
model = Sequential([
    Dense(10, input_dim=785),
    Activation("softmax"),
])

model.compile(
    optimizer='sgd',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(X_train2, Y_train2, epochs=3, batch_size=100, verbose=1)



Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f64166c6630>

In [115]:
from keras.models import Sequential
from keras.layers import Dense,Activation

# Multi-layer perceptron: 785 inputs -> 112 (exponential) -> 28 (relu) -> 10 (softmax).
# Only the first layer declares its input size; each later layer takes it from
# the layer before. The `input_dim=25` previously passed to the 2nd and 3rd
# Dense layers matched neither real input width (112 and 28) and has been
# removed.
model = Sequential()
model.add(Dense(28*4, input_dim=785))
model.add(Activation("exponential"))
model.add(Dense(28))
model.add(Activation("relu"))
model.add(Dense(10))
model.add(Activation("softmax"))

model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(X_train2, Y_train2, batch_size=60, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f6414c0ab38>

In [116]:
# Test-set accuracy of the Keras model: share of samples whose highest-scoring
# class equals the true label.
np.mean(one_hot_decode(model.predict(X_test2)) == Y_test)

0.8745