# PyMarek day 3

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import sklearn.metrics

import os, os.path, gzip, tempfile, urllib.request

def load_mnist(kind='train', dataset='zalando'): # 'train' or 't10k'
    """Download (once) and load an MNIST-format image/label set.

    Based on https://github.com/zalandoresearch/fashion-mnist/blob/master/utils/mnist_reader.py

    Parameters
    ----------
    kind : str
        Split to load: 'train' or 't10k'.
    dataset : str
        'zalando' selects the Fashion-MNIST mirror; any other value selects
        the original MNIST mirror.

    Returns
    -------
    images : np.ndarray of uint8, shape (n_samples, 784)
    labels : np.ndarray of uint8, shape (n_samples,)
        Both arrays are created with np.frombuffer over the decompressed
        bytes.
    """

    if dataset=='zalando':
        url_base = 'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/'
        cache_prefix = 'zalando'
    else:
        url_base = 'http://yann.lecun.com/exdb/mnist/'
        cache_prefix = 'mnist'

    def fetch(name):
        # The cache file name carries the dataset prefix: both mirrors use the
        # same remote file names, so an unprefixed cache would hand back the
        # files of whichever dataset was downloaded first.
        path = os.path.join(tempfile.gettempdir(), '%s-%s' % (cache_prefix, name))
        if not os.path.exists(path):
            # Download to a side file and rename when complete, so an
            # interrupted transfer never leaves a truncated "cached" archive.
            partial = path + '.part'
            urllib.request.urlretrieve(url_base + name, partial)
            os.replace(partial, path)
        return path

    file_labels = fetch('%s-labels-idx1-ubyte.gz' % kind)
    file_images = fetch('%s-images-idx3-ubyte.gz' % kind)

    # IDX files start with a header (8 bytes for labels, 16 for images) that
    # is skipped via `offset`.
    with gzip.open(file_labels, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8,
                               offset=8)

    with gzip.open(file_images, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)

    assert len(images.shape)==2
    assert len(labels.shape)==1
    assert images.shape[0] == labels.shape[0]
    assert images.shape[1] == 28*28
    return images, labels

# Fetch the training and test splits (downloaded once, then read from the cache).
(X_train, Y_train), (X_test, Y_test) = load_mnist('train'), load_mnist('t10k')

# Rescale the uint8 pixel values from 0..255 to the unit interval.
X_train, X_test = X_train/255, X_test/255

def one_hot_encode(Y, num_classes=None):
    """Turn a vector of integer class labels into a one-hot matrix.

    Parameters
    ----------
    Y : array of non-negative ints, shape (n,)
        Class labels.
    num_classes : int, optional
        Number of columns of the result. When omitted it is inferred as
        max(Y)+1; pass it explicitly when several label sets (e.g. train and
        test) must share the same width even if one lacks the highest class.

    Returns
    -------
    np.ndarray of float64, shape (n, num_classes)
        Row i has a 1 in column Y[i] and 0 elsewhere.
    """
    if num_classes is None:
        # Convert to a Python int before adding 1: on a uint8 label array the
        # sum would otherwise be computed in uint8 under NumPy 2 promotion
        # rules and wrap around when the largest label is 255.
        num_classes = int(np.max(Y)) + 1
    return np.eye(num_classes)[Y,:]

# One-hot versions of the two label vectors (the "2" suffix marks the encoded form).
Y_train2, Y_test2 = (one_hot_encode(labels) for labels in (Y_train, Y_test))

def one_hot_decode(Y2):
    """Return, for every row of Y2, the column index of its largest entry."""
    winning_columns = np.argmax(Y2, axis=1)
    return winning_columns

def mode(Y):
    """Most frequent value of Y as a length-1 array; ties are broken at random."""
    values, counts = np.unique(Y, return_counts=True)
    is_most_frequent = counts == counts.max()
    candidates = values[is_most_frequent]
    return np.random.choice(candidates, 1)

  return f(*args, **kwds)


In [2]:
# Prepend a constant column of ones to each design matrix; it is the column
# that gets multiplied with the bias row of the coefficient matrix.
X_test2, X_train2 = (np.insert(X, 0, 1, axis=1) for X in (X_test, X_train))

In [3]:
def softmax(m):
    """Row-wise softmax of a 2-D array of scores.

    Parameters
    ----------
    m : np.ndarray, shape (n, k)
        One row of scores (logits) per sample.

    Returns
    -------
    np.ndarray, shape (n, k)
        Non-negative entries; every row sums to 1.
    """
    # Subtracting each row's maximum does not change the result mathematically
    # (the factor cancels in the ratio) but keeps np.exp from overflowing to
    # inf, which would turn the row into inf/inf = NaN.
    shifted = m - np.max(m, axis=1, keepdims=True)
    m2 = np.exp(shifted)
    return m2 / np.sum(m2, axis=1, keepdims=True)


In [4]:
# Fix the seed so the random coefficient matrix below is reproducible.
np.random.seed(243)
# 785 rows = 28*28 pixels + the prepended column of ones; 10 output columns.
C = np.random.randn(785, 10)
# Softmax outputs of the (untrained) random model for every training row.
Y_pred = softmax(X_train2 @ C)

In [None]:
# Peek at the first 20 rows of the predicted probabilities, rounded to 2 decimals.
np.round(Y_pred[:20,:],2)

array([[1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.04, 0.  , 0.  , 0.01, 0.  , 0.  , 0.93, 0.  , 0.01, 0.  ],
       [0.  , 0.  , 0.67, 0.27, 0.  , 0.  , 0.02, 0.  , 0.01, 0.03],
       [1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.99, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.  ],
       [1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.65, 0.34, 0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.  ],
       [0.06, 0.01, 0.  , 0.  , 0.  , 0.  , 0.  , 0.93, 0.  , 0.  ],
       [0.99, 0.  , 0.  , 0.01, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.65, 0.  , 0.03, 0.  , 0.  , 0.  , 0.32, 0.  , 0.  , 0.  ],
       [0.02, 0.  , 0.  , 0.  , 0.  , 0.  , 0.02, 0.97, 0.  , 0.  ],
       [1.  , 0.  , 0.  , 0.  , 0.

In [None]:
# Sanity check: decoding gives one predicted label per row of Y_pred.
one_hot_decode(Y_pred).shape

(60000,)

In [None]:
# Sanity check: training images, one row per sample and 28*28 pixel columns.
X_train.shape

(60000, 784)

In [None]:
# Sanity check: training labels, one entry per sample.
Y_train.shape

(60000,)

In [None]:
# accuracy
def accuracy(Y_pred, Y_train):
    """Share of rows whose highest-scoring column in Y_pred equals the label in Y_train."""
    predicted_labels = one_hot_decode(Y_pred)
    is_hit = predicted_labels == Y_train
    return np.mean(is_hit)


In [None]:
def cross_entropy(C, X_train2, Y_train2):
    """Mean cross-entropy of the softmax-regression model with coefficients C.

    Parameters
    ----------
    C : np.ndarray, shape (n_features, n_classes)
        Coefficient matrix.
    X_train2 : np.ndarray, shape (n_samples, n_features)
        Design matrix.
    Y_train2 : np.ndarray, shape (n_samples, n_classes)
        One-hot encoded labels.

    Returns
    -------
    float
        -sum(Y_train2 * log(softmax(X_train2 @ C))) / n_samples
    """
    logits = X_train2 @ C
    # Compute log(softmax) directly with the log-sum-exp trick instead of
    # taking np.log of the probabilities: a probability that underflows to 0
    # would give log(0) = -inf, and 0 * -inf in the product below is NaN.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    return -np.sum(Y_train2*log_probs)/X_train2.shape[0]

In [None]:
# get C by random choices:
# Baseline: draw 1000 random coefficient matrices and keep the one with the
# lowest training cross-entropy. No seed is set in this cell, so the draws
# depend on the RNG state left behind by the cells run before it.
best_C = None
best_error = np.inf
for i in range(1000):
    C = np.random.randn(785, 10)
    err = cross_entropy(C, X_train2, Y_train2)
    if err < best_error:
        best_error = err
        best_C = C
        # Log every new best loss together with that candidate's training accuracy.
        print(best_error, accuracy(softmax(X_train2@C), Y_train))

24.874622818821926 0.09771666666666666
17.98062495690606 0.16611666666666666
15.990783873518549 0.11618333333333333
15.113912510623697 0.07095
13.27732079873625 0.1473
10.886799277223394 0.1171
10.371417807673776 0.116
10.217930227696337 0.15591666666666668


In [None]:
# Note: a lower cross-entropy doesn't always come with a higher accuracy (see the log above); they are different measures.

In [None]:
def grad_cross_entropy(C, X_train2, Y_train2):
    """Gradient of the mean cross-entropy with respect to the coefficient matrix C.

    Returns an array with the same shape as C.
    """
    n_samples = X_train2.shape[0]
    # Difference between the one-hot targets and the predicted probabilities.
    residual = Y_train2 - softmax(X_train2 @ C)
    return -X_train2.T @ residual / n_samples

In [None]:
# Gradient Descent
# Full-batch version: every update uses the gradient over the whole training set.
np.random.seed(123)
# Random starting point for the coefficients (reproducible thanks to the seed above).
C = np.random.randn(785, 10)
eta = 0.1 # learning rate
for i in range(100):
    # Step against the gradient of the training cross-entropy.
    C = C - eta*grad_cross_entropy(C, X_train2, Y_train2)
    
    # Report loss and train/test accuracy after every single update.
    print(f'''iteration: {i} - 
              cross_entropy: {cross_entropy(C, X_train2, Y_train2)} - 
              acc_train: {accuracy(softmax(X_train2 @ C), Y_train)} - 
              acc_test: {accuracy(softmax(X_test2 @ C), Y_test)}''')


In [None]:
# Mini-batch Gradient Descent
# Same model and update rule as the full-batch loop, but each step estimates
# the gradient from a random subset of `batch_size` training rows.
np.random.seed(123)
C = np.random.randn(785, 10)
eta = 0.1 # learning rate
batch_size = 600
for i in range(100):
    # One outer iteration performs as many updates as there are batches in the
    # training set; `j` only counts them.
    for j in range(X_train2.shape[0]//batch_size):
        # Row indices of this mini-batch, drawn without repetition inside the batch.
        subset = np.random.choice(np.arange(X_train2.shape[0]), batch_size, replace=False)
        # Use only the sampled rows. Previously the full X_train2/Y_train2 were
        # passed here, so `subset` was ignored and every step was a (slow)
        # full-batch update.
        C = C - eta*grad_cross_entropy(C, X_train2[subset,:], Y_train2[subset,:])
    
    # Report on the full data sets every 10th outer iteration.
    if i %10 == 9:
        print(f'''iteration: {i} - 
                  cross_entropy: {cross_entropy(C, X_train2, Y_train2)} - 
                  acc_train: {accuracy(softmax(X_train2 @ C), Y_train)} - 
                  acc_test: {accuracy(softmax(X_test2 @ C), Y_test)}''')

## TensorFlow Intro

In other words: we've learned a lot so far. Below, the same softmax-regression model is rebuilt with TensorFlow.

In [None]:
import tensorflow as tf

# Graph inputs: they hold no data themselves and are filled through
# `feed_dict` when the session runs. `None` leaves the number of rows open.
x = tf.placeholder(tf.float64, [None, 785])
y = tf.placeholder(tf.float64, [None, 10])
# Trainable coefficient matrix, randomly initialised.
# NOTE: this rebinds the name `C`, which held a NumPy array in the cells above.
C = tf.Variable(tf.random_normal([785, 10], dtype=tf.float64))

In [None]:
# Display the placeholder for the input rows.
x

In [None]:
# Display the placeholder for the one-hot targets.
y

In [None]:
# Display the coefficient variable.
C

In [None]:
# Symbolic counterpart of softmax(X @ C): a graph node, evaluated only when
# run in a session with `x` fed.
y_pred = tf.nn.softmax(tf.matmul(x, C))
y_pred

In [None]:
# Mean cross-entropy over the rows fed into `x`/`y`.
# The log-probabilities come from tf.nn.log_softmax applied to the logits
# rather than tf.log(y_pred): a softmax output that underflows to 0 would give
# log(0) = -inf, and 0 * -inf turns the whole loss into NaN.
# NOTE: this rebinds the name `cross_entropy`, shadowing the NumPy function of
# the same name defined earlier in the notebook; re-run that cell before
# re-running any of the NumPy training cells.
cross_entropy = -tf.reduce_sum(y*tf.nn.log_softmax(tf.matmul(x, C)))/tf.cast(tf.shape(x)[0], tf.float64)
cross_entropy

In [None]:
# Graph version of the NumPy expression
#   np.mean(one_hot_decode(y_pred) == one_hot_decode(y_test2))
# i.e. the share of rows whose arg-max prediction matches the arg-max target.
# NOTE: this rebinds the name `accuracy`, shadowing the NumPy function of the
# same name defined earlier in the notebook.
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1)), tf.float64)
)

In [None]:
# Operation that performs one gradient-descent update (learning rate 0.1) on
# the cross-entropy each time it is run in a session.
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(cross_entropy)

In [None]:
# Create a session and give the variables their initial values.
# NOTE(review): the session is never closed in the visible cells.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

batch_size = 600
for i in range(100):
    # subset = np.random.randint(0, X_train2.shape[0], batch_size)
    # One outer iteration runs as many mini-batch updates as there are batches
    # in the training set; `j` only counts them.
    for j in range(X_train2.shape[0]//batch_size):
        # Row indices of this mini-batch; no NumPy seed is set in this cell.
        subset = np.random.choice(np.arange(X_train2.shape[0]), batch_size, replace=False)
        #C = C - eta*grad_cross_entropy(C, X_train2, Y_train2)
        # Feed only the sampled rows into the placeholders for this update.
        sess.run(train_step, feed_dict={
            x: X_train2[subset,:],
            y: Y_train2[subset,:]
        })
    
    # Every 10th outer iteration, evaluate the loss on the full training set.
    if i %10 == 9:
        print(f'''iteration: {i} - 
                  cross_entropy: {sess.run(cross_entropy, feed_dict={
                        x: X_train2,
                        y: Y_train2
                  })}''')

In [1]:
import keras

Using Theano backend.


ModuleNotFoundError: No module named 'theano'