In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

In [2]:
# Read the training CSV into an integer matrix, skipping the first line of the file.
data = np.loadtxt(
    '../large_files/train.csv',
    delimiter=',',
    skiprows=1,
    dtype=int,
)

def set_up_data(d):
    """Shuffle, split and standardize a labelled data matrix.

    Column 0 of ``d`` is used as the label and the remaining columns as
    features. Rows are shuffled with NumPy's global random state, the last
    ``len(d) // 90`` shuffled rows are held out as the test split, and both
    splits are standardized with the per-column mean and standard deviation
    computed on the training split only.

    Args:
        d: 2-D array-like with one sample per row and the label in column 0.
            The input is not modified; a shuffled copy is used internally.

    Returns:
        Tuple ``(X_train, X_test, Y_train, Y_test)``. The feature arrays are
        float (standardized); the label arrays keep the dtype of ``d``.
    """
    # Shuffle a private copy so the caller's array keeps its row order.
    d = np.array(d, copy=True)
    np.random.shuffle(d)

    # split into X and Y (labels are first column)
    X = d[:, 1:]
    Y = d[:, 0]

    # split into train/test
    # NOTE(review): `// 90` holds out 1/90 (about 1.1%) of the rows, not 10%.
    # Kept as-is because existing results depend on it -- confirm the intent.
    n_rows = d.shape[0]
    n_test = n_rows // 90
    # Slice from an explicit index: with fewer than 90 rows n_test is 0, and
    # X[:-0] / X[-0:] would give an empty train set and put every row in test.
    split = n_rows - n_test
    X_train, X_test = X[:split], X[split:]
    Y_train, Y_test = Y[:split], Y[split:]

    # normalize each feature column with statistics from the training split
    mu = X_train.mean(axis=0)
    std = X_train.std(axis=0)

    # Columns that are constant over the training split have std == 0;
    # use 1 instead so the division below cannot produce NaN/inf.
    std[std == 0] = 1
    assert np.all(std != 0)

    X_train = (X_train - mu) / std
    X_test = (X_test - mu) / std

    return X_train, X_test, Y_train, Y_test

def onehot_encode(y):
    """One-hot encode a 1-D sequence of integer labels.

    The output has one column per integer in ``[min(y), max(y)]``; column
    ``j`` corresponds to label ``min(y) + j``. For labels that start at 0
    the column index therefore equals the label itself.

    Args:
        y: 1-D array-like of integer labels.

    Returns:
        Float array of shape ``(len(y), max(y) - min(y) + 1)`` with a single
        1 per row. An empty input yields an array of shape ``(0, 0)``.
    """
    labels = np.asarray(y)
    N = len(labels)
    if N == 0:
        # min/max are undefined on an empty array; there is nothing to encode.
        return np.zeros((0, 0))

    min_y, max_y = np.min(labels), np.max(labels)
    K = (max_y - min_y) + 1

    encoded_y = np.zeros((N, K))
    # Offset by the smallest label so the column index always lies in
    # [0, K); indexing with the raw label overflows when min_y > 0 and
    # wraps around when min_y < 0.
    encoded_y[np.arange(N), labels - min_y] = 1

    return encoded_y

def error_rate(p, t):
    """Return the fraction of positions where predictions differ from targets.

    Args:
        p: array-like of predicted values.
        t: array-like of target values, broadcastable against ``p``.

    Returns:
        Mean of the element-wise ``p != t`` comparison.
    """
    # Coerce to arrays first: for plain Python lists `p != t` is a single
    # bool, which would make the result 0.0 or 1.0 instead of a fraction.
    return np.mean(np.asarray(p) != np.asarray(t))

In [73]:
def train(epochs=5, batch_size=None, lr=0.001, reg=0.01):
    """Build, train and evaluate a two-hidden-layer dense classifier.

    Each call re-shuffles and re-splits the module-level ``data`` through
    ``set_up_data``. The network is Dense(300, relu) -> Dropout(0.2) ->
    Dense(100, relu) -> Dropout(0.2) -> Dense(10) -> Softmax, with L2 kernel
    regularization on the two hidden layers. It is trained with Adam on
    sparse categorical cross-entropy, evaluated on the held-out split, and
    the predicted classes for the first five test rows are printed above
    their true labels.

    Args:
        epochs: number of passes over the training split.
        batch_size: mini-batch size passed to ``model.fit``; ``None`` leaves
            the choice to Keras.
        lr: Adam learning rate.
        reg: L2 regularization factor for the hidden-layer kernels.

    Returns:
        The trained ``tf.keras`` model.
    """
    # step 1: get the data
    Xtrain, Xtest, Ytrain, Ytest = set_up_data(data)

    # define the model
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(300, activation=tf.nn.relu, kernel_regularizer=tf.keras.regularizers.l2(reg)),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(100, activation=tf.nn.relu, kernel_regularizer=tf.keras.regularizers.l2(reg)),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10),
        tf.keras.layers.Softmax()
    ])

    opt = tf.keras.optimizers.Adam(learning_rate=lr)
    # The model ends in a Softmax layer, so the loss receives probabilities
    # (the loss is constructed with its default arguments, not from logits).
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

    model.compile(optimizer=opt,
              loss=loss_fn,
              metrics=['accuracy'])

    model.fit(Xtrain, Ytrain, epochs=epochs, batch_size=batch_size)

    model.evaluate(Xtest,  Ytest, verbose=2)

    # Sanity check: predicted classes for the first five test rows vs. labels.
    print(tf.argmax(model(Xtest[:5]),1).numpy())
    print(Ytest[:5])

    return model



# Train the classifier; train() prints the fit progress, the test-split
# evaluation and a five-row prediction check, then returns the model.
model = train()

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
15/15 - 0s - loss: 0.4866 - accuracy: 0.9270
[6 2 5 0 3]
[6 2 5 0 3]


In [78]:
def train_2(epochs=10, batch_size=500, lr=0.001, reg=0.01):
    """Build, train and evaluate the dense classifier with explicit mini-batches.

    Each call re-shuffles and re-splits the module-level ``data`` through
    ``set_up_data``. The network is Dense(300, relu) -> Dropout(0.2) ->
    Dense(100, relu) -> Dropout(0.2) -> Dense(10) -> Softmax, with L2 kernel
    regularization on the two hidden layers. It is trained with Adam on
    sparse categorical cross-entropy, evaluated on the held-out split, and
    the predicted classes for the first five test rows are printed above
    their true labels.

    Args:
        epochs: number of passes over the training split.
        batch_size: mini-batch size passed to ``model.fit``.
        lr: Adam learning rate.
        reg: L2 regularization factor for the hidden-layer kernels.

    Returns:
        The trained ``tf.keras`` model.
    """
    # step 1: get the data
    Xtrain, Xtest, Ytrain, Ytest = set_up_data(data)

    # define the model
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(300, activation=tf.nn.relu, kernel_regularizer=tf.keras.regularizers.l2(reg)),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(100, activation=tf.nn.relu, kernel_regularizer=tf.keras.regularizers.l2(reg)),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10),
        tf.keras.layers.Softmax()
    ])

    opt = tf.keras.optimizers.Adam(learning_rate=lr)
    # The model ends in a Softmax layer, so the loss receives probabilities
    # (the loss is constructed with its default arguments, not from logits).
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

    model.compile(optimizer=opt,
              loss=loss_fn,
              metrics=['accuracy'])

    model.fit(Xtrain, Ytrain, epochs=epochs, batch_size=batch_size)

    model.evaluate(Xtest,  Ytest, verbose=2)

    # Sanity check: predicted classes for the first five test rows vs. labels.
    print(tf.argmax(model(Xtest[:5]),1).numpy())
    print(Ytest[:5])

    return model



# Train the mini-batch variant; train_2() prints the fit progress, the
# test-split evaluation and a five-row prediction check, then returns the model.
model_2 = train_2()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
15/15 - 0s - loss: 0.3498 - accuracy: 0.9485
[3 3 8 1 9]
[3 3 8 1 9]
