In [None]:
import tensorflow as tf
import numpy as np
from statistics import mean 
# Only the image and label arrays are needed from the archive.
with np.load("notMNIST.npz") as data :
    Data = data["images"]
    Target = data["labels"]

# Two-class problem: keep the examples whose label is posClass or negClass.
posClass, negClass = 2, 9
dataIndx = np.logical_or(Target == posClass, Target == negClass)

# Divide pixel values by 255 (presumably mapping 8-bit intensities into [0, 1]).
Data = Data[dataIndx] / 255.

# Labels become a column vector holding 1 for posClass and 0 for negClass.
Target = Target[dataIndx].reshape(-1, 1)
Target[Target == posClass] = 1
Target[Target == negClass] = 0

# Fixed seed so the shuffle, and therefore the split below, is repeatable.
np.random.seed(521)
randIndx = np.arange(len(Data))
np.random.shuffle(randIndx)
Data = Data[randIndx]
Target = Target[randIndx]

# First 3500 examples train, the next 100 validate, the remainder test.
trainData, validData, testData = Data[:3500], Data[3500:3600], Data[3600:]
trainTarget, validTarget, testTarget = Target[:3500], Target[3500:3600], Target[3600:]

In [None]:
def start_train(n_iterations,learning_rate,fix_decay_regularizer,isAdam,batch_size):
    """Train a single-layer logistic classifier with mini-batches and log per-epoch metrics.

    Reads the notebook-level arrays ``trainData``/``trainTarget`` and
    ``validData``/``validTarget``; none of them is modified.

    Args:
        n_iterations: Requested number of mini-batch updates. It is converted
            to whole epochs as ``int(n_iterations*batch_size/m)``, where ``m``
            is the number of training examples.
        learning_rate: Step size handed to the optimizer.
        fix_decay_regularizer: When truthy the weight-decay coefficient is 0,
            otherwise it is 0.01.
        isAdam: Use ``tf.train.AdamOptimizer`` when truthy, otherwise
            ``tf.train.GradientDescentOptimizer``.
        batch_size: Number of examples per mini-batch.

    Returns:
        ``(train_entropy, train_accuracy_log, valid_accuracy_log,
        valid_entropy, test_accuracy)``. The first four are NumPy arrays with
        one entry per epoch. Accuracy is the fraction of rounded sigmoid
        outputs that equal the labels. ``test_accuracy`` is whatever
        ``start_test`` returns for the final weight and bias.
    """
    m, n, k = trainData.shape
    n_features = n*k
    n_epoch = int(n_iterations*batch_size/m)

    # Flattened view of the images (28x28 -> 1x784 in this data set).
    train_x = trainData.reshape(-1, n_features)
    train_y = trainTarget

    #Data Handler
    valid_accuracy_log = np.zeros(n_epoch)
    valid_entropy = np.zeros(n_epoch)
    train_accuracy_log = np.zeros(n_epoch)
    train_entropy = np.zeros(n_epoch)

    # A private graph per call keeps repeated invocations from piling more
    # and more nodes/variables into the process-wide default graph.
    graph = tf.Graph()
    with graph.as_default():
        #Mini-batch inputs
        x_in = tf.placeholder(tf.float32, [None, n_features], name="dataset_in")
        y_in = tf.placeholder(tf.float32, [None, 1], name="true_value")

        #Validation Data
        valid_data = tf.constant(validData.reshape(-1, n_features), dtype=tf.float32, name="validation_data")
        valid_label = tf.constant(validTarget, dtype=tf.float32, name="validation_label")

        #Weights
        w = tf.Variable(tf.zeros([n_features, 1], dtype=tf.float32), name="weight")
        b = tf.Variable(tf.zeros([1], dtype=tf.float32), name="bias")

        # Kept as a Python float so it takes the float32 dtype of the tensor
        # it is multiplied with.
        decaybias = 0.0 if fix_decay_regularizer else 0.01

        #optimizer
        if isAdam:
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        else:
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

        # The same weight-decay penalty is added to both losses.
        penalty = decaybias * tf.nn.l2_loss(w)

        #Training loss
        y_pred = tf.add(tf.matmul(x_in, w), b)
        train_cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_pred, labels=y_in)
        train_loss_total = tf.reduce_mean(train_cross_entropy) + penalty

        #Validation loss
        valid_prediction = tf.add(tf.matmul(valid_data, w), b)
        valid_cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=valid_prediction, labels=valid_label)
        valid_loss_total = tf.reduce_mean(valid_cross_entropy) + penalty

        # Accuracy of the rounded sigmoid output (>0.5 -> 1, else 0), computed
        # directly for the tensors at hand. tf.metrics.accuracy is a streaming
        # metric that would keep accumulating over every sess.run call and so
        # would not give a per-batch / per-epoch value.
        train_accuracy = tf.reduce_mean(
            tf.cast(tf.equal(tf.round(tf.sigmoid(y_pred)), y_in), tf.float32))
        valid_accuracy = tf.reduce_mean(
            tf.cast(tf.equal(tf.round(tf.sigmoid(valid_prediction)), valid_label), tf.float32))

        #Train
        train = optimizer.minimize(train_loss_total)

        #initialize all vars (includes any optimizer slot variables)
        init = tf.global_variables_initializer()

        #run!
        with tf.Session(graph=graph) as sess:
            sess.run(init)
            for epoch in range(n_epoch):
                # Sums weighted by batch length, so a short final batch does
                # not skew the epoch average.
                entropy_history = 0.0
                accuracy_history = 0.0
                # Fresh visiting order each epoch; indexing through a
                # permutation keeps images and labels paired without
                # reordering the notebook-level arrays.
                order = np.random.permutation(m)
                for offset in range(0, m, batch_size):
                    batch_idx = order[offset:offset+batch_size]
                    feed = {x_in: train_x[batch_idx], y_in: train_y[batch_idx]}
                    #train
                    sess.run(train, feed_dict=feed)
                    # Metrics are read after the update, on the same batch.
                    batch_loss, batch_acc = sess.run([train_loss_total, train_accuracy], feed_dict=feed)
                    entropy_history += batch_loss * len(batch_idx)
                    accuracy_history += batch_acc * len(batch_idx)

                #store the epoch averages
                train_entropy[epoch] = entropy_history / m
                train_accuracy_log[epoch] = accuracy_history / m
                #store validation data
                valid_accuracy_log[epoch], valid_entropy[epoch] = sess.run([valid_accuracy, valid_loss_total])

            #evaluate test data with the parameters reached after the last epoch
            best_weight = w.eval()
            best_bias = b.eval()
            # NOTE(review): start_test is not defined in this part of the
            # notebook; it has to exist before this function is called.
            test_accuracy = start_test(best_weight, best_bias)
            return train_entropy,train_accuracy_log,valid_accuracy_log,valid_entropy,test_accuracy

In [None]:
#FOR PART 1
# Train once with plain gradient descent, then plot the per-epoch loss and
# accuracy curves for the training and validation sets and print a summary.
import matplotlib.pyplot as p

isAdam = False
train_loss,train_acc_log,valid_acc_log,valid_loss_log,test_acc_log = start_train(
    n_iterations=5000,
    learning_rate=0.001,
    fix_decay_regularizer=False,
    isAdam=isAdam,
    batch_size=500)

# Only this single run is plotted. (An earlier variant of this cell repeated
# the run several times and averaged the mean of each curve to obtain the
# reported end figures.)
# Colour encodes the data split, line style encodes the quantity.
p.plot(train_loss,'b-', label="train loss")
p.plot(train_acc_log,'b--', label="train accuracy")
p.plot(valid_loss_log,'r-', label="validation loss")
p.plot(valid_acc_log,'r--', label="validation accuracy")
p.xlabel("epoch")
p.ylabel("loss / accuracy")
p.legend(numpoints = 1)
p.show()
print("test_acc=",test_acc_log)
print("train_accuracy=",np.max(train_acc_log),"validation_accuracy=",np.max(valid_acc_log),"train_loss function=",np.min(train_loss))

In [None]:
#for learning rate 0.005
#test_acc= 0.97931033
#train_accuracy= 0.9803337710244315 validation_accuracy= 0.979593813419342 train_loss function= 0.06626365067703384

#for learning rate 0.001
#test_acc= 0.9655172
#train_accuracy= 0.9767628141811916 validation_accuracy= 0.9800000190734863 train_loss function= 0.07979274221829005

#for learning rate 0.0001
#test_acc= 0.9724138
#train_accuracy= 0.9713712079184396 validation_accuracy= 0.9800000190734863 train_loss function= 0.16093909527574266


In [None]:
#FOR PART 2
# Compare the training-loss curves of plain gradient descent and Adam under
# otherwise identical settings.
import matplotlib.pyplot as p

# start_train returns the per-epoch training loss as its first element; the
# other outputs are not needed here. Indexing (rather than unpacking into `_`)
# avoids rebinding IPython's last-output variable.
isAdam = False
SGD_loss = start_train(
    n_iterations=5000,
    learning_rate=0.001,
    fix_decay_regularizer=False,
    isAdam=isAdam,
    batch_size=500)[0]
isAdam = True
Adam_loss_log = start_train(
    n_iterations=5000,
    learning_rate=0.001,
    fix_decay_regularizer=False,
    isAdam=isAdam,
    batch_size=500)[0]

p.plot(SGD_loss,'b-', label="train_SGD")
p.plot(Adam_loss_log,'r-',label ="train_Adam")
p.xlabel("epoch")
p.ylabel("training loss")
p.legend(numpoints = 1)
p.show()
# Report the lowest loss each optimizer reached; the maximum would just be
# the worst epoch of each curve.
print("Adam_loss=",np.min(Adam_loss_log),"SGD_Loss=",np.min(SGD_loss))