In [2]:
%matplotlib inline
import math as math
import random as rand
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold

from datetime import datetime
import statistics 
%matplotlib inline

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, load_model
from keras.datasets import mnist
from keras.utils import to_categorical

Using TensorFlow backend.


In [3]:
import cProfile
import re

def debug(detail,the_output):
    if detail < 1:
        print(datetime.now(),the_output)
    return

pd.set_option('display.max_rows', 120)
pd.set_option('display.max_columns', 999)
pd.set_option('display.width', 1000)
np.set_printoptions(edgeitems=150,linewidth=200)

In [23]:
class Config:
    NUM_X_PIXELS=28
    NUM_Y_PIXELS=28
    PIXEL_CUTOFF=50
    MAX_NOISE_ADD_TO_PIXEL=30
    NUM_SPHERES=8
    NUM_SECTORS_QUAD_1=3
    NUM_SECTORS=4*NUM_SECTORS_QUAD_1
    NUM_POWER_SUMS=1
    KNOWN_NUM_CLUSTERS=10
    TRAIN_NUM_ROWS=50_000#
    TEST_NUM_ROWS=10_000#
    NUM_NEIGHBORS=9
    USE_AREA_INVAR=False
    SIMILARITY_VECTOR_TYPE=1
    NUM_EPOCHS=5
    NUM_KERAS_TRAIN_IMAGES=60_000
    NUM_KERAS_TRAIN_LABELS=NUM_KERAS_TRAIN_IMAGES
    NUM_KERAS_TEST_IMAGES=10_000
    NUM_KERAS_TEST_LABELS=NUM_KERAS_TEST_IMAGES
    SCALE_SIMILARITY_VECTORS=True
    USE_CENTROIDS=False
    TRANSFORM_SHRINK=False
    TRANSFORM_PCT_IMAGES=50
    DO_PROFILING=False

# Get the similarity vector, the object under investigation


In [19]:
slope_bdy_list=[np.tan(2*np.pi*kth_sector/Config.NUM_SECTORS) for kth_sector \
                in range(-Config.NUM_SECTORS_QUAD_1+1,Config.NUM_SECTORS_QUAD_1+1)]
slope_bdy_np=np.array(slope_bdy_list)

#
# Increment the entry that corresponds to the sector and sphere that a
# single point occurs in. Use (slope,dist,direction) to determine that
# entry num.
#
def incr_entry_num(all_vectors_df,max_dist):
    slope=all_vectors_df['slope']
    dist=all_vectors_df['dist']
    direction=all_vectors_df['direction']

    if Config.USE_AREA_INVAR:
        if len(np.where(dist<=radius_bdy_np)):
            which_sphere_num=int(Config.NUM_SPHERES-1)
        else:
            which_sphere_num=np.min(np.where(dist<=radius_bdy_np))
    else:
        which_sphere_num=np.where(dist >= max_dist,Config.NUM_SPHERES-1,\
                              (dist*Config.NUM_SPHERES)//max_dist)
    slope_in_array_ndx=[np.min(np.where(one_slope<=slope_bdy_np),\
                        initial=(len(slope_bdy_np)-1)) \
                        for one_slope in slope]
    closest_slope_ndx=np.where(np.isnan(slope),0,slope_in_array_ndx)
        
    which_entries=(2*closest_slope_ndx+direction+\
                        Config.NUM_SECTORS*which_sphere_num).astype(int)
    return which_entries
#
#
def sum_vector_powers(vector_np,vector_invariants_np):
    total_num_points=np.sum(vector_np)
    vector_density_np=(vector_np/total_num_points)
    for k in range(1,Config.NUM_POWER_SUMS+1):
        for j in range(Config.NUM_SPHERES):
            vector_invariants_np[j+(k-1)*Config.NUM_SPHERES]=np.float_power(\
                np.sum(np.float_power(\
                vector_density_np[j*Config.NUM_SECTORS:\
                                  (j+1)*Config.NUM_SECTORS],k))\
                                                       ,float(1/k))
    return vector_invariants_np

#
# get_one_similarity_vector(one_image)
#
def get_one_similarity_vector(one_image):
    ############
    one_image=one_image.T
    if one_image.sum()==0:
        print('image is all zeros')
        return
    #
    # make the similarity_vector 
    # a vector of random numbers to compare against "real" vectors
    #
    if Config.SIMILARITY_VECTOR_TYPE==-1:
        similarity_vector=np.random.randint(0,high=256,\
                            size=(Config.NUM_SECTORS*Config.NUM_SPHERES))
        return similarity_vector
    #
    # add noise to the image
    #
    if Config.SIMILARITY_VECTOR_TYPE==-2:
        one_image+=np.random.randint(0,high=MAX_NOISE_ADD_TO_PIXEL,\
                        size=(Config.NUM_X_PIXELS*Config.NUM_Y_PIXELS))

    one_image_np=np.resize(one_image, (Config.NUM_X_PIXELS,\
                                       Config.NUM_Y_PIXELS))

    # Get the centroid
    coord_arrays=np.where(one_image_np>Config.PIXEL_CUTOFF)
    x_origin=np.mean(coord_arrays[0])
    y_origin=np.mean(coord_arrays[1])

    # Get all the columns that describe each (x,y) point relative to the centroid
    all_vectors_df=pd.DataFrame((coord_arrays[0]-x_origin,\
                                 coord_arrays[1]-y_origin)).T
    all_vectors_df['dist']=np.sqrt((all_vectors_df[0]*all_vectors_df[0]+\
        all_vectors_df[1]*all_vectors_df[1]))
    all_vectors_df['slope']=all_vectors_df[1]/all_vectors_df[0]
    all_vectors_df['direction']=(all_vectors_df[0]<np.abs(all_vectors_df[0]))

    # initialize np arrays to calculate sector and sphere placement
    # of each (x,y) point
    num_sector_sphere_pixels_np=\
        np.zeros(Config.NUM_SECTORS*Config.NUM_SPHERES,dtype=int)
    vector_invariants_np=\
        np.zeros(Config.NUM_SPHERES*Config.NUM_POWER_SUMS,dtype=float)
    max_dist=all_vectors_df['dist'].max()
    radius_bdy_list=[max_dist*np.sqrt((kth_sphere+1)/Config.NUM_SPHERES) \
                     for kth_sphere in range(Config.NUM_SPHERES)]
    radius_bdy_np=np.array(radius_bdy_list)

    if max_dist == 0:
        print('max_dist is 0 !!')
        return

    which_entries=incr_entry_num(all_vectors_df,max_dist)
    for which_entry in which_entries:
        num_sector_sphere_pixels_np[which_entry]+=1 
    
    similarity_vector=sum_vector_powers(num_sector_sphere_pixels_np,\
                                       vector_invariants_np)
    
    #
    # Choose whether to return the similarity_vector, the 
    # num_sector_sphere_pixels_np, the entire image
    # or some combination of these three choices.
    #
    if Config.SIMILARITY_VECTOR_TYPE==0:
        return np.hstack((num_sector_sphere_pixels_np, similarity_vector))
    elif Config.SIMILARITY_VECTOR_TYPE==1:
        return similarity_vector
    elif Config.SIMILARITY_VECTOR_TYPE==2:
        return num_sector_sphere_pixels_np
    elif Config.SIMILARITY_VECTOR_TYPE==3:
        return np.hstack((one_image.to_numpy(), num_sector_sphere_pixels_np, \
                          similarity_vector))
    elif Config.SIMILARITY_VECTOR_TYPE==4:
        return np.hstack((one_image.to_numpy(), similarity_vector))
    else:# Config.SIMILARITY_VECTOR_TYPE==5:
        return one_image.to_numpy()

#
# get_similarity_vectors(all_images_no_label)
#
def get_similarity_vectors(all_images_no_label):
    debug(0,('Start get_similarity_vectors() processing'))
    all_similarity_vectors=pd.DataFrame()
    for kth_image in range(len(all_images_no_label)):
        if (kth_image % 1000)==999:
            debug(0,('we have processed ',kth_image,' images'))

        one_image=all_images_no_label.iloc[kth_image]
        similarity_vector_np=get_one_similarity_vector(one_image)
        similarity_vector_df = pd.DataFrame(np.resize(similarity_vector_np,\
                                                      (1,len(similarity_vector_np))))
        if np.isnan(similarity_vector_np).any():
            print('similarity_vector_np is NULL START==================================================')
            print(similarity_vector_df)
            print('similarity_vector_np is NULL END==================================================')
        else:
            all_similarity_vectors=all_similarity_vectors.append(similarity_vector_df)
    debug(0,('End get_similarity_vectors() processing'))
    return all_similarity_vectors

# are the class sizes unbalanced?
Not really as the next cell shows

In [6]:
def is_unbalanced(image_label):
    print('The range (min num reps of a digit, max num reps of a digit):')
    print(pd.value_counts(image_label).min(),pd.value_counts(image_label).max())
    return

# Train to recognize MNIST and return all similarity vectors

In [7]:
def training(image_label,image_no_label):
    print('image_label,image_no_label',len(image_label),len(image_no_label))
    is_unbalanced(image_label)

    all_similarity_vectors=pd.DataFrame()
    
    if Config.DO_PROFILING:
        pr = cProfile.Profile()
        pr.enable()
    all_similarity_vectors=get_similarity_vectors(image_no_label)

    if Config.DO_PROFILING:
        pr.disable()
        pr.print_stats()

    similarity_sc=StandardScaler()
    all_similarity_vectors_sc=similarity_sc.fit_transform(all_similarity_vectors)

    if Config.USE_CENTROIDS:
        kmeans_similarity=KMeans(n_clusters=Config.KNOWN_NUM_CLUSTERS,\
                                 random_state=42)
        kmeans_similarity.fit(all_similarity_vectors_sc)
        similarity_train_labels=kmeans_similarity.labels_
    else:
        knn=KNeighborsClassifier(n_neighbors=Config.NUM_NEIGHBORS)
        X_train_sc=all_similarity_vectors_sc
        y_train=image_label

        knn.fit(X_train_sc,y_train)
        train_score=knn.score(X_train_sc,y_train)
    print('The training accuracy is: ',train_score)
        
    return knn,all_similarity_vectors

# Test recognizing MNIST and return all similarity vectors

In [8]:
def testing(image_label,image_no_label,knn):
    is_unbalanced(image_label)
    
    base_sc=StandardScaler()
    image_no_label_sc=base_sc.fit_transform(image_no_label)

    X_test=get_similarity_vectors(image_no_label)
    
    y_test=image_label
    ss_test=StandardScaler()
    X_test_sc=ss_test.fit_transform(X_test)
    
    test_score=knn.score(X_test_sc,y_test)
    print('The testing accuracy is ',test_score)
    return X_test

# Get accuracy of kNN and of similarity vector in conjunction with kNN

In [9]:
def get_kNN_accuracy():
    debug(0,'Start of KNN model')
    all_images_csv=pd.read_csv('./mnist-in-csv/mnist_trainSCOTT.csv',\
        skiprows=0,\
        nrows=Config.TRAIN_NUM_ROWS+Config.TEST_NUM_ROWS)

    if len(all_images_csv)==0:
        print('MNIST is all used up')

    train_images_csv=all_images_csv[0:Config.TRAIN_NUM_ROWS]
    test_images_csv=all_images_csv[Config.TRAIN_NUM_ROWS:\
                                   Config.TRAIN_NUM_ROWS+Config.TEST_NUM_ROWS]
    train_image_label=train_images_csv['label']
    train_image_no_label=train_images_csv[train_images_csv.columns[1:]]

    test_image_label=test_images_csv['label']
    test_image_no_label=test_images_csv[test_images_csv.columns[1:]]

    knn,train_similarity_vectors=training(train_image_label,train_image_no_label)

    test_similarity_vectors=testing(test_image_label,test_image_no_label,knn)
    debug(0,'End of KNN model')
    return

# Results for improving against kNN

accuracy   invariant vector description

0.9025 np.hstack((num_sector_sphere_pixels_np, similarity_vector))

0.8855 num_sector_sphere_pixels_np

0.888  np.hstack((image_no_label.to_numpy(), num_sector_sphere_pixels_np, similarity_vector))

0.8775 np.hstack((image_no_label.to_numpy(), similarity_vector))

0.8645 image_no_label.to_numpy()

0.6063 similarity_vector

# Verbatim from Deep Learning with Python...
...by Francois Chollet, 2018 (page 120-122) including the hardcoded constants.

In [10]:
def verbatim_from_book_CNN_orig():
    model = tf.keras.Sequential()
    model.add(layers.Conv2D(32,(3,3), activation='relu',input_shape=(28,28,1)))
    model.add(layers.MaxPooling2D((2,2)))
    model.add(layers.Conv2D(64,(3,3), activation='relu'))
    model.add(layers.MaxPooling2D((2,2)))
    model.add(layers.Conv2D(64,(3,3), activation='relu'))

    model.add(layers.Flatten())
    model.add(layers.Dense(64,activation='relu'))
    model.add(layers.Dense(10,activation='softmax'))

    (train_images, train_labels),(test_images,test_labels)=mnist.load_data()

    train_images=train_images[0:Config.NUM_KERAS_TRAIN_IMAGES]
    train_labels=train_labels[0:Config.NUM_KERAS_TRAIN_LABELS]

    test_images=test_images[0:Config.NUM_KERAS_TEST_IMAGES]
    test_labels=test_labels[0:Config.NUM_KERAS_TEST_LABELS]

    train_images=train_images.reshape((Config.NUM_KERAS_TRAIN_IMAGES,28,28,1))#60000
    train_images=train_images.astype('float32')/255

    test_images=test_images.reshape((Config.NUM_KERAS_TEST_IMAGES,28,28,1))#10000
    test_images=test_images.astype('float32')/255

    train_labels=to_categorical(train_labels)
    test_labels=to_categorical(test_labels)

    model.compile(optimizer='rmsprop',loss='categorical_crossentropy',\
                  metrics=['accuracy'])

    debug(0,'Start of fit')
    model.fit(train_images, train_labels,epochs=5,batch_size=64)
    debug(0,'End of fit')

    test_loss, test_acc = model.evaluate(test_images,test_labels)
    test_acc
    return

In [11]:
def create_CNN_model(num_x_pixels,num_y_pixels):
    model = tf.keras.Sequential()
    model.add(layers.Conv2D(32,(3,3), activation='relu',\
                            input_shape=(num_x_pixels,num_y_pixels,1)))
    model.add(layers.MaxPooling2D((2,2)))
    model.add(layers.Conv2D(64,(3,3), activation='relu'))
    model.add(layers.MaxPooling2D((2,2)))
    model.add(layers.Conv2D(64,(3,3), activation='relu'))

    model.add(layers.Flatten())
    model.add(layers.Dense(64,activation='relu'))
    model.add(layers.Dense(Config.KNOWN_NUM_CLUSTERS,activation='softmax'))
    return model

In [12]:
def train_eval_CNN_model(model,train_images,train_labels,\
                         test_images,test_labels,num_extra_rows):
    train_images=train_images.reshape((len(train_images),\
                                       Config.NUM_X_PIXELS+num_extra_rows,\
                                       Config.NUM_Y_PIXELS,1))#60000
    train_images=train_images.astype('float32')/255

    test_images=test_images.reshape((len(test_images),\
                                     Config.NUM_X_PIXELS+num_extra_rows,\
                                     Config.NUM_Y_PIXELS,1))#10000
    test_images=test_images.astype('float32')/255

    train_labels=to_categorical(train_labels)
    test_labels=to_categorical(test_labels)

    model.compile(optimizer='rmsprop',loss='categorical_crossentropy',metrics=['accuracy'])

    debug(0,'Start of fit')
    model.fit(train_images,train_labels,epochs=5,batch_size=64,verbose=1)
    debug(0,'End of fit')

    scores = model.evaluate(test_images,test_labels,verbose=0)
    return scores

In [61]:
def shrink_transform(one_image):
    if np.random.randint(0,high=100) >= Config.TRANSFORM_PCT_IMAGES:
        return one_image
#     print(one_image.shape)
    image_shrunk=np.zeros((Config.NUM_X_PIXELS,Config.NUM_Y_PIXELS))
    for x_coord in range(Config.NUM_X_PIXELS):
        for y_coord in range(Config.NUM_Y_PIXELS):
            image_shrunk[x_coord//2,y_coord//2]+=one_image[x_coord,y_coord]/4
    image_shrunk=image_shrunk.astype(int)
#     print(image_shrunk)
    return image_shrunk

In [68]:
def do_transforms(train_images,test_images):
    if Config.TRANSFORM_SHRINK:
        for one_image_ndx in range(len(train_images)):
            if (one_image_ndx % 1000)==999:
                debug(0,('we have transformed ',one_image_ndx,' training images'))

            train_images[one_image_ndx]=shrink_transform(train_images[one_image_ndx])
        for one_image_ndx in range(len(test_images)):
            if (one_image_ndx % 1000)==999:
                debug(0,('we have transformed ',one_image_ndx,' testing images'))

            test_images[one_image_ndx]=shrink_transform(test_images[one_image_ndx])
    return train_images,test_images

In [63]:
# Config.NUM_KERAS_TRAIN_IMAGES=60
# Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES
# Config.NUM_KERAS_TEST_IMAGES=10
# Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

# Config.TRANSFORM_SHRINK=True
# (train_images, train_labels),(test_images,test_labels)=mnist.load_data()

# train_images=train_images[0:Config.NUM_KERAS_TRAIN_IMAGES]
# train_labels=train_labels[0:Config.NUM_KERAS_TRAIN_LABELS]

# test_images=test_images[0:Config.NUM_KERAS_TEST_IMAGES]
# test_labels=test_labels[0:Config.NUM_KERAS_TEST_LABELS]

# train_images,test_images=do_transforms(train_images,test_images)


In [64]:
def verbatim_from_book_CNN():
    debug(0,'Start of verbatim_from_book_CNN')
    (train_images, train_labels),(test_images,test_labels)=mnist.load_data()

    train_images=train_images[0:Config.NUM_KERAS_TRAIN_IMAGES]
    train_labels=train_labels[0:Config.NUM_KERAS_TRAIN_LABELS]

    test_images=test_images[0:Config.NUM_KERAS_TEST_IMAGES]
    test_labels=test_labels[0:Config.NUM_KERAS_TEST_LABELS]

    train_images,test_images=do_transforms(train_images,test_images)

    X=np.vstack((train_images,test_images))
    Y=np.hstack((train_labels,test_labels))

    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    cvscores = []
    for train, test in kfold.split(X, Y):
        model=None
        model=create_CNN_model(Config.NUM_X_PIXELS,Config.NUM_Y_PIXELS)
        scores=train_eval_CNN_model(model,X[train],Y[train],\
                                    X[test], Y[test],0)
        print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
        cvscores.append(scores[1] * 100)
    print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))
    debug(0,'end of verbatim_from_book_CNN')
    return

# Get StratifiedKFold accuracy of verbatim_from_book_CNN

In [13]:
Config.NUM_KERAS_TRAIN_IMAGES=600
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES
Config.NUM_KERAS_TEST_IMAGES=100
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

verbatim_from_book_CNN()

2019-12-09 07:33:21.677490 Start of verbatim_from_book_CNN
2019-12-09 07:33:23.177423 Start of fit
Train on 625 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 07:33:40.272776 End of fit
accuracy: 85.33%
2019-12-09 07:33:41.730317 Start of fit
Train on 626 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 07:33:57.122262 End of fit
accuracy: 86.49%
2019-12-09 07:33:58.408061 Start of fit
Train on 627 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 07:34:14.940364 End of fit
accuracy: 76.71%
2019-12-09 07:34:16.506007 Start of fit
Train on 629 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 07:34:33.522802 End of fit
accuracy: 91.55%
2019-12-09 07:34:35.009570 Start of fit
Train on 630 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 07:34:51.307092 End of fit
accuracy: 88.57%
2019-12-09 07:34:52.992578 Start of fit
Train on 630 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [14]:
Config.NUM_KERAS_TRAIN_IMAGES=6_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES
Config.NUM_KERAS_TEST_IMAGES=1_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

verbatim_from_book_CNN()

2019-12-09 07:36:21.748243 Start of verbatim_from_book_CNN
2019-12-09 07:36:22.758591 Start of fit
Train on 6296 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 07:38:27.053085 End of fit
accuracy: 97.16%
2019-12-09 07:38:29.867204 Start of fit
Train on 6297 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 07:40:34.870180 End of fit
accuracy: 96.87%
2019-12-09 07:40:37.987282 Start of fit
Train on 6297 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 07:42:43.148185 End of fit
accuracy: 95.31%
2019-12-09 07:42:46.243481 Start of fit
Train on 6298 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 07:44:51.221241 End of fit
accuracy: 97.86%
2019-12-09 07:44:54.431070 Start of fit
Train on 6298 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 07:47:00.352129 End of fit
accuracy: 96.58%
2019-12-09 07:47:02.813135 Start of fit
Train on 6301 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoc

# Get StratifiedKFold accuracy of similarity vector in conjunction with verbatim_from_book_CNN

In [69]:
#
#
#
def append_sim_vec(images_np,similarity_vectors):
    num_extra_rows=1+(len(similarity_vectors.T)//Config.NUM_X_PIXELS)
    num_zero_cols_last_row=Config.NUM_X_PIXELS-\
                np.mod(len(similarity_vectors.T),Config.NUM_X_PIXELS)

    similarity_vectors=np.hstack((similarity_vectors,
        np.zeros((len(similarity_vectors),num_zero_cols_last_row),\
                 dtype=float)))

    images_plus_sim=np.hstack((images_np,similarity_vectors))

    images=images_plus_sim.reshape((len(similarity_vectors),\
                                       Config.NUM_X_PIXELS+num_extra_rows,\
                                       Config.NUM_Y_PIXELS,\
                                       1))
    images=images.astype('float32')/255
    return images,num_extra_rows

def test_CNN_with_similarity_vector():
    debug(0,('Start of CNN model with similarity_vector TYPE=',\
             Config.SIMILARITY_VECTOR_TYPE))
    (train_images, train_labels),(test_images,test_labels)=mnist.load_data()
    train_images=train_images[0:Config.NUM_KERAS_TRAIN_IMAGES]
    train_labels=train_labels[0:Config.NUM_KERAS_TRAIN_LABELS]

    test_images=test_images[0:Config.NUM_KERAS_TEST_IMAGES]
    test_labels=test_labels[0:Config.NUM_KERAS_TEST_LABELS]
    print(type(train_images),type(train_labels))
    print(train_images.shape,train_labels.shape)
    
    train_images,test_images=do_transforms(train_images,test_images)

    train_images_np=np.resize(train_images,(len(train_images),\
                            Config.NUM_X_PIXELS*Config.NUM_Y_PIXELS))
    train_labels_np=np.resize(train_labels,(len(train_labels)))

    test_images_np=np.resize(test_images,(len(test_images),\
                            Config.NUM_X_PIXELS*Config.NUM_Y_PIXELS))
    test_labels_np=np.resize(test_labels,(len(test_labels)))

    train_images_df=pd.DataFrame(train_images_np)
    train_labels_df=pd.DataFrame(train_labels_np)

    test_images_df=pd.DataFrame(test_images_np)
    test_labels_df=pd.DataFrame(test_labels_np)

    knn,train_similarity_vectors=training(train_labels_np,train_images_df)

    test_similarity_vectors=testing(test_labels_np,test_images_df,knn)

    #
    #
    #
    train_images,num_extra_rows=\
        append_sim_vec(train_images_np,train_similarity_vectors)
    test_images,_=append_sim_vec(test_images_np,test_similarity_vectors)
    print(type(train_images),type(train_labels))
    print('train_images,train_labels: ',train_images.shape,train_labels.shape)
    X=np.vstack((train_images,test_images))
    Y=np.hstack((train_labels,test_labels))

    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    cvscores = []
    for train, test in kfold.split(X, Y):
        model=None
        model=create_CNN_model(Config.NUM_X_PIXELS+num_extra_rows,\
                               Config.NUM_Y_PIXELS)
        model.compile(optimizer='rmsprop',loss='categorical_crossentropy',\
                      metrics=['accuracy'])
    
        debug(0,'Start of fit')
        model.fit(X[train],to_categorical(Y[train]),epochs=Config.NUM_EPOCHS,\
                  batch_size=64,verbose=1)
        debug(0,'End of fit')

        scores= model.evaluate(X[test],to_categorical(Y[test]),verbose=0)
        print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
        cvscores.append(scores[1] * 100)
    print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))
    debug(0,'End of CNN model with similarity_vector')
    return scores

In [16]:
#     elif Config.SIMILARITY_VECTOR_TYPE==1:
#         return similarity_vector
Config.SIMILARITY_VECTOR_TYPE=1

Config.NUM_KERAS_TRAIN_IMAGES=600
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES
Config.NUM_KERAS_TEST_IMAGES=100
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

test_CNN_with_similarity_vector()

2019-12-09 07:57:47.548767 ('Start of CNN model with similarity_vector TYPE=', 1)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(600, 28, 28) (600,)
image_label,image_no_label 600 600
The range (min num reps of a digit, max num reps of a digit):
49 79
2019-12-09 07:57:48.103009 Start get_similarity_vectors() processing
2019-12-09 07:58:10.830340 End get_similarity_vectors() processing
The training accuracy is:  0.595
The range (min num reps of a digit, max num reps of a digit):
2 15
2019-12-09 07:58:10.924070 Start get_similarity_vectors() processing
2019-12-09 07:58:14.747752 End get_similarity_vectors() processing
The testing accuracy is  0.44
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
train_images,train_labels:  (600, 29, 28, 1) (600,)
2019-12-09 07:58:15.219729 Start of fit
Train on 625 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 07:58:30.554694 End of fit
accuracy: 89.33%
2019-12-09 07:58:31.871853 Start of fit
Train on 626 samples
Epoch 1/5
Epoch 2

[0.47337443144484, 0.8333333]

In [17]:
#     NUM_X_PIXELS=28
#     NUM_Y_PIXELS=28
#     PIXEL_CUTOFF=50
#     NUM_SPHERES=8
#     NUM_SECTORS_QUAD_1=3
#     NUM_SECTORS=4*NUM_SECTORS_QUAD_1
#     NUM_POWER_SUMS=4
#     KNOWN_NUM_CLUSTERS=10
#     TRAIN_NUM_ROWS=50_000#
#     TEST_NUM_ROWS=10_000#
#     NUM_NEIGHBORS=9
#     USE_AREA_INVAR=False
#     SIMILARITY_VECTOR_TYPE=1
#     NUM_EPOCHS=5
#     NUM_KERAS_TRAIN_IMAGES=60_000
#     NUM_KERAS_TRAIN_LABELS=NUM_KERAS_TRAIN_IMAGES
#     NUM_KERAS_TEST_IMAGES=10_000
#     NUM_KERAS_TEST_LABELS=NUM_KERAS_TEST_IMAGES
#     SCALE_SIMILARITY_VECTORS=True
#     USE_CENTROIDS=False
#     DO_PROFILING=False

In [18]:
#     if Config.SIMILARITY_VECTOR_TYPE==0:
#         return np.hstack((num_sector_sphere_pixels_np, similarity_vector))
#     elif Config.SIMILARITY_VECTOR_TYPE==1:
#         return similarity_vector
#     elif Config.SIMILARITY_VECTOR_TYPE==2:
#         return num_sector_sphere_pixels_np
#     elif Config.SIMILARITY_VECTOR_TYPE==3:
#         return np.hstack((one_image.to_numpy(), num_sector_sphere_pixels_np, \
#                           similarity_vector))
#     elif Config.SIMILARITY_VECTOR_TYPE==4:
#         return np.hstack((one_image.to_numpy(), similarity_vector))
#     else:# Config.SIMILARITY_VECTOR_TYPE==5:
#         return one_image.to_numpy()

In [19]:
#     elif Config.SIMILARITY_VECTOR_TYPE==1:
#         return similarity_vector
Config.SIMILARITY_VECTOR_TYPE=1

Config.NUM_KERAS_TRAIN_IMAGES=6_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=1_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

test_CNN_with_similarity_vector()
#     debug(0,'Start of CNN model with similarity_vector: SIMILARITY_VECTOR_TYPE=1')

2019-12-09 08:01:07.780658 ('Start of CNN model with similarity_vector TYPE=', 1)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(6000, 28, 28) (6000,)
image_label,image_no_label 6000 6000
The range (min num reps of a digit, max num reps of a digit):
514 671
2019-12-09 08:01:08.455259 Start get_similarity_vectors() processing
2019-12-09 08:01:46.823964 ('we have processed ', 999, ' images')
2019-12-09 08:02:25.424027 ('we have processed ', 1999, ' images')
2019-12-09 08:03:05.092353 ('we have processed ', 2999, ' images')
2019-12-09 08:03:43.684300 ('we have processed ', 3999, ' images')
2019-12-09 08:04:24.220692 ('we have processed ', 4999, ' images')
2019-12-09 08:05:04.902661 ('we have processed ', 5999, ' images')
2019-12-09 08:05:04.949538 End get_similarity_vectors() processing
The training accuracy is:  0.623
The range (min num reps of a digit, max num reps of a digit):
85 126
2019-12-09 08:05:06.118488 Start get_similarity_vectors() processing
2019-12-09 08:05:44.798723 ('we 

[0.09931242170518842, 0.9698276]

In [20]:
#     if Config.SIMILARITY_VECTOR_TYPE==0:
#         return np.hstack((num_sector_sphere_pixels_np, similarity_vector))
Config.SIMILARITY_VECTOR_TYPE=0
test_CNN_with_similarity_vector()

2019-12-09 08:27:07.946708 ('Start of CNN model with similarity_vector TYPE=', 0)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(6000, 28, 28) (6000,)
image_label,image_no_label 6000 6000
The range (min num reps of a digit, max num reps of a digit):
514 671
2019-12-09 08:27:08.698224 Start get_similarity_vectors() processing
2019-12-09 08:27:47.466420 ('we have processed ', 999, ' images')
2019-12-09 08:28:27.809946 ('we have processed ', 1999, ' images')
2019-12-09 08:29:09.681748 ('we have processed ', 2999, ' images')
2019-12-09 08:29:51.152557 ('we have processed ', 3999, ' images')
2019-12-09 08:30:34.317347 ('we have processed ', 4999, ' images')
2019-12-09 08:31:19.584297 ('we have processed ', 5999, ' images')
2019-12-09 08:31:19.631170 End get_similarity_vectors() processing
The training accuracy is:  0.9371666666666667
The range (min num reps of a digit, max num reps of a digit):
85 126
2019-12-09 08:31:35.207473 Start get_similarity_vectors() processing
2019-12-09 08:32:12

[0.07873797612585898, 0.96695405]

In [21]:
#     elif Config.SIMILARITY_VECTOR_TYPE==1:
#         return similarity_vector
Config.SIMILARITY_VECTOR_TYPE=1
test_CNN_with_similarity_vector()

2019-12-09 08:56:34.749214 ('Start of CNN model with similarity_vector TYPE=', 1)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(6000, 28, 28) (6000,)
image_label,image_no_label 6000 6000
The range (min num reps of a digit, max num reps of a digit):
514 671
2019-12-09 08:56:35.420883 Start get_similarity_vectors() processing
2019-12-09 08:57:14.570500 ('we have processed ', 999, ' images')
2019-12-09 08:57:53.815501 ('we have processed ', 1999, ' images')
2019-12-09 08:58:33.554815 ('we have processed ', 2999, ' images')
2019-12-09 08:59:12.051159 ('we have processed ', 3999, ' images')
2019-12-09 08:59:52.205138 ('we have processed ', 4999, ' images')
2019-12-09 09:00:32.653724 ('we have processed ', 5999, ' images')
2019-12-09 09:00:32.700604 End get_similarity_vectors() processing
The training accuracy is:  0.623
The range (min num reps of a digit, max num reps of a digit):
85 126
2019-12-09 09:00:33.947120 Start get_similarity_vectors() processing
2019-12-09 09:01:11.530976 ('we 

[0.06167907113658971, 0.9770115]

In [22]:
#     elif Config.SIMILARITY_VECTOR_TYPE==2:
#         return num_sector_sphere_pixels_np
Config.SIMILARITY_VECTOR_TYPE=2
test_CNN_with_similarity_vector()

2019-12-09 09:22:23.884399 ('Start of CNN model with similarity_vector TYPE=', 2)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(6000, 28, 28) (6000,)
image_label,image_no_label 6000 6000
The range (min num reps of a digit, max num reps of a digit):
514 671
2019-12-09 09:22:24.506949 Start get_similarity_vectors() processing
2019-12-09 09:23:03.606083 ('we have processed ', 999, ' images')
2019-12-09 09:23:43.386373 ('we have processed ', 1999, ' images')
2019-12-09 09:24:23.868090 ('we have processed ', 2999, ' images')
2019-12-09 09:25:03.867304 ('we have processed ', 3999, ' images')
2019-12-09 09:25:44.837360 ('we have processed ', 4999, ' images')
2019-12-09 09:26:27.018836 ('we have processed ', 5999, ' images')
2019-12-09 09:26:27.065725 End get_similarity_vectors() processing
The training accuracy is:  0.9325
The range (min num reps of a digit, max num reps of a digit):
85 126
2019-12-09 09:26:41.302357 Start get_similarity_vectors() processing
2019-12-09 09:27:18.804531 ('we

[0.09014075289041489, 0.9683908]

In [23]:
#     elif Config.SIMILARITY_VECTOR_TYPE==3:
#         return np.hstack((one_image.to_numpy(), num_sector_sphere_pixels_np, \
#                           similarity_vector))
Config.SIMILARITY_VECTOR_TYPE=3
test_CNN_with_similarity_vector()

2019-12-09 09:52:32.630573 ('Start of CNN model with similarity_vector TYPE=', 3)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(6000, 28, 28) (6000,)
image_label,image_no_label 6000 6000
The range (min num reps of a digit, max num reps of a digit):
514 671
2019-12-09 09:52:33.528468 Start get_similarity_vectors() processing
2019-12-09 09:53:21.712717 ('we have processed ', 999, ' images')
2019-12-09 09:54:17.364740 ('we have processed ', 1999, ' images')
2019-12-09 09:55:22.938042 ('we have processed ', 2999, ' images')
2019-12-09 09:56:34.864973 ('we have processed ', 3999, ' images')
2019-12-09 09:57:58.944417 ('we have processed ', 4999, ' images')
2019-12-09 09:59:28.936126 ('we have processed ', 5999, ' images')
2019-12-09 09:59:29.045474 End get_similarity_vectors() processing
The training accuracy is:  0.9323333333333333
The range (min num reps of a digit, max num reps of a digit):
85 126
2019-12-09 10:01:09.483946 Start get_similarity_vectors() processing
2019-12-09 10:01:54

[0.07498915565879909, 0.9683908]

In [24]:
#     elif Config.SIMILARITY_VECTOR_TYPE==4:
#         return np.hstack((one_image.to_numpy(), similarity_vector))
Config.SIMILARITY_VECTOR_TYPE=4
test_CNN_with_similarity_vector()

2019-12-09 10:49:14.008987 ('Start of CNN model with similarity_vector TYPE=', 4)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(6000, 28, 28) (6000,)
image_label,image_no_label 6000 6000
The range (min num reps of a digit, max num reps of a digit):
514 671
2019-12-09 10:49:14.633196 Start get_similarity_vectors() processing
2019-12-09 10:49:57.309481 ('we have processed ', 999, ' images')
2019-12-09 10:50:47.880155 ('we have processed ', 1999, ' images')
2019-12-09 10:51:45.983975 ('we have processed ', 2999, ' images')
2019-12-09 10:52:51.661348 ('we have processed ', 3999, ' images')
2019-12-09 10:54:05.906516 ('we have processed ', 4999, ' images')
2019-12-09 10:55:29.575899 ('we have processed ', 5999, ' images')
2019-12-09 10:55:29.669650 End get_similarity_vectors() processing
The training accuracy is:  0.9255
The range (min num reps of a digit, max num reps of a digit):
85 126
2019-12-09 10:56:51.821969 Start get_similarity_vectors() processing
2019-12-09 10:57:32.924295 ('we

[0.07483113081804638, 0.9813218]

In [25]:
#     else:# Config.SIMILARITY_VECTOR_TYPE==5:
#         return one_image.to_numpy()
Config.SIMILARITY_VECTOR_TYPE=5
test_CNN_with_similarity_vector()

2019-12-09 11:41:39.565732 ('Start of CNN model with similarity_vector TYPE=', 5)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(6000, 28, 28) (6000,)
image_label,image_no_label 6000 6000
The range (min num reps of a digit, max num reps of a digit):
514 671
2019-12-09 11:41:40.196316 Start get_similarity_vectors() processing
2019-12-09 11:42:21.891297 ('we have processed ', 999, ' images')
2019-12-09 11:43:04.613272 ('we have processed ', 1999, ' images')
2019-12-09 11:43:47.878467 ('we have processed ', 2999, ' images')
2019-12-09 11:44:30.486845 ('we have processed ', 3999, ' images')
2019-12-09 11:45:14.204283 ('we have processed ', 4999, ' images')
2019-12-09 11:45:59.452704 ('we have processed ', 5999, ' images')
2019-12-09 11:45:59.499577 End get_similarity_vectors() processing
The training accuracy is:  0.9233333333333333
The range (min num reps of a digit, max num reps of a digit):
85 126
2019-12-09 11:47:21.443093 Start get_similarity_vectors() processing
2019-12-09 11:47:58

[0.08084930814588549, 0.96408045]

In [27]:
#     elif Config.SIMILARITY_VECTOR_TYPE==1:
#         return similarity_vector
Config.SIMILARITY_VECTOR_TYPE=1

Config.NUM_KERAS_TRAIN_IMAGES=6_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=1_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

test_CNN_with_similarity_vector()

2019-12-09 20:26:17.078726 ('Start of CNN model with similarity_vector TYPE=', 1)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(6000, 28, 28) (6000,)
image_label,image_no_label 6000 6000
The range (min num reps of a digit, max num reps of a digit):
514 671
2019-12-09 20:26:18.217511 Start get_similarity_vectors() processing
2019-12-09 20:26:59.971700 ('we have processed ', 999, ' images')
2019-12-09 20:27:38.851595 ('we have processed ', 1999, ' images')
2019-12-09 20:28:18.167541 ('we have processed ', 2999, ' images')
2019-12-09 20:28:56.289471 ('we have processed ', 3999, ' images')
2019-12-09 20:29:35.984366 ('we have processed ', 4999, ' images')
2019-12-09 20:30:16.355422 ('we have processed ', 5999, ' images')
2019-12-09 20:30:16.402309 End get_similarity_vectors() processing
The training accuracy is:  0.623
The range (min num reps of a digit, max num reps of a digit):
85 126
2019-12-09 20:30:17.570052 Start get_similarity_vectors() processing
2019-12-09 20:30:54.927219 ('we 

[0.1093817762281189, 0.9583333]

# Does accuracy improvement help the most with fewer images?

In [28]:
Config.NUM_KERAS_TRAIN_IMAGES=12_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=2_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

verbatim_from_book_CNN()

2019-12-09 20:52:45.972902 Start of verbatim_from_book_CNN
2019-12-09 20:52:47.585554 Start of fit
Train on 12595 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 20:56:54.899804 End of fit
accuracy: 97.44%
2019-12-09 20:56:59.286173 Start of fit
Train on 12597 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 21:01:04.198382 End of fit
accuracy: 97.79%
2019-12-09 21:01:08.665177 Start of fit
Train on 12597 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 21:05:12.961070 End of fit
accuracy: 97.58%
2019-12-09 21:05:16.926071 Start of fit
Train on 12597 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 21:09:20.217501 End of fit
accuracy: 97.86%
2019-12-09 21:09:24.442020 Start of fit
Train on 12598 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 21:13:30.526585 End of fit
accuracy: 98.00%
2019-12-09 21:13:35.116124 Start of fit
Train on 12601 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/

In [29]:
Config.NUM_KERAS_TRAIN_IMAGES=24_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=4_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

verbatim_from_book_CNN()

2019-12-09 21:34:22.127747 Start of verbatim_from_book_CNN
2019-12-09 21:34:24.232965 Start of fit
Train on 25195 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 21:42:29.989081 End of fit
accuracy: 98.93%
2019-12-09 21:42:37.262628 Start of fit
Train on 25196 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 21:50:40.652061 End of fit
accuracy: 98.89%
2019-12-09 21:50:47.870359 Start of fit
Train on 25198 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 21:58:50.535286 End of fit
accuracy: 98.11%
2019-12-09 21:58:57.765763 Start of fit
Train on 25198 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 22:07:00.648633 End of fit
accuracy: 98.75%
2019-12-09 22:07:07.715957 Start of fit
Train on 25198 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 22:15:11.343588 End of fit
accuracy: 98.57%
2019-12-09 22:15:19.084846 Start of fit
Train on 25201 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/

In [30]:
Config.NUM_KERAS_TRAIN_IMAGES=36_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=6_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

verbatim_from_book_CNN()

2019-12-09 22:56:45.799455 Start of verbatim_from_book_CNN
2019-12-09 22:56:48.592236 Start of fit
Train on 37797 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 23:09:02.785698 End of fit
accuracy: 98.95%
2019-12-09 23:12:04.348209 Start of fit
Train on 37797 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 23:24:07.878337 End of fit
accuracy: 98.76%
2019-12-09 23:24:20.481135 Start of fit
Train on 37798 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 23:36:37.119274 End of fit
accuracy: 98.74%
2019-12-09 23:36:48.335295 Start of fit
Train on 37798 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-09 23:48:52.985871 End of fit
accuracy: 98.43%
2019-12-09 23:49:04.676919 Start of fit
Train on 37798 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 00:01:08.426355 End of fit
accuracy: 98.69%
2019-12-10 00:01:19.989523 Start of fit
Train on 37800 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/

In [31]:
Config.NUM_KERAS_TRAIN_IMAGES=6_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=1_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

verbatim_from_book_CNN()

2019-12-10 01:03:56.494019 Start of verbatim_from_book_CNN
2019-12-10 01:03:59.145538 Start of fit
Train on 6296 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 01:06:09.020725 End of fit
accuracy: 97.73%
2019-12-10 01:06:13.168364 Start of fit
Train on 6297 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 01:08:37.174312 End of fit
accuracy: 97.01%
2019-12-10 01:08:41.124679 Start of fit
Train on 6297 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 01:10:45.095577 End of fit
accuracy: 97.30%
2019-12-10 01:10:48.644133 Start of fit
Train on 6298 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 01:12:54.540415 End of fit
accuracy: 97.86%
2019-12-10 01:12:58.333429 Start of fit
Train on 6298 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 01:15:03.052517 End of fit
accuracy: 96.44%
2019-12-10 01:15:06.615139 Start of fit
Train on 6301 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoc

In [32]:
#     elif Config.SIMILARITY_VECTOR_TYPE==1:
#         return similarity_vector
Config.SIMILARITY_VECTOR_TYPE=1

Config.NUM_KERAS_TRAIN_IMAGES=12_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=2_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

test_CNN_with_similarity_vector()

2019-12-10 01:25:55.379901 ('Start of CNN model with similarity_vector TYPE=', 1)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(12000, 28, 28) (12000,)
image_label,image_no_label 12000 12000
The range (min num reps of a digit, max num reps of a digit):
1048 1351
2019-12-10 01:25:56.180246 Start get_similarity_vectors() processing
2019-12-10 01:26:37.636374 ('we have processed ', 999, ' images')
2019-12-10 01:27:18.748180 ('we have processed ', 1999, ' images')
2019-12-10 01:28:03.789245 ('we have processed ', 2999, ' images')
2019-12-10 01:28:43.290017 ('we have processed ', 3999, ' images')
2019-12-10 01:29:24.086251 ('we have processed ', 4999, ' images')
2019-12-10 01:30:05.473373 ('we have processed ', 5999, ' images')
2019-12-10 01:30:46.347358 ('we have processed ', 6999, ' images')
2019-12-10 01:31:25.634928 ('we have processed ', 7999, ' images')
2019-12-10 01:32:07.002175 ('we have processed ', 8999, ' images')
2019-12-10 01:32:47.173544 ('we have processed ', 9999, ' image

[0.06089299398017079, 0.981362]

In [33]:
#     elif Config.SIMILARITY_VECTOR_TYPE==1:
#         return similarity_vector
Config.SIMILARITY_VECTOR_TYPE=1

Config.NUM_KERAS_TRAIN_IMAGES=24_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=4_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

test_CNN_with_similarity_vector()

2019-12-10 02:18:17.975598 ('Start of CNN model with similarity_vector TYPE=', 1)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(24000, 28, 28) (24000,)
image_label,image_no_label 24000 24000
The range (min num reps of a digit, max num reps of a digit):
2144 2732
2019-12-10 02:18:19.700792 Start get_similarity_vectors() processing
2019-12-10 02:19:03.874246 ('we have processed ', 999, ' images')
2019-12-10 02:19:46.117843 ('we have processed ', 1999, ' images')
2019-12-10 02:20:28.023483 ('we have processed ', 2999, ' images')
2019-12-10 02:21:07.923365 ('we have processed ', 3999, ' images')
2019-12-10 02:21:49.028194 ('we have processed ', 4999, ' images')
2019-12-10 02:22:31.575568 ('we have processed ', 5999, ' images')
2019-12-10 02:23:13.888035 ('we have processed ', 6999, ' images')
2019-12-10 02:23:54.117663 ('we have processed ', 7999, ' images')
2019-12-10 02:24:34.741336 ('we have processed ', 8999, ' images')
2019-12-10 02:25:14.162382 ('we have processed ', 9999, ' image



2019-12-10 02:29:56.063222 ('we have processed ', 16999, ' images')
2019-12-10 02:30:35.925379 ('we have processed ', 17999, ' images')
2019-12-10 02:31:16.364191 ('we have processed ', 18999, ' images')
2019-12-10 02:31:58.186717 ('we have processed ', 19999, ' images')
2019-12-10 02:32:39.084287 ('we have processed ', 20999, ' images')
2019-12-10 02:33:20.198532 ('we have processed ', 21999, ' images')
2019-12-10 02:34:01.781858 ('we have processed ', 22999, ' images')
2019-12-10 02:34:43.456606 ('we have processed ', 23999, ' images')
2019-12-10 02:34:43.503480 End get_similarity_vectors() processing
The training accuracy is:  0.6163333333333333
The range (min num reps of a digit, max num reps of a digit):
370 450
2019-12-10 02:34:49.697355 Start get_similarity_vectors() processing
2019-12-10 02:35:27.166144 ('we have processed ', 999, ' images')
2019-12-10 02:36:04.596471 ('we have processed ', 1999, ' images')
2019-12-10 02:36:42.296312 ('we have processed ', 2999, ' images')
2019

[0.057451029734663366, 0.98390555]

In [34]:
#     elif Config.SIMILARITY_VECTOR_TYPE==1:
#         return similarity_vector
Config.SIMILARITY_VECTOR_TYPE=1

Config.NUM_KERAS_TRAIN_IMAGES=36_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=6_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

test_CNN_with_similarity_vector()

2019-12-10 04:02:29.408032 ('Start of CNN model with similarity_vector TYPE=', 1)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(36000, 28, 28) (36000,)
image_label,image_no_label 36000 36000
The range (min num reps of a digit, max num reps of a digit):
3230 4113
2019-12-10 04:02:31.121581 Start get_similarity_vectors() processing
2019-12-10 04:03:19.502549 ('we have processed ', 999, ' images')
2019-12-10 04:03:59.981956 ('we have processed ', 1999, ' images')
2019-12-10 04:04:40.763586 ('we have processed ', 2999, ' images')
2019-12-10 04:05:20.399700 ('we have processed ', 3999, ' images')
2019-12-10 04:06:01.181348 ('we have processed ', 4999, ' images')
2019-12-10 04:06:41.659559 ('we have processed ', 5999, ' images')
2019-12-10 04:07:23.932476 ('we have processed ', 6999, ' images')
2019-12-10 04:08:02.530960 ('we have processed ', 7999, ' images')
2019-12-10 04:08:42.933105 ('we have processed ', 8999, ' images')
2019-12-10 04:09:22.266812 ('we have processed ', 9999, ' image



2019-12-10 04:14:04.830299 ('we have processed ', 16999, ' images')
2019-12-10 04:14:44.832205 ('we have processed ', 17999, ' images')
2019-12-10 04:15:24.640755 ('we have processed ', 18999, ' images')
2019-12-10 04:16:06.938908 ('we have processed ', 19999, ' images')
2019-12-10 04:16:47.833343 ('we have processed ', 20999, ' images')
2019-12-10 04:17:29.291953 ('we have processed ', 21999, ' images')
2019-12-10 04:18:10.791083 ('we have processed ', 22999, ' images')
2019-12-10 04:18:51.997319 ('we have processed ', 23999, ' images')
2019-12-10 04:19:32.904956 ('we have processed ', 24999, ' images')
2019-12-10 04:20:13.849031 ('we have processed ', 25999, ' images')
2019-12-10 04:20:54.142516 ('we have processed ', 26999, ' images')
2019-12-10 04:21:35.394956 ('we have processed ', 27999, ' images')
2019-12-10 04:22:18.100092 ('we have processed ', 28999, ' images')
2019-12-10 04:22:59.668396 ('we have processed ', 29999, ' images')
2019-12-10 04:23:40.270009 ('we have processed '

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 06:47:28.419551 End of fit
accuracy: 98.43%
98.72% (+/- 0.28%)
2019-12-10 06:47:39.255017 End of CNN model with similarity_vector


[0.053361705205353084, 0.98426324]

In [35]:
#     elif Config.SIMILARITY_VECTOR_TYPE==1:
#         return similarity_vector
Config.SIMILARITY_VECTOR_TYPE=1

Config.NUM_KERAS_TRAIN_IMAGES=48_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=8_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

test_CNN_with_similarity_vector()

2019-12-10 06:47:42.082086 ('Start of CNN model with similarity_vector TYPE=', 1)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(48000, 28, 28) (48000,)
image_label,image_no_label 48000 48000
The range (min num reps of a digit, max num reps of a digit):
4324 5470
2019-12-10 06:47:45.959232 Start get_similarity_vectors() processing
2019-12-10 06:48:31.386921 ('we have processed ', 999, ' images')
2019-12-10 06:49:12.029985 ('we have processed ', 1999, ' images')
2019-12-10 06:49:52.126699 ('we have processed ', 2999, ' images')
2019-12-10 06:50:31.144001 ('we have processed ', 3999, ' images')
2019-12-10 06:51:10.994230 ('we have processed ', 4999, ' images')
2019-12-10 06:51:51.752365 ('we have processed ', 5999, ' images')
2019-12-10 06:52:31.957597 ('we have processed ', 6999, ' images')
2019-12-10 06:53:10.448461 ('we have processed ', 7999, ' images')
2019-12-10 06:53:53.468760 ('we have processed ', 8999, ' images')
2019-12-10 06:54:33.525079 ('we have processed ', 9999, ' image



2019-12-10 06:59:26.508025 ('we have processed ', 16999, ' images')
2019-12-10 07:00:08.932098 ('we have processed ', 17999, ' images')
2019-12-10 07:00:50.164556 ('we have processed ', 18999, ' images')
2019-12-10 07:01:32.588138 ('we have processed ', 19999, ' images')
2019-12-10 07:02:13.529909 ('we have processed ', 20999, ' images')
2019-12-10 07:02:54.659921 ('we have processed ', 21999, ' images')
2019-12-10 07:03:36.573197 ('we have processed ', 22999, ' images')
2019-12-10 07:04:17.806593 ('we have processed ', 23999, ' images')
2019-12-10 07:04:58.885173 ('we have processed ', 24999, ' images')
2019-12-10 07:05:39.724301 ('we have processed ', 25999, ' images')
2019-12-10 07:06:20.133360 ('we have processed ', 26999, ' images')
2019-12-10 07:07:01.477621 ('we have processed ', 27999, ' images')
2019-12-10 07:07:44.933101 ('we have processed ', 28999, ' images')
2019-12-10 07:08:26.409715 ('we have processed ', 29999, ' images')
2019-12-10 07:09:06.925867 ('we have processed '

[0.030160645325549598, 0.98981595]

In [36]:
#     elif Config.SIMILARITY_VECTOR_TYPE==1:
#         return similarity_vector
Config.SIMILARITY_VECTOR_TYPE=1

Config.NUM_KERAS_TRAIN_IMAGES=60_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=10_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

test_CNN_with_similarity_vector()

2019-12-10 10:25:09.939968 ('Start of CNN model with similarity_vector TYPE=', 1)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(60000, 28, 28) (60000,)
image_label,image_no_label 60000 60000
The range (min num reps of a digit, max num reps of a digit):
5421 6742
2019-12-10 10:25:13.061646 Start get_similarity_vectors() processing
2019-12-10 10:25:58.357801 ('we have processed ', 999, ' images')
2019-12-10 10:26:38.597678 ('we have processed ', 1999, ' images')
2019-12-10 10:27:18.480111 ('we have processed ', 2999, ' images')
2019-12-10 10:27:57.456937 ('we have processed ', 3999, ' images')
2019-12-10 10:28:37.290487 ('we have processed ', 4999, ' images')
2019-12-10 10:29:17.939926 ('we have processed ', 5999, ' images')
2019-12-10 10:29:58.691651 ('we have processed ', 6999, ' images')
2019-12-10 10:30:37.054437 ('we have processed ', 7999, ' images')
2019-12-10 10:31:17.296515 ('we have processed ', 8999, ' images')
2019-12-10 10:31:57.458302 ('we have processed ', 9999, ' image



2019-12-10 10:36:43.978902 ('we have processed ', 16999, ' images')
2019-12-10 10:37:24.004765 ('we have processed ', 17999, ' images')
2019-12-10 10:38:04.120718 ('we have processed ', 18999, ' images')
2019-12-10 10:38:45.983749 ('we have processed ', 19999, ' images')
2019-12-10 10:39:26.973207 ('we have processed ', 20999, ' images')
2019-12-10 10:40:08.749131 ('we have processed ', 21999, ' images')
2019-12-10 10:40:50.565851 ('we have processed ', 22999, ' images')
2019-12-10 10:41:31.874413 ('we have processed ', 23999, ' images')
2019-12-10 10:42:12.344194 ('we have processed ', 24999, ' images')
2019-12-10 10:42:53.291206 ('we have processed ', 25999, ' images')
2019-12-10 10:43:34.072912 ('we have processed ', 26999, ' images')
2019-12-10 10:44:15.429700 ('we have processed ', 27999, ' images')
2019-12-10 10:44:58.983494 ('we have processed ', 28999, ' images')
2019-12-10 10:45:40.869594 ('we have processed ', 29999, ' images')
2019-12-10 10:46:21.758308 ('we have processed '

[0.03197846616445077, 0.9909949]

In [37]:
Config.NUM_KERAS_TRAIN_IMAGES=48_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=8_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

verbatim_from_book_CNN()

2019-12-10 15:07:12.613834 Start of verbatim_from_book_CNN
2019-12-10 15:07:34.073486 Start of fit
Train on 50395 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 15:25:44.029011 End of fit
accuracy: 98.88%
2019-12-10 15:26:09.883683 Start of fit
Train on 50396 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 15:58:54.851129 End of fit
accuracy: 98.82%
2019-12-10 15:59:14.919947 Start of fit
Train on 50398 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 16:16:49.649417 End of fit
accuracy: 98.98%
2019-12-10 16:17:10.123763 Start of fit
Train on 50399 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 16:34:19.252588 End of fit
accuracy: 98.91%
2019-12-10 16:34:48.209516 Start of fit
Train on 50400 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 16:52:06.103676 End of fit
accuracy: 99.20%
2019-12-10 16:52:22.391739 Start of fit
Train on 50401 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/

In [38]:
Config.NUM_KERAS_TRAIN_IMAGES=60_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=10_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

verbatim_from_book_CNN()

2019-12-10 18:37:16.226115 Start of verbatim_from_book_CNN
2019-12-10 18:37:47.311707 Start of fit
Train on 62995 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 18:58:15.742434 End of fit
accuracy: 98.66%
2019-12-10 18:58:42.022040 Start of fit
Train on 62996 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 19:19:30.574587 End of fit
accuracy: 99.20%
2019-12-10 19:20:04.138859 Start of fit
Train on 62996 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 19:40:58.475275 End of fit
accuracy: 98.99%
2019-12-10 19:41:20.739361 Start of fit
Train on 62999 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 20:02:02.788383 End of fit
accuracy: 99.20%
2019-12-10 20:02:22.479436 Start of fit
Train on 63000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-10 20:22:42.497536 End of fit
accuracy: 99.09%
2019-12-10 20:23:02.271137 Start of fit
Train on 63001 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/

In [17]:
#     elif Config.SIMILARITY_VECTOR_TYPE==1:
#         return similarity_vector
Config.SIMILARITY_VECTOR_TYPE=1

Config.NUM_KERAS_TRAIN_IMAGES=60_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=10_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

for _ in range(3):
    test_CNN_with_similarity_vector()

2019-12-12 04:12:50.843219 ('Start of CNN model with similarity_vector TYPE=', 1)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(60000, 28, 28) (60000,)
image_label,image_no_label 60000 60000
The range (min num reps of a digit, max num reps of a digit):
5421 6742
2019-12-12 04:12:51.848428 Start get_similarity_vectors() processing
2019-12-12 04:13:32.065682 ('we have processed ', 999, ' images')
2019-12-12 04:14:12.354135 ('we have processed ', 1999, ' images')
2019-12-12 04:14:52.551560 ('we have processed ', 2999, ' images')
2019-12-12 04:15:31.776329 ('we have processed ', 3999, ' images')
2019-12-12 04:16:11.528223 ('we have processed ', 4999, ' images')
2019-12-12 04:16:52.531265 ('we have processed ', 5999, ' images')
2019-12-12 04:17:32.063343 ('we have processed ', 6999, ' images')
2019-12-12 04:18:09.730215 ('we have processed ', 7999, ' images')
2019-12-12 04:18:49.555070 ('we have processed ', 8999, ' images')
2019-12-12 04:19:28.551082 ('we have processed ', 9999, ' image



2019-12-12 04:24:05.868856 ('we have processed ', 16999, ' images')
2019-12-12 04:24:45.699210 ('we have processed ', 17999, ' images')
2019-12-12 04:25:24.925906 ('we have processed ', 18999, ' images')
2019-12-12 04:26:06.548198 ('we have processed ', 19999, ' images')
2019-12-12 04:26:47.505640 ('we have processed ', 20999, ' images')
2019-12-12 04:27:28.409237 ('we have processed ', 21999, ' images')
2019-12-12 04:28:09.440198 ('we have processed ', 22999, ' images')
2019-12-12 04:28:50.452875 ('we have processed ', 23999, ' images')
2019-12-12 04:29:30.631510 ('we have processed ', 24999, ' images')
2019-12-12 04:30:11.871360 ('we have processed ', 25999, ' images')
2019-12-12 04:30:51.882998 ('we have processed ', 26999, ' images')
2019-12-12 04:31:33.627044 ('we have processed ', 27999, ' images')
2019-12-12 04:32:17.268554 ('we have processed ', 28999, ' images')
2019-12-12 04:32:57.912238 ('we have processed ', 29999, ' images')
2019-12-12 04:33:38.119305 ('we have processed '



2019-12-12 08:40:14.895247 ('we have processed ', 16999, ' images')
2019-12-12 08:40:56.109362 ('we have processed ', 17999, ' images')
2019-12-12 08:41:37.554811 ('we have processed ', 18999, ' images')
2019-12-12 08:42:22.837377 ('we have processed ', 19999, ' images')
2019-12-12 08:43:06.907191 ('we have processed ', 20999, ' images')
2019-12-12 08:43:49.777440 ('we have processed ', 21999, ' images')
2019-12-12 08:44:33.920690 ('we have processed ', 22999, ' images')
2019-12-12 08:45:15.035674 ('we have processed ', 23999, ' images')
2019-12-12 08:45:55.544069 ('we have processed ', 24999, ' images')
2019-12-12 08:46:36.340404 ('we have processed ', 25999, ' images')
2019-12-12 08:47:16.495333 ('we have processed ', 26999, ' images')
2019-12-12 08:47:57.469768 ('we have processed ', 27999, ' images')
2019-12-12 08:48:40.274872 ('we have processed ', 28999, ' images')
2019-12-12 08:49:20.831728 ('we have processed ', 29999, ' images')
2019-12-12 08:50:01.241045 ('we have processed '



2019-12-12 12:57:42.983843 ('we have processed ', 16999, ' images')
2019-12-12 12:58:22.806772 ('we have processed ', 17999, ' images')
2019-12-12 12:59:02.511800 ('we have processed ', 18999, ' images')
2019-12-12 12:59:44.883805 ('we have processed ', 19999, ' images')
2019-12-12 13:00:25.734483 ('we have processed ', 20999, ' images')
2019-12-12 13:01:06.708412 ('we have processed ', 21999, ' images')
2019-12-12 13:01:48.108990 ('we have processed ', 22999, ' images')
2019-12-12 13:02:29.286496 ('we have processed ', 23999, ' images')
2019-12-12 13:03:09.810420 ('we have processed ', 24999, ' images')
2019-12-12 13:03:51.062989 ('we have processed ', 25999, ' images')
2019-12-12 13:04:31.934432 ('we have processed ', 26999, ' images')
2019-12-12 13:05:13.155682 ('we have processed ', 27999, ' images')
2019-12-12 13:05:55.957350 ('we have processed ', 28999, ' images')
2019-12-12 13:06:37.236896 ('we have processed ', 29999, ' images')
2019-12-12 13:07:17.410028 ('we have processed '

In [20]:
#     elif Config.SIMILARITY_VECTOR_TYPE==1:
#         return similarity_vector
Config.SIMILARITY_VECTOR_TYPE=-1

Config.NUM_KERAS_TRAIN_IMAGES=60_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=10_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

for _ in range(3):
    test_CNN_with_similarity_vector()

2019-12-12 17:06:42.850416 ('Start of CNN model with similarity_vector TYPE=', -1)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(60000, 28, 28) (60000,)
image_label,image_no_label 60000 60000
The range (min num reps of a digit, max num reps of a digit):
5421 6742
2019-12-12 17:06:45.134799 Start get_similarity_vectors() processing
2019-12-12 17:06:49.221413 ('we have processed ', 999, ' images')
2019-12-12 17:06:52.844874 ('we have processed ', 1999, ' images')
2019-12-12 17:06:56.132753 ('we have processed ', 2999, ' images')
2019-12-12 17:07:00.673556 ('we have processed ', 3999, ' images')
2019-12-12 17:07:05.376903 ('we have processed ', 4999, ' images')
2019-12-12 17:07:10.447736 ('we have processed ', 5999, ' images')
2019-12-12 17:07:16.144862 ('we have processed ', 6999, ' images')
2019-12-12 17:07:22.211764 ('we have processed ', 7999, ' images')
2019-12-12 17:07:28.552618 ('we have processed ', 8999, ' images')
2019-12-12 17:07:35.644800 ('we have processed ', 9999, ' imag

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-12 20:03:33.425014 End of fit
accuracy: 98.91%
2019-12-12 20:03:50.788612 Start of fit
Train on 63001 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-12 20:27:08.358045 End of fit
accuracy: 99.00%
2019-12-12 20:27:25.551179 Start of fit
Train on 63002 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-12 20:50:52.076968 End of fit
accuracy: 99.14%
2019-12-12 20:51:09.145986 Start of fit
Train on 63003 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-12 21:14:32.542545 End of fit
accuracy: 99.14%
2019-12-12 21:17:06.792439 Start of fit
Train on 63004 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-12 21:40:33.792274 End of fit
accuracy: 99.03%
2019-12-12 21:40:51.655565 Start of fit
Train on 63004 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-12 22:04:33.690437 End of fit
accuracy: 99.14%
99.09% (+/- 0.12%)
2019-12-12 22:04:51.506038 End of CNN model wi

2019-12-12 22:20:38.920058 ('we have processed ', 58999, ' images')
2019-12-12 22:21:08.378420 ('we have processed ', 59999, ' images')
2019-12-12 22:21:08.409691 End get_similarity_vectors() processing
The training accuracy is:  0.28525
The range (min num reps of a digit, max num reps of a digit):
892 1135
2019-12-12 22:54:22.308788 Start get_similarity_vectors() processing
2019-12-12 22:54:24.272140 ('we have processed ', 999, ' images')
2019-12-12 22:54:26.789256 ('we have processed ', 1999, ' images')
2019-12-12 22:54:30.275077 ('we have processed ', 2999, ' images')
2019-12-12 22:54:34.730497 ('we have processed ', 3999, ' images')
2019-12-12 22:54:39.319485 ('we have processed ', 4999, ' images')
2019-12-12 22:54:44.432147 ('we have processed ', 5999, ' images')
2019-12-12 22:54:50.319672 ('we have processed ', 6999, ' images')
2019-12-12 22:54:56.328356 ('we have processed ', 7999, ' images')
2019-12-12 22:55:02.929688 ('we have processed ', 8999, ' images')
2019-12-12 22:55:10.

2019-12-13 03:10:15.959945 ('we have processed ', 999, ' images')
2019-12-13 03:10:19.743578 ('we have processed ', 1999, ' images')
2019-12-13 03:10:23.057160 ('we have processed ', 2999, ' images')
2019-12-13 03:10:27.278516 ('we have processed ', 3999, ' images')
2019-12-13 03:10:31.916701 ('we have processed ', 4999, ' images')
2019-12-13 03:10:37.316317 ('we have processed ', 5999, ' images')
2019-12-13 03:10:42.856549 ('we have processed ', 6999, ' images')
2019-12-13 03:10:48.937378 ('we have processed ', 7999, ' images')
2019-12-13 03:10:55.551522 ('we have processed ', 8999, ' images')
2019-12-13 03:11:02.419606 ('we have processed ', 9999, ' images')
2019-12-13 03:11:10.143865 ('we have processed ', 10999, ' images')
2019-12-13 03:11:18.081288 ('we have processed ', 11999, ' images')
2019-12-13 03:11:27.043610 ('we have processed ', 12999, ' images')
2019-12-13 03:11:35.792587 ('we have processed ', 13999, ' images')
2019-12-13 03:11:45.463873 ('we have processed ', 14999, ' 

Epoch 5/5
2019-12-13 06:10:15.482636 End of fit
accuracy: 99.00%
2019-12-13 06:10:33.392410 Start of fit
Train on 63001 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-13 06:36:37.191802 End of fit
accuracy: 99.17%
2019-12-13 06:36:53.826061 Start of fit
Train on 63002 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-13 07:00:23.862920 End of fit
accuracy: 99.21%
2019-12-13 07:00:41.708315 Start of fit
Train on 63003 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-13 07:24:21.196648 End of fit
accuracy: 99.00%
2019-12-13 07:24:46.611721 Start of fit
Train on 63004 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-13 07:49:58.147314 End of fit
accuracy: 98.71%
2019-12-13 07:50:16.171586 Start of fit
Train on 63004 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-13 08:17:38.084633 End of fit
accuracy: 99.21%
99.04% (+/- 0.19%)
2019-12-13 08:17:53.945404 End of CNN model with similarity_vector


In [21]:
#     elif Config.SIMILARITY_VECTOR_TYPE==1:
#         return similarity_vector
Config.SIMILARITY_VECTOR_TYPE=-1

Config.NUM_KERAS_TRAIN_IMAGES=60_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=10_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

for _ in range(3):
    test_CNN_with_similarity_vector()

2019-12-13 08:17:55.319673 ('Start of CNN model with similarity_vector TYPE=', -1)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(60000, 28, 28) (60000,)
image_label,image_no_label 60000 60000
The range (min num reps of a digit, max num reps of a digit):
5421 6742
2019-12-13 08:17:57.341504 Start get_similarity_vectors() processing
2019-12-13 08:18:02.391542 ('we have processed ', 999, ' images')
2019-12-13 08:18:06.381109 ('we have processed ', 1999, ' images')
2019-12-13 08:18:09.787514 ('we have processed ', 2999, ' images')
2019-12-13 08:18:14.040077 ('we have processed ', 3999, ' images')
2019-12-13 08:18:18.810073 ('we have processed ', 4999, ' images')
2019-12-13 08:18:24.204892 ('we have processed ', 5999, ' images')
2019-12-13 08:18:29.898012 ('we have processed ', 6999, ' images')
2019-12-13 08:18:36.233750 ('we have processed ', 7999, ' images')
2019-12-13 08:18:42.740440 ('we have processed ', 8999, ' images')
2019-12-13 08:18:49.651105 ('we have processed ', 9999, ' imag

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-13 11:27:34.707081 End of fit
accuracy: 99.00%
2019-12-13 11:27:57.314057 Start of fit
Train on 63001 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-13 11:51:46.289481 End of fit
accuracy: 98.83%
2019-12-13 11:52:05.514057 Start of fit
Train on 63002 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-13 12:16:34.577935 End of fit
accuracy: 99.01%
2019-12-13 12:16:53.886060 Start of fit
Train on 63003 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-13 12:40:46.187495 End of fit
accuracy: 99.04%
2019-12-13 12:41:07.953227 Start of fit
Train on 63004 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-13 13:09:59.345764 End of fit
accuracy: 98.97%
2019-12-13 13:10:20.126742 Start of fit
Train on 63004 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-13 13:33:46.360500 End of fit
accuracy: 99.26%
99.04% (+/- 0.11%)
2019-12-13 13:34:03.327000 End of CNN model wi

2019-12-13 13:50:33.071081 ('we have processed ', 58999, ' images')
2019-12-13 13:51:02.853591 ('we have processed ', 59999, ' images')
2019-12-13 13:51:02.884841 End get_similarity_vectors() processing
The training accuracy is:  0.28665
The range (min num reps of a digit, max num reps of a digit):
892 1135
2019-12-13 14:24:05.271812 Start get_similarity_vectors() processing
2019-12-13 14:24:07.341085 ('we have processed ', 999, ' images')
2019-12-13 14:24:09.862964 ('we have processed ', 1999, ' images')
2019-12-13 14:24:13.082393 ('we have processed ', 2999, ' images')
2019-12-13 14:24:17.688444 ('we have processed ', 3999, ' images')
2019-12-13 14:24:22.225567 ('we have processed ', 4999, ' images')
2019-12-13 14:24:27.274537 ('we have processed ', 5999, ' images')
2019-12-13 14:24:33.367961 ('we have processed ', 6999, ' images')
2019-12-13 14:24:39.303496 ('we have processed ', 7999, ' images')
2019-12-13 14:24:45.966281 ('we have processed ', 8999, ' images')
2019-12-13 14:24:53.

2019-12-13 18:41:35.109293 ('we have processed ', 999, ' images')
2019-12-13 18:41:41.132274 ('we have processed ', 1999, ' images')
2019-12-13 18:41:44.625613 ('we have processed ', 2999, ' images')
2019-12-13 18:41:49.312130 ('we have processed ', 3999, ' images')
2019-12-13 18:41:54.291777 ('we have processed ', 4999, ' images')
2019-12-13 18:41:59.416078 ('we have processed ', 5999, ' images')
2019-12-13 18:42:05.064315 ('we have processed ', 6999, ' images')
2019-12-13 18:42:11.481107 ('we have processed ', 7999, ' images')
2019-12-13 18:42:17.849310 ('we have processed ', 8999, ' images')
2019-12-13 18:42:24.952111 ('we have processed ', 9999, ' images')
2019-12-13 18:42:32.300144 ('we have processed ', 10999, ' images')
2019-12-13 18:42:39.977507 ('we have processed ', 11999, ' images')
2019-12-13 18:42:48.396293 ('we have processed ', 12999, ' images')
2019-12-13 18:42:57.172114 ('we have processed ', 13999, ' images')
2019-12-13 18:43:06.313359 ('we have processed ', 14999, ' 

#### ......................................NUM_POWER_SUMS=4....NUM_POWER_SUMS=1

600/100 verbatim_from_book_CNN 85.41% (+/- 4.90%); 86.18% (+/- 3.88%)

6_000/1_000 verbatim_from_book_CNN 96.74% (+/- 1.50%); 97.07% (+/- 0.74%)

#### SIMILARITY_VECTOR_TYPE=1
600/100 test_CNN_with_similarity_vector 85.88% (+/- 4.85%); 86.26% (+/- 4.10%)

#### SIMILARITY_VECTOR_TYPE=1
6_000/1_000 test_CNN_with_similarity_vector 97.10% (+/- 0.87%); 96.84% (+/- 0.69%)

#### SIMILARITY_VECTOR_TYPE=0
6_000/1_000 test_CNN_with_similarity_vector 97.23% (+/- 0.58%); 97.07% (+/- 0.66%)

#### SIMILARITY_VECTOR_TYPE=1
6_000/1_000 test_CNN_with_similarity_vector 96.68% (+/- 1.44%); 96.46% (+/- 1.12%)

#### SIMILARITY_VECTOR_TYPE=2
6_000/1_000 test_CNN_with_similarity_vector 97.57% (+/- 0.63%); 96.93% (+/- 0.85%)

#### SIMILARITY_VECTOR_TYPE=3
6_000/1_000 test_CNN_with_similarity_vector 97.01% (+/- 0.79%); 97.14% (+/- 1.01%)

#### SIMILARITY_VECTOR_TYPE=4
6_000/1_000 test_CNN_with_similarity_vector 96.73% (+/- 0.96%); 97.04% (+/- 1.31%)

#### SIMILARITY_VECTOR_TYPE=5
6_000/1_000 test_CNN_with_similarity_vector 97.13% (+/- 1.05%); 96.70% (+/- 1.17%)

#### SIMILARITY_VECTOR_TYPE=1
6_000/1_000 test_CNN_with_similarity_vector 96.06% (+/- 1.53%); 96.70% (+/- 0.58%)

12_000/2_000 verbatim_from_book_CNN 97.94% (+/- 0.50%); 98.00% (+/- 0.37%)

24_000/4_000 verbatim_from_book_CNN 98.51% (+/- 0.23%); 98.49% (+/- 0.28%)

36_000/6_000 verbatim_from_book_CNN 98.76% (+/- 0.19%); 98.79% (+/- 0.24%)

6_000/1_000 verbatim_from_book_CNN 96.40% (+/- 1.24%); 97.08% (+/- 0.76%)

#### SIMILARITY_VECTOR_TYPE=1
12_000/2_000 test_CNN_with_similarity_vector 98.14% (+/- 0.51%); 98.11% (+/- 0.49%)

#### SIMILARITY_VECTOR_TYPE=1
24_000/4_000 test_CNN_with_similarity_vector 98.49% (+/- 0.37%); 98.59% (+/- 0.25%)

#### SIMILARITY_VECTOR_TYPE=1
36_000/6_000 test_CNN_with_similarity_vector 98.67% (+/- 0.49%); 98.72% (+/- 0.28%)

#### SIMILARITY_VECTOR_TYPE=1
48_000/8_000 test_CNN_with_similarity_vector 99.07% (+/- 0.10%); 98.88% (+/- 0.22%)

#### SIMILARITY_VECTOR_TYPE=1
60_000/10_000 test_CNN_with_similarity_vector 99.04% (+/- 0.11%); 99.03% (+/- 0.14%)

48_000/8_000 verbatim_from_book_CNN...................; 98.84% (+/- 0.29%)

60_000/10_000 verbatim_from_book_CNN...................;99.06% (+/- 0.18%)

#### SIMILARITY_VECTOR_TYPE=1 NUM_POWER_SUMS=1 repeated 3 times
60_000/10_000 test_CNN_with_similarity_vector: 99.08% (+/- 0.17%); 98.95% (+/- 0.22%); 99.00% (+/- 0.10%); 99.05% (+/- 0.10%)

#### SIMILARITY_VECTOR_TYPE=-1 NUM_POWER_SUMS=1 repeated 3 times and then again
60_000/10_000 test_CNN_with_similarity_vector: 99.09% (+/- 0.12%); 99.12% (+/- 0.11%); 99.04% (+/- 0.19%); 99.04% (+/- 0.11%); 99.09% (+/- 0.10%); 99.06% (+/- 0.13%)

60_000/10_000 verbatim_from_book_CNN repeated 3 times: 99.12% (+/- 0.06%); 99.10% (+/- 0.11%); 99.04% (+/- 0.10%)


In [22]:
#     elif Config.SIMILARITY_VECTOR_TYPE==1:
#         return similarity_vector
Config.SIMILARITY_VECTOR_TYPE=-2

Config.NUM_KERAS_TRAIN_IMAGES=60_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=10_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

for _ in range(3):
    test_CNN_with_similarity_vector()

2019-12-13 23:40:03.782352 ('Start of CNN model with similarity_vector TYPE=', -2)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(60000, 28, 28) (60000,)
image_label,image_no_label 60000 60000
The range (min num reps of a digit, max num reps of a digit):
5421 6742
2019-12-13 23:40:09.624562 Start get_similarity_vectors() processing


NameError: name 'MAX_NOISE_ADD_TO_PIXEL' is not defined

In [None]:
Config.NUM_KERAS_TRAIN_IMAGES=60_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=10_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

for _ in range(3):
    verbatim_from_book_CNN()

In [14]:
Config.NUM_KERAS_TRAIN_IMAGES=60_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES

Config.NUM_KERAS_TEST_IMAGES=10_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

for _ in range(3):
    verbatim_from_book_CNN()

2019-12-11 10:55:24.120954 Start of verbatim_from_book_CNN
2019-12-11 10:55:27.330734 Start of fit
Train on 62995 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-11 11:15:42.709459 End of fit
accuracy: 99.04%
2019-12-11 11:15:57.168452 Start of fit
Train on 62996 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-11 11:36:33.658824 End of fit
accuracy: 99.17%
2019-12-11 11:36:48.787763 Start of fit
Train on 62996 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-11 11:57:12.715804 End of fit
accuracy: 99.03%
2019-12-11 11:57:27.130135 Start of fit
Train on 62999 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-11 12:17:37.894437 End of fit
accuracy: 99.21%
2019-12-11 12:17:52.419857 Start of fit
Train on 63000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-11 12:38:04.727179 End of fit
accuracy: 99.07%
2019-12-11 12:38:19.210116 Start of fit
Train on 63001 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/

Epoch 4/5
Epoch 5/5
2019-12-11 19:24:01.960997 End of fit
accuracy: 99.00%
2019-12-11 19:24:16.555640 Start of fit
Train on 63001 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-11 19:44:20.068002 End of fit
accuracy: 99.19%
2019-12-11 19:44:34.745686 Start of fit
Train on 63002 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-11 20:04:40.432209 End of fit
accuracy: 99.14%
2019-12-11 20:04:55.267805 Start of fit
Train on 63003 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-11 20:24:50.093600 End of fit
accuracy: 99.19%
2019-12-11 20:25:05.285970 Start of fit
Train on 63004 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-11 20:44:56.995016 End of fit
accuracy: 98.97%
2019-12-11 20:45:11.438849 Start of fit
Train on 63004 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-11 21:05:02.624265 End of fit
accuracy: 98.93%
99.04% (+/- 0.10%)
2019-12-11 21:05:15.902072 end of verbatim_from_book_CNN


In [66]:
Config.SIMILARITY_VECTOR_TYPE=1

In [67]:
Config.NUM_KERAS_TRAIN_IMAGES=60_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES
Config.NUM_KERAS_TEST_IMAGES=10_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

Config.TRANSFORM_SHRINK=True
for _ in range(3):
    verbatim_from_book_CNN()

2019-12-14 17:18:53.828675 Start of verbatim_from_book_CNN
2019-12-14 17:25:56.731167 Start of fit
Train on 62995 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-14 17:46:09.825012 End of fit
accuracy: 98.09%
2019-12-14 17:46:26.797037 Start of fit
Train on 62996 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-14 18:06:37.402139 End of fit
accuracy: 98.26%
2019-12-14 18:06:52.574928 Start of fit
Train on 62996 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-14 18:26:51.451789 End of fit
accuracy: 98.47%
2019-12-14 18:27:06.885082 Start of fit
Train on 62999 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-14 18:47:22.315596 End of fit
accuracy: 98.61%
2019-12-14 18:47:39.450967 Start of fit
Train on 63000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-14 19:12:19.110601 End of fit
accuracy: 98.37%
2019-12-14 19:12:35.028836 Start of fit
Train on 63001 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/

Epoch 4/5
Epoch 5/5
2019-12-15 02:46:59.691224 End of fit
accuracy: 98.16%
2019-12-15 02:47:18.451114 Start of fit
Train on 63001 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-15 03:07:19.132638 End of fit
accuracy: 98.19%
2019-12-15 03:07:37.318812 Start of fit
Train on 63002 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-15 03:27:41.436039 End of fit
accuracy: 98.47%
2019-12-15 03:28:01.080357 Start of fit
Train on 63003 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-15 03:48:56.346854 End of fit
accuracy: 98.44%
2019-12-15 03:49:34.215438 Start of fit
Train on 63004 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-15 04:10:37.552389 End of fit
accuracy: 98.14%
2019-12-15 04:10:57.188103 Start of fit
Train on 63004 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-15 04:31:38.729733 End of fit
accuracy: 98.44%
98.24% (+/- 0.20%)
2019-12-15 04:31:54.982695 end of verbatim_from_book_CNN


In [70]:
Config.NUM_KERAS_TRAIN_IMAGES=60_000
Config.NUM_KERAS_TRAIN_LABELS=Config.NUM_KERAS_TRAIN_IMAGES
Config.NUM_KERAS_TEST_IMAGES=10_000
Config.NUM_KERAS_TEST_LABELS=Config.NUM_KERAS_TEST_IMAGES

Config.SIMILARITY_VECTOR_TYPE=1
Config.TRANSFORM_SHRINK=True
for _ in range(3):
    test_CNN_with_similarity_vector()

2019-12-15 04:32:01.864979 ('Start of CNN model with similarity_vector TYPE=', 1)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(60000, 28, 28) (60000,)
2019-12-15 04:32:10.698985 ('we have transformed ', 999, ' training images')
2019-12-15 04:32:16.733467 ('we have transformed ', 1999, ' training images')
2019-12-15 04:32:22.867668 ('we have transformed ', 2999, ' training images')
2019-12-15 04:32:28.789648 ('we have transformed ', 3999, ' training images')
2019-12-15 04:32:34.699440 ('we have transformed ', 4999, ' training images')
2019-12-15 04:32:40.696836 ('we have transformed ', 5999, ' training images')
2019-12-15 04:32:46.533322 ('we have transformed ', 6999, ' training images')
2019-12-15 04:32:52.781975 ('we have transformed ', 7999, ' training images')
2019-12-15 04:32:58.695693 ('we have transformed ', 8999, ' training images')
2019-12-15 04:33:04.634514 ('we have transformed ', 9999, ' training images')
2019-12-15 04:33:10.209656 ('we have transformed ', 10999, ' train



2019-12-15 04:39:51.558741 ('we have processed ', 999, ' images')
2019-12-15 04:40:23.554938 ('we have processed ', 1999, ' images')
2019-12-15 04:40:54.200774 ('we have processed ', 2999, ' images')
2019-12-15 04:41:24.592700 ('we have processed ', 3999, ' images')
2019-12-15 04:41:56.131778 ('we have processed ', 4999, ' images')
2019-12-15 04:42:27.721808 ('we have processed ', 5999, ' images')
2019-12-15 04:42:58.749900 ('we have processed ', 6999, ' images')
2019-12-15 04:43:28.429045 ('we have processed ', 7999, ' images')
2019-12-15 04:43:59.112821 ('we have processed ', 8999, ' images')
2019-12-15 04:44:29.652153 ('we have processed ', 9999, ' images')
2019-12-15 04:45:02.171392 ('we have processed ', 10999, ' images')
2019-12-15 04:45:32.828896 ('we have processed ', 11999, ' images')
2019-12-15 04:46:03.866001 ('we have processed ', 12999, ' images')
2019-12-15 04:46:35.970572 ('we have processed ', 13999, ' images')
2019-12-15 04:47:06.950835 ('we have processed ', 14999, ' 



2019-12-15 05:13:05.569201 ('we have processed ', 999, ' images')
2019-12-15 05:13:34.742740 ('we have processed ', 1999, ' images')
2019-12-15 05:14:05.110880 ('we have processed ', 2999, ' images')
2019-12-15 05:14:35.787743 ('we have processed ', 3999, ' images')
2019-12-15 05:15:08.183442 ('we have processed ', 4999, ' images')
2019-12-15 05:15:40.688024 ('we have processed ', 5999, ' images')
2019-12-15 05:16:15.813552 ('we have processed ', 6999, ' images')
2019-12-15 05:16:48.553253 ('we have processed ', 7999, ' images')
2019-12-15 05:17:22.698917 ('we have processed ', 8999, ' images')
2019-12-15 05:17:55.072702 ('we have processed ', 9999, ' images')
2019-12-15 05:17:55.103954 End get_similarity_vectors() processing
The testing accuracy is  0.4883
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
train_images,train_labels:  (60000, 29, 28, 1) (60000,)
2019-12-15 05:18:20.488341 Start of fit
Train on 62995 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-15 05:3

2019-12-15 09:12:45.698485 ('we have transformed ', 8999, ' training images')
2019-12-15 09:12:51.583826 ('we have transformed ', 9999, ' training images')
2019-12-15 09:12:57.539829 ('we have transformed ', 10999, ' training images')
2019-12-15 09:13:03.763507 ('we have transformed ', 11999, ' training images')
2019-12-15 09:13:09.509358 ('we have transformed ', 12999, ' training images')
2019-12-15 09:13:15.275194 ('we have transformed ', 13999, ' training images')
2019-12-15 09:13:21.200326 ('we have transformed ', 14999, ' training images')
2019-12-15 09:13:27.222684 ('we have transformed ', 15999, ' training images')
2019-12-15 09:13:33.050759 ('we have transformed ', 16999, ' training images')
2019-12-15 09:13:38.712107 ('we have transformed ', 17999, ' training images')
2019-12-15 09:13:44.636177 ('we have transformed ', 18999, ' training images')
2019-12-15 09:13:50.669432 ('we have transformed ', 19999, ' training images')
2019-12-15 09:13:56.826311 ('we have transformed ', 20



2019-12-15 09:19:26.820290 ('we have processed ', 999, ' images')
2019-12-15 09:19:59.338070 ('we have processed ', 1999, ' images')
2019-12-15 09:20:30.374750 ('we have processed ', 2999, ' images')
2019-12-15 09:21:00.850302 ('we have processed ', 3999, ' images')
2019-12-15 09:21:31.623962 ('we have processed ', 4999, ' images')
2019-12-15 09:22:03.276379 ('we have processed ', 5999, ' images')
2019-12-15 09:22:34.723623 ('we have processed ', 6999, ' images')
2019-12-15 09:23:04.959162 ('we have processed ', 7999, ' images')
2019-12-15 09:23:36.458548 ('we have processed ', 8999, ' images')
2019-12-15 09:24:07.241821 ('we have processed ', 9999, ' images')
2019-12-15 09:24:39.051236 ('we have processed ', 10999, ' images')
2019-12-15 09:25:09.624062 ('we have processed ', 11999, ' images')
2019-12-15 09:25:41.602931 ('we have processed ', 12999, ' images')
2019-12-15 09:26:13.531873 ('we have processed ', 13999, ' images')
2019-12-15 09:26:44.781630 ('we have processed ', 14999, ' 



2019-12-15 09:53:53.457039 ('we have processed ', 2999, ' images')
2019-12-15 09:54:23.581683 ('we have processed ', 3999, ' images')
2019-12-15 09:54:53.971969 ('we have processed ', 4999, ' images')
2019-12-15 09:55:25.208609 ('we have processed ', 5999, ' images')
2019-12-15 09:55:57.982806 ('we have processed ', 6999, ' images')
2019-12-15 09:56:31.165866 ('we have processed ', 7999, ' images')
2019-12-15 09:57:04.032333 ('we have processed ', 8999, ' images')
2019-12-15 09:57:35.319219 ('we have processed ', 9999, ' images')
2019-12-15 09:57:35.366093 End get_similarity_vectors() processing
The testing accuracy is  0.492
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
train_images,train_labels:  (60000, 29, 28, 1) (60000,)
2019-12-15 09:58:04.950328 Start of fit
Train on 62995 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-15 10:19:38.300834 End of fit
accuracy: 96.43%
2019-12-15 10:19:58.909767 Start of fit
Train on 62996 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
E

2019-12-15 13:41:07.755018 ('we have transformed ', 9999, ' training images')
2019-12-15 13:41:14.044092 ('we have transformed ', 10999, ' training images')
2019-12-15 13:41:20.083500 ('we have transformed ', 11999, ' training images')
2019-12-15 13:41:26.002379 ('we have transformed ', 12999, ' training images')
2019-12-15 13:41:31.893571 ('we have transformed ', 13999, ' training images')
2019-12-15 13:41:38.403854 ('we have transformed ', 14999, ' training images')
2019-12-15 13:41:44.321294 ('we have transformed ', 15999, ' training images')
2019-12-15 13:41:50.565613 ('we have transformed ', 16999, ' training images')
2019-12-15 13:41:56.422772 ('we have transformed ', 17999, ' training images')
2019-12-15 13:42:02.121400 ('we have transformed ', 18999, ' training images')
2019-12-15 13:42:07.907716 ('we have transformed ', 19999, ' training images')
2019-12-15 13:42:13.826205 ('we have transformed ', 20999, ' training images')
2019-12-15 13:42:19.832148 ('we have transformed ', 2



2019-12-15 13:48:20.434545 ('we have processed ', 1999, ' images')
2019-12-15 13:48:51.321718 ('we have processed ', 2999, ' images')
2019-12-15 13:49:22.181125 ('we have processed ', 3999, ' images')
2019-12-15 13:49:53.637778 ('we have processed ', 4999, ' images')
2019-12-15 13:50:25.489052 ('we have processed ', 5999, ' images')
2019-12-15 13:50:56.650573 ('we have processed ', 6999, ' images')
2019-12-15 13:51:26.660767 ('we have processed ', 7999, ' images')
2019-12-15 13:51:57.234841 ('we have processed ', 8999, ' images')
2019-12-15 13:52:28.035093 ('we have processed ', 9999, ' images')
2019-12-15 13:52:59.424339 ('we have processed ', 10999, ' images')
2019-12-15 13:53:30.589623 ('we have processed ', 11999, ' images')
2019-12-15 13:54:01.891602 ('we have processed ', 12999, ' images')
2019-12-15 13:54:33.622434 ('we have processed ', 13999, ' images')
2019-12-15 13:55:03.519289 ('we have processed ', 14999, ' images')
2019-12-15 13:55:33.717881 ('we have processed ', 15999, 



2019-12-15 14:21:00.732950 ('we have processed ', 999, ' images')
2019-12-15 14:21:29.783499 ('we have processed ', 1999, ' images')
2019-12-15 14:21:59.635341 ('we have processed ', 2999, ' images')
2019-12-15 14:22:30.031725 ('we have processed ', 3999, ' images')
2019-12-15 14:22:59.611776 ('we have processed ', 4999, ' images')
2019-12-15 14:23:32.818148 ('we have processed ', 5999, ' images')
2019-12-15 14:24:03.970708 ('we have processed ', 6999, ' images')
2019-12-15 14:24:37.666644 ('we have processed ', 7999, ' images')
2019-12-15 14:25:10.947414 ('we have processed ', 8999, ' images')
2019-12-15 14:25:44.720413 ('we have processed ', 9999, ' images')
2019-12-15 14:25:44.782929 End get_similarity_vectors() processing
The testing accuracy is  0.4909
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
train_images,train_labels:  (60000, 29, 28, 1) (60000,)
2019-12-15 14:27:42.258070 Start of fit
Train on 62995 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
2019-12-15 14:5

# Results for using CNN versus similarity_vectors in conjunction with CNN
class Config:
    NUM_X_PIXELS=28
    NUM_Y_PIXELS=28
    PIXEL_CUTOFF=50
    NUM_SPHERES=8
    NUM_SECTORS_QUAD_1=3
    NUM_SECTORS=4*NUM_SECTORS_QUAD_1
    NUM_POWER_SUMS=1
    KNOWN_NUM_CLUSTERS=10
    TRAIN_NUM_ROWS=50_000#
    TEST_NUM_ROWS=10_000#
    NUM_NEIGHBORS=9
    USE_AREA_INVAR=False
    SIMILARITY_VECTOR_TYPE=1
    NUM_EPOCHS=5
    NUM_KERAS_TRAIN_IMAGES=60_000
    NUM_KERAS_TRAIN_LABELS=NUM_KERAS_TRAIN_IMAGES
    NUM_KERAS_TEST_IMAGES=10_000
    NUM_KERAS_TEST_LABELS=NUM_KERAS_TEST_IMAGES
    SCALE_SIMILARITY_VECTORS=False
    USE_CENTROIDS=False
    DO_PROFILING=False



# Areas for further work

Phase 1 of 3-part investigation:
- do performance analysis and speed up get_similarity_vector (avoid append; Cpython; vectorize; use profiler)
- make a module that you can import and pip install
- make docstrings
- tune scikit-Learn hyperparameters and Config. constants
- document alternative attempts e.g. USE_AREA_INVAR; the weaker model of kMeans and their centroids 
- AUC-ROC curve for points <tp rate,tn rate> by taking one Config. constant and varying it
- bar charts or box plots for (1 - accuracy) error rate reduction
- are there uses for scatterplots? heatmaps? line graphs?

Phase 2 of 3-part investigation:
- rotate each image by Config.SIN_ALPHA and Config.COS_ALPHA radians
- add smoke or noise to test image
- Invariant for perspectivity (if not projectivity): try adding order of sectors ordered by num_sphere_sector_np to see if it is a useful measure
- try other datasets (COIL, ImageNet)
- use similarity vector with XGBoost
- comparing kNN with similarity vector to CNN with similarity vector

Phase 3 of 3-part investigation:
