<a href="https://colab.research.google.com/github/rohan-gopalam/bounding-box-for-mnist/blob/main/Bounding_Box.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from __future__ import print_function
import tensorflow as tf
import random
from os import listdir
import glob
import numpy as np
from scipy import misc
import h5py

from keras.utils import np_utils
import matplotlib.pyplot as plt
from PIL import Image
%matplotlib inline


random.seed(101)  # fixed seed so the synthetic dataset is reproducible


def build_synth_data(data, labels, dataset_size):
    """Build a synthetic digit-localisation dataset.

    Every sample is a 224x224 all-zero (black) canvas onto which a horizontal
    strip of 1 to 5 randomly chosen digit tiles is pasted at a random
    position. The target for a sample is the bounding box of that strip.

    Args:
        data: Indexable collection of 28x28 digit images (the placement
            arithmetic below assumes 28x28 tiles).
        labels: Unused; kept so existing callers keep working.
        dataset_size: Number of synthetic samples to generate.

    Returns:
        A tuple ``(synth_data, synth_labels)`` of two lists of length
        ``dataset_size``. ``synth_data[i]`` is a 224x224 array and
        ``synth_labels[i]`` is ``[left, top, right, bottom]`` in pixel
        coordinates, where ``right`` and ``bottom`` are exclusive
        (``image[top:bottom, left:right]`` is exactly the digit strip).
    """
    synth_img_height = 224
    synth_img_width = 224
    digit_size = 28  # side length of a single digit tile

    synth_data = []
    synth_labels = []

    for _ in range(dataset_size):
        # The order of the random draws (digit count, digit indices, left
        # edge, bottom edge) is significant: it determines which samples a
        # given seed produces.
        num_digits = random.randint(1, 5)
        synth_indices = [
            random.randint(0, len(data) - 1) for _ in range(num_digits)
        ]

        # Chain the chosen digits side by side into one 28 x (28*n) strip.
        new_small_image = np.hstack([data[index] for index in synth_indices])

        # Pick where the strip goes on the 224x224 canvas.
        strip_width = num_digits * digit_size
        starting_left = random.randint(1, synth_img_width - strip_width)
        starting_bottom = random.randint(digit_size, synth_img_height - 1)
        starting_right = starting_left + strip_width
        starting_top = starting_bottom - digit_size

        new_label = [starting_left, starting_top, starting_right, starting_bottom]

        # Start from a zero-filled canvas. The background must be real zeros:
        # np.empty would leave arbitrary uninitialised values around the
        # digits.
        new_image = np.zeros((synth_img_height, synth_img_width))
        new_image[starting_top:starting_bottom,
                  starting_left:starting_right] = new_small_image

        synth_data.append(new_image)
        synth_labels.append(new_label)
    return synth_data, synth_labels

  
def prep_data_keras(img_data):
    """Convert grayscale images into a float32 3-channel batch for Keras.

    Pixel values are scaled from 0..255 to 0..1 and the single gray channel
    is copied into three identical channels, giving the channels-last layout
    ``(N, H, W, 3)``.

    Args:
        img_data: Sequence of N equally sized 2-D grayscale images with
            values in 0..255 (the training script passes 224x224 images).

    Returns:
        A float32 array of shape ``(N, H, W, 3)`` in which
        ``out[n, y, x, c] == img_data[n][y][x] / 255`` for every channel c.
    """
    gray = np.array(img_data, dtype="float32") / 255.0

    # Add a trailing channel axis and repeat it three times. Concatenating
    # the (N, H, W) batch along axis 2 and then reshaping to (N, H, W, 3)
    # does NOT do this: it puts three neighbouring columns into the channels
    # of one pixel and scrambles the image.
    return np.repeat(gray[..., np.newaxis], 3, axis=-1)

def convert_labels(labels):
    """Return the bounding-box labels as a new float32 NumPy array."""
    return np.array(labels, dtype=np.float32)

from keras.datasets import mnist

# Side length, in pixels, of the synthetic images and of the VGG16 input.
IMG_SIZE = 224

(X_train, y_train), (X_test, y_test) = mnist.load_data()
print(X_train.shape)

# Build small synthetic train/test sets of digit strips on a blank canvas.
X_synth_train, y_synth_train = build_synth_data(X_train, y_train, 60)
X_synth_test, y_synth_test = build_synth_data(X_test, y_test, 10)
print("build synth data completed")

# The regression head ends in a sigmoid, whose output is limited to (0, 1).
# The boxes are produced in pixel coordinates (0..IMG_SIZE), so they are
# scaled to 0..1 here; otherwise the network could never reach its targets.
# Multiply a prediction by IMG_SIZE to get pixel coordinates back.
train_labels = convert_labels(y_synth_train) / IMG_SIZE
print("convert train labels completed: shapey = ", train_labels.shape)
test_labels = convert_labels(y_synth_test) / IMG_SIZE
print("convert test labels completed: shapey = ", test_labels.shape)

train_images = prep_data_keras(X_synth_train)
print("convert train images completed: shapex = ", train_images.shape)
test_images = prep_data_keras(X_synth_test)
print("convert test images completed: shapex = ", test_images.shape)

# VGG16 convolutional base with ImageNet weights and no classifier head.
vgg = tf.keras.applications.VGG16(
    weights="imagenet",
    include_top=False,
    input_tensor=tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),
)

# freeze all VGG layers so they will *not* be updated during the
# training process
vgg.trainable = False

# flatten the max-pooling output of VGG
flatten = vgg.output
flatten = tf.keras.layers.Flatten()(flatten)

# construct a fully-connected layer header to output the predicted
# bounding box coordinates (left, top, right, bottom), each in 0..1
bboxHead = tf.keras.layers.Dense(128, activation="relu")(flatten)
bboxHead = tf.keras.layers.Dense(32, activation="relu")(bboxHead)
bboxHead = tf.keras.layers.Dense(4, activation="sigmoid")(bboxHead)

# construct the model we will fine-tune for bounding box regression
model = tf.keras.models.Model(inputs=vgg.input, outputs=bboxHead)

# train the network for bounding box regression
INIT_LR = 1e-4

opt = tf.keras.optimizers.Adam(learning_rate=INIT_LR)
model.compile(loss="mse", optimizer=opt)
# summary() prints the table itself and returns None, so it is not wrapped
# in print().
model.summary()

print("[INFO] training bounding box regressor...")
# H is the History object returned by fit; H.history holds the per-epoch
# "loss" and "val_loss" lists.
H = model.fit(
    train_images, train_labels,
    validation_data=(test_images, test_labels),
    batch_size=32,
    epochs=25,
    verbose=1,
)

# MSE on the held-out synthetic set, in normalised (0..1) box units.
score = model.evaluate(test_images, test_labels, verbose=0)
print(score)
print(model.metrics_names)

# Saves the trained model to H.h5.
model.save("H.h5", save_format="h5")





(60000, 28, 28)
build synth data completed
convert train labels completed: shapey =  (60, 4)
convert test labels completed: shapey =  (10, 4)
convert train images completed: shapex =  (60, 224, 224, 3)
convert test images completed: shapex =  (10, 224, 224, 3)
Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856  

In [None]:
# plot the model training history
import tensorflow as tf
from keras.models import load_model
import matplotlib.pyplot as plt
import numpy as np

# Plot the per-epoch training and validation loss.
#
# `H` must be the History object returned by `model.fit(...)` in the training
# cell, so run that cell first in the same session. Do not replace `H` with
# `load_model('H.h5')`: that returns the saved model, which does not carry the
# per-epoch loss lists that are plotted here.
history = H.history
N = len(history["loss"])  # number of epochs that were actually recorded

plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, N), history["loss"], label="train_loss")
plt.plot(np.arange(0, N), history["val_loss"], label="val_loss")
plt.title("Bounding Box Regression Loss on Training Set")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.legend(loc="lower left")

OSError: ignored

In [None]:
#concat tester
import numpy as np
# Three identical single-channel batches of shape (10, 5, 5, 1).
x, y, z = (np.ones((10, 5, 5, 1)) for _ in range(3))

# Joining them on the channel axis (axis 3) gives one 3-channel batch.
answer = np.concatenate([x, y, z], axis=3)
print(answer.shape)

(10, 5, 5, 3)
