## Machine Learning with Python in a Jupyter Notebook: Construct a Dataset of Captured Images of Objects, Use TensorFlow to Train on the Dataset, and Test the Model with Real-Time Recognition

 ### Define Number of Objects to be Trained and Create Respective Train and Test Directories

In [1]:
import os

# Names of the object classes, in the order the user enters them.
labels_list = []

# Ask how many object classes to collect, then create one train and one test
# directory per class under captures/.
num_labels = input('Enter number of different objects to have the AI model train on and recognize: ')
for num in range(int(num_labels)):
    # Strip stray whitespace so it does not end up in the directory name.
    label_name = input(f'Enter the label name for object {num+1}: ').strip()
    labels_list.append(label_name)
    # exist_ok=True keeps this cell re-runnable: without it a second run (or a
    # repeated label) raises FileExistsError.
    os.makedirs(f'captures/train/{label_name}', exist_ok=True)
    os.makedirs(f'captures/test/{label_name}', exist_ok=True)

print('Here is the list of objects to be recorded and collected for model training:\n', labels_list)

Enter number of different objects to have the AI model train on and recognize: 4
Enter the label name for object 1: ruler
Enter the label name for object 2: scissor
Enter the label name for object 3: pen
Enter the label name for object 4: stapler
Here is the list of objects to be recorded and collected for model training:
 ['ruler', 'scissor', 'pen', 'stapler']


 ### Use OpenCV to Build the Train and Test Datasets

In [3]:
import cv2

# Capture the train and test images for every label from the webcam.
#
# For each label the loop runs a small state machine driven by the Enter key:
#   'start' -> 'train' -> 'test' -> (next label)
# While in 'train' or 'test', every displayed frame's capture rectangle is
# saved as a 128x128 grayscale JPEG under captures/<phase>/<label>/.

# 1 for 2nd webcam, 0 for default webcam
cap = cv2.VideoCapture(1)

try:
    if not cap.isOpened():
        raise RuntimeError('Could not open the webcam at index 1; try cv2.VideoCapture(0)')

    for label in labels_list:

        capture = 'start'
        image_count = 0

        while True:

            ret, frame = cap.read()
            if not ret:
                # A failed read returns frame=None, which would otherwise crash
                # inside cv2.flip with a much less helpful error.
                raise RuntimeError('Failed to read a frame from the webcam')
            frame = cv2.flip(frame, 1)

            # define capture data rectangle
            capture_data = frame[100:400, 320:620]
            capture_data = cv2.cvtColor(capture_data, cv2.COLOR_BGR2GRAY)
            capture_data = cv2.resize(capture_data, (128, 128), interpolation = cv2.INTER_AREA)
            copy = frame.copy()
            cv2.rectangle(copy, (320, 100), (620, 400), (255,0,0), 5)

            if capture == 'start':
                cv2.putText(copy, 'Hit Enter to Start Capturing', (50 , 80), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 1)
                cv2.putText(copy, f'Label: "{label}"', (50 , 120), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 1)
            else:
                # capture is 'train' or 'test' here and doubles as the name of
                # the dataset sub-directory.
                image_count += 1
                cv2.putText(copy, f'Recording "{label}" object', (50 , 80), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 1)
                cv2.putText(copy, f'{capture} dataset', (50 , 120), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 1)
                cv2.putText(copy, str(image_count), (400 , 400), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 1)
                cv2.imwrite(f'captures/{capture}/{label}/{image_count}.jpg', capture_data)

            cv2.imshow('frame', copy)

            # Mask to the low byte so the comparison still works if waitKey
            # returns a key code with extra high bits set.
            if cv2.waitKey(1) & 0xFF == 13: #13 is the Enter Key
                if capture == 'start':
                    capture = 'train'
                    image_count = 0
                elif capture == 'train':
                    capture = 'test'
                    image_count = 0
                else:
                    break
finally:
    # Always free the camera and close the preview window, even on error or
    # when the cell is interrupted.
    cap.release()
    cv2.destroyAllWindows()

### Use Data Augmentation to Enhance Dataset

In [4]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Image size fed to the network and the number of images per batch.
img_rows, img_cols = 128, 128
batch_size = 32

train_data_dir = 'captures/train'
validation_data_dir = 'captures/test'

# Settings shared by the training and validation directory iterators.
flow_options = dict(
    target_size=(img_rows, img_cols),
    batch_size=batch_size,
    color_mode='grayscale',
    class_mode='sparse',
)

# Training images are rescaled to [0, 1] and randomly rotated, shifted and
# mirrored; validation images are only rescaled.
augmentation_options = dict(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.3,
    height_shift_range=0.3,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)
validation_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(train_data_dir, **flow_options)
validation_generator = validation_datagen.flow_from_directory(validation_data_dir, **flow_options)

Found 2374 images belonging to 4 classes.
Found 736 images belonging to 4 classes.


### Define the TensorFlow Training Model

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D


# Small CNN classifier: three Conv2D + MaxPooling2D stages, then a dense head
# with dropout and a softmax output sized to the number of classes found by
# train_generator.
model = Sequential()
# Input is a single-channel (grayscale) 128x128 image.
model.add(Conv2D(128, 3, activation='relu', input_shape=(128, 128, 1) ))
model.add(MaxPooling2D())

model.add(Conv2D(96, 3, activation='relu'))
model.add(MaxPooling2D())

model.add(Conv2D(64, 3, activation='relu'))
model.add(MaxPooling2D())

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.20))

model.add(Dense(train_generator.num_classes, activation='softmax'))

# summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 126, 126, 128)     1280      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 63, 63, 128)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 61, 61, 96)        110688    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 30, 30, 96)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 28, 28, 64)        55360     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 12544)             0

### Compile and Train the Model

In [6]:
from tensorflow.keras.optimizers import Adam

# Adam with a reduced learning rate
optimizer = Adam(learning_rate=0.0003)

# Sparse categorical cross-entropy is used as the loss; accuracy is tracked
# as the metric.
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 15 epochs, validating on the test-directory generator.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=15,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [7]:
# Write the trained model to an HDF5 file in the working directory
saved_model_path = "objects_detection_cnn_model.h5"
model.save(saved_model_path)

In [1]:
# load the model
from tensorflow.keras.models import load_model

# Read the previously saved model back from its HDF5 file.
saved_model_path = 'objects_detection_cnn_model.h5'
model = load_model(saved_model_path)

In [2]:
import os

# Rebuild the label list from the training directory, sorted by name so that
# list positions can be used to look up the predicted class index.
# Only sub-directories are kept: a stray file in captures/train (e.g.
# .DS_Store) would otherwise be listed as a label and shift every index.
train_dir = 'captures/train'
labels_list = sorted(
    name for name in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, name))
)
labels_list

['pen', 'ruler', 'scissor', 'stapler']

In [3]:
import tensorflow as tf
import numpy as np
import cv2

# Real-time recognition: classify the region inside the blue rectangle on
# every webcam frame and overlay the predicted label and its confidence.
# Press Enter in the preview window to stop.
cap = cv2.VideoCapture(1)

try:
    if not cap.isOpened():
        raise RuntimeError('Could not open the webcam at index 1; try cv2.VideoCapture(0)')

    while True:

        ret, frame = cap.read()
        if not ret:
            # A failed read returns frame=None, which would otherwise crash
            # inside cv2.flip with a much less helpful error.
            raise RuntimeError('Failed to read a frame from the webcam')

        frame = cv2.flip(frame, 1)

        #define region for detection and prediction
        detection_box = frame[100:400, 320:620]
        detection_box = cv2.cvtColor(detection_box, cv2.COLOR_BGR2GRAY)
        detection_box = cv2.resize(detection_box, (128, 128), interpolation = cv2.INTER_AREA)

        copy = frame.copy()
        cv2.rectangle(copy, (320, 100), (620, 400), (255,0,0), 5)

        # Shape the crop as a batch of one 128x128 single-channel image and
        # scale pixel values by 1/255.
        detected = detection_box.reshape(1, 128, 128, 1)
        detected = detected / 255
        # verbose=0 avoids Keras printing progress output for every frame.
        prediction = model.predict(detected, verbose=0)
        class_index = int(np.argmax(prediction))
        confidence = prediction[0][class_index]

        cv2.putText(copy, labels_list[class_index], (300 , 80), cv2.FONT_HERSHEY_COMPLEX, 2, (0, 255, 0), 2)
        # Green text when confidence is above 0.5, red otherwise (colors are BGR).
        confidence_color = (0, 255, 0) if confidence > 0.5 else (0, 0, 255)
        cv2.putText(copy, 'prediction confidence: ' + str(confidence), (250 , 30), cv2.FONT_HERSHEY_COMPLEX, 0.5, confidence_color, 2)
        cv2.imshow('frame', copy)

        # Mask to the low byte so the comparison still works if waitKey
        # returns a key code with extra high bits set.
        if cv2.waitKey(1) & 0xFF == 13: #13 is the Enter Key
            break
finally:
    # Always free the camera and close the preview window, even on error or
    # when the cell is interrupted.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Manual cleanup cell: run this if a capture/recognition cell was interrupted
# and left the preview window open or the webcam still held.
cv2.destroyAllWindows()
cap.release()