<img src="http://imgur.com/1ZcRyrc.png" style="float: left; margin: 20px; height: 55px">

# Capstone: Detecting Manufacturing Nonconformities through Live Video Deep Learning Classification<br>

# Webcam Video Feed and Capture

In [8]:
#imports
import time
import numpy as np
import cv2 as cv
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import tensorflow as tf
from tensorflow import keras

import warnings
warnings.filterwarnings('ignore')

## Initialization of Webcam

In [18]:
# Open the capture source: device index 0 selects the webcam,
# a file path would play back a video file instead.
cap = cv.VideoCapture(0)

# Report whether the capture source could be opened.
print("Camera is operational." if cap.isOpened() else "Cannot open camera.")


Camera is operational.


## Setting Webcam Streaming/Capture Specifications

In [19]:
# Request a 1280x720 stream at 5 frames per second from the capture device.
for prop_id, requested in (
    (cv.CAP_PROP_FRAME_WIDTH, 1280),
    (cv.CAP_PROP_FRAME_HEIGHT, 720),
    (cv.CAP_PROP_FPS, 5),
):
    ret = cap.set(prop_id, requested)

# Read back the values the device reports (returned as floats); they are
# reused below when the video writers are configured.
fps, width, height = (
    cap.get(prop_id)
    for prop_id in (cv.CAP_PROP_FPS, cv.CAP_PROP_FRAME_WIDTH, cv.CAP_PROP_FRAME_HEIGHT)
)

print(fps, width, height)

5.0 1280.0 720.0


## Video Streaming/Capture

In [11]:
# Live preview of the camera feed; press 'q' in the preview window to stop.
#
# The writer is configured with the fps/width/height read back from the
# camera. Recording is optional: uncomment `out.write(frame)` to save it.
fourcc = cv.VideoWriter_fourcc('m', 'p', '4', 'v')
out = cv.VideoWriter('../video/output.avi', fourcc, fps, (int(width), int(height)))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera unplugged / end of file): stop.
            break

        #out.write(frame) #outputs a video
        cv.imshow('frame', frame)

        # waitKey also services the GUI event loop; mask to the low byte
        # before comparing against the key code.
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always close the writer and the preview window, even when the loop
    # is aborted by an exception or a kernel interrupt.
    out.release()
    cv.destroyAllWindows()


In [5]:
# Release the capture device held by `cap` once streaming is finished.
# NOTE(review): cells further down read from `cap` again, so the webcam
# initialization and settings cells have to be re-run before using them.
cap.release()


## Image Capture from Video Feed

In [20]:
# Save every frame of the live feed as a numbered JPEG; press 'q' in the
# preview window to stop.
#
# The writer is created for parity with the other capture cells; no frames
# are written to it here.
fourcc = cv.VideoWriter_fourcc('m', 'p', '4', 'v')
out = cv.VideoWriter('../video/create.avi', fourcc, fps, (int(width), int(height)))
i = 1  # running index appended to the image file name

time.sleep(2)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame delivered: stop capturing.
            break

        cv.imshow('frame', frame)

        image_path = '../img/progressive/empty/step2' + str(i) + '.jpg'
        # cv.imwrite reports failure through its return value instead of
        # raising, so a missing folder would otherwise go unnoticed.
        if not cv.imwrite(image_path, frame):  # outputs an image
            print(f"Could not write {image_path}; stopping capture.")
            break
        i += 1

        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always close the writer and the preview window, even when the loop
    # is aborted by an exception or a kernel interrupt.
    out.release()
    cv.destroyAllWindows()

In [6]:
# Release the capture device held by `cap` once image capture is finished.
# NOTE(review): cells further down read from `cap` again, so the webcam
# initialization and settings cells have to be re-run before using them.
cap.release()


## Live Video Classification

### Finished Assembly Model Live Video Classification (Base Dataset/Binary)

In [16]:
# Load the saved finished-assembly classifier (base dataset) and print its
# layer-by-layer summary.
model = keras.models.load_model('../model/cnn_model_finished_w_variation_all/')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 240, 320, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 120, 160, 16)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 118, 158, 64)      9280      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 59, 79, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 298304)            0         
_________________________________________________________________
dense (Dense)                (None, 64)                19091520  
_________________________________________________________________
dropout (Dropout)            (None, 64)                0

In [20]:
# Live binary classification of the full camera frame; press 'q' in the
# "Prediction" window to stop.
#
# Recording is optional: uncomment `out.write(frame)` to save the annotated
# video.
fourcc = cv.VideoWriter_fourcc('m', 'p', '4', 'v')
out = cv.VideoWriter('../video/output_finished_v.avi', fourcc, fps, (int(width), int(height)))

time.sleep(2)

# Text overlay settings (constant for the whole run).
fontFace = cv.FONT_HERSHEY_DUPLEX
text_scale = 1.2

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # cap.read() returns (False, None) when no frame is available,
            # e.g. because the camera was released by an earlier cell.
            print("No frame received from the camera; stopping.")
            break

        # OpenCV delivers BGR; convert the captured frame into RGB
        imageRGB = cv.cvtColor(frame, cv.COLOR_BGR2RGB)

        # Resize to 320x240 (width x height) and scale pixel values by 1/255
        im = Image.fromarray(imageRGB)
        im = im.resize((320, 240))
        img_array = np.array(im) / 255

        # Expand dimensions to match the 4D tensor shape for prediction
        img_array = np.expand_dims(img_array, axis=0)

        prediction = model.predict(img_array)

        # The single model output is shown as the percentage for both labels;
        # values above 0.5 are labelled PASS (green), the rest FAIL (red).
        if prediction[0][0] > 0.5:
            label, color = '"PASS"', (0, 255, 0)
        else:
            label, color = '"FAIL"', (0, 0, 255)
        text = f'{label}: {np.round(prediction[0][0]*100,2)}%'
        frame = cv.putText(frame, text, org=(10,35), fontFace=fontFace, fontScale=text_scale, thickness=2, color=color, lineType=cv.LINE_AA)

        # video output
        cv.imshow("Prediction", frame)

        #out.write(frame) #saving the video output
        key = cv.waitKey(1)
        if key == ord('q'):
            break
finally:
    # Always close the writer and the windows, even when the loop is
    # aborted by an exception or a kernel interrupt.
    out.release()
    cv.destroyAllWindows()

In [21]:
# Release the capture device held by `cap` once classification is finished.
# NOTE(review): cells further down read from `cap` again, so the webcam
# initialization and settings cells have to be re-run before using them.
cap.release()

### Finished Assembly Model Live Video Classification (ROI Dataset/Binary)

In [12]:
# Load the saved finished-assembly classifier trained on cropped ROIs and
# print its layer-by-layer summary.
model = keras.models.load_model('../model/cnn_model_finished_cropped/')
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 250, 200, 16)      448       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 125, 100, 16)      0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 123, 98, 64)       9280      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 61, 49, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 191296)            0         
_________________________________________________________________
dense_3 (Dense)              (None, 64)                12243008  
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)               

In [13]:
# Live binary classification on a mosaic of 16 regions of interest (ROIs)
# cut out of each frame; press 'q' in the "Prediction" window to stop.
#
# Recording is optional: uncomment `out.write(frame)` to save the annotated
# video.
# NOTE(review): this is the same output path as the base-model cell uses -
# confirm whether a separate file name was intended.
fourcc = cv.VideoWriter_fourcc('m', 'p', '4', 'v')
out = cv.VideoWriter('../video/output_finished_v.avi', fourcc, fps, (int(width), int(height)))

time.sleep(2)

# Crop windows, one entry per mosaic row: (top, bottom, left edges).
# Every window is ROI_WIDTH pixels wide; the rows stack to a 250x200 mosaic.
# The pixel positions assume the frame size requested in the settings cell.
ROI_WIDTH = 50
ROI_ROWS = (
    (101, 152, (385, 561, 655, 831)),
    (258, 323, (375, 555, 660, 838)),
    (353, 417, (370, 550, 664, 845)),
    (549, 619, (356, 550, 664, 856)),
)

# Rectangles drawn on the displayed frame, as (top-left, bottom-right).
# NOTE(review): these differ by a few pixels from the crop windows above,
# so the overlay is only an approximate indication of what the model sees.
ROI_RECTANGLES = (
    # 1st row
    ((385, 101), (434, 152)), ((561, 101), (615, 152)), ((650, 101), (705, 152)), ((833, 101), (881, 152)),
    # 2nd row
    ((367, 258), (426, 323)), ((554, 258), (610, 323)), ((649, 258), (709, 323)), ((840, 258), (898, 323)),
    # 3rd row
    ((360, 353), (419, 417)), ((552, 353), (610, 417)), ((649, 353), (710, 417)), ((843, 353), (901, 417)),
    # 4th row
    ((343, 549), (407, 617)), ((547, 549), (607, 617)), ((650, 549), (714, 617)), ((855, 549), (917, 617)),
)

# Text overlay settings (constant for the whole run).
fontFace = cv.FONT_HERSHEY_DUPLEX
text_scale = 1.2

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # cap.read() returns (False, None) when no frame is available,
            # e.g. because the camera was released by an earlier cell.
            print("No frame received from the camera; stopping.")
            break

        # OpenCV delivers BGR; convert the captured frame into RGB
        imageRGB = cv.cvtColor(frame, cv.COLOR_BGR2RGB)

        # Scale pixel values by 1/255 (no resize for ROI models)
        im = Image.fromarray(imageRGB)
        img_array = np.array(im) / 255

        # Cut out the ROIs: windows of one row are joined side by side,
        # then the four rows are stacked vertically.
        cropped = np.concatenate(
            [
                np.concatenate(
                    [img_array[top:bottom, left:left + ROI_WIDTH] for left in lefts],
                    axis=1,
                )
                for top, bottom, lefts in ROI_ROWS
            ],
            axis=0,
        )

        # Show the mosaic (cvtColor does not accept float64, hence float32)
        croppedBGR = cv.cvtColor(cropped.astype(np.float32), cv.COLOR_RGB2BGR)
        cv.imshow("Cropped", croppedBGR)

        # Expand dimensions to match the 4D tensor shape for prediction
        cropped = np.expand_dims(cropped, axis=0)

        prediction = model.predict(cropped)

        # Mark the ROIs on the displayed frame
        for top_left, bottom_right in ROI_RECTANGLES:
            frame = cv.rectangle(frame, top_left, bottom_right, (0, 255, 255), thickness=2)

        # The single model output is shown as the percentage for both labels;
        # values above 0.5 are labelled PASS (green), the rest FAIL (red).
        if prediction[0][0] > 0.5:
            label, color = '"PASS"', (0, 255, 0)
        else:
            label, color = '"FAIL"', (0, 0, 255)
        text = f'{label}: {np.round(prediction[0][0]*100,2)}%'
        frame = cv.putText(frame, text, org=(10,35), fontFace=fontFace, fontScale=text_scale, thickness=2, color=color, lineType=cv.LINE_AA)

        # video output
        cv.imshow("Prediction", frame)

        #out.write(frame) #saving the video output
        key = cv.waitKey(1)
        if key == ord('q'):
            break
finally:
    # Always close the writer and the windows, even when the loop is
    # aborted by an exception or a kernel interrupt.
    out.release()
    cv.destroyAllWindows()

In [17]:
# Release the capture device held by `cap` once classification is finished.
# NOTE(review): cells further down read from `cap` again, so the webcam
# initialization and settings cells have to be re-run before using them.
cap.release()

### Production Model Live Video Classification (ROI Dataset/Multiclass)


In [14]:
# Load the saved production classifier and print its layer-by-layer summary.
model = keras.models.load_model('../model/cnn_model_production/')
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 250, 200, 16)      448       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 125, 100, 16)      0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 123, 98, 64)       9280      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 61, 49, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 191296)            0         
_________________________________________________________________
dense_3 (Dense)              (None, 64)                12243008  
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)               

In [24]:
# Live three-class classification (FAIL / PASS / EMPTY) on a mosaic of 16
# regions of interest (ROIs) cut out of each frame; press 'q' in the
# "Prediction" window to stop.
#
# Recording is optional: uncomment `out.write(frame)` to save the annotated
# video.
fourcc = cv.VideoWriter_fourcc('m', 'p', '4', 'v')
out = cv.VideoWriter('../video/test.avi', fourcc, fps, (int(width), int(height)))

# Crop windows, one entry per mosaic row: (top, bottom, left edges).
# Every window is ROI_WIDTH pixels wide; the rows stack to a 250x200 mosaic.
# The pixel positions assume the frame size requested in the settings cell.
ROI_WIDTH = 50
ROI_ROWS = (
    (101, 152, (385, 561, 655, 831)),
    (258, 323, (375, 555, 660, 838)),
    (353, 417, (370, 550, 664, 845)),
    (549, 619, (356, 550, 664, 856)),
)

# Rectangles drawn on the displayed frame, as (top-left, bottom-right).
# NOTE(review): these differ by a few pixels from the crop windows above,
# so the overlay is only an approximate indication of what the model sees.
ROI_RECTANGLES = (
    # 1st row
    ((385, 101), (434, 152)), ((561, 101), (615, 152)), ((650, 101), (705, 152)), ((833, 101), (881, 152)),
    # 2nd row
    ((367, 258), (426, 323)), ((554, 258), (610, 323)), ((649, 258), (709, 323)), ((840, 258), (898, 323)),
    # 3rd row
    ((360, 353), (419, 417)), ((552, 353), (610, 417)), ((649, 353), (710, 417)), ((843, 353), (901, 417)),
    # 4th row
    ((343, 549), (407, 617)), ((547, 549), (607, 617)), ((650, 549), (714, 617)), ((855, 549), (917, 617)),
)

# Overlay label and BGR text colour for each predicted class index.
CLASS_OVERLAY = {
    0: ('"FAIL"', (0, 0, 255)),
    1: ('"PASS"', (0, 255, 0)),
    2: ('"EMPTY"', (255, 255, 255)),
}

# Text overlay settings (constant for the whole run).
fontFace = cv.FONT_HERSHEY_DUPLEX
text_scale = 1.2

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # cap.read() returns (False, None) when no frame is available,
            # e.g. because the camera was released by an earlier cell.
            print("No frame received from the camera; stopping.")
            break

        # OpenCV delivers BGR; convert the captured frame into RGB
        imageRGB = cv.cvtColor(frame, cv.COLOR_BGR2RGB)

        # Scale pixel values by 1/255 (no resize for ROI models)
        im = Image.fromarray(imageRGB)
        img_array = np.array(im) / 255

        # Cut out the ROIs: windows of one row are joined side by side,
        # then the four rows are stacked vertically.
        cropped = np.concatenate(
            [
                np.concatenate(
                    [img_array[top:bottom, left:left + ROI_WIDTH] for left in lefts],
                    axis=1,
                )
                for top, bottom, lefts in ROI_ROWS
            ],
            axis=0,
        )

        # Show the mosaic (cvtColor does not accept float64, hence float32)
        croppedBGR = cv.cvtColor(cropped.astype(np.float32), cv.COLOR_RGB2BGR)
        cv.imshow("Cropped", croppedBGR)

        # Expand dimensions to match the 4D tensor shape for prediction
        cropped = np.expand_dims(cropped, axis=0)

        # Run the model once per frame and derive both the winning class and
        # its score from the same output row.
        class_scores = model.predict(cropped)[0]
        predict_class = np.argmax(class_scores)
        prediction = class_scores[predict_class]

        # Mark the ROIs on the displayed frame
        for top_left, bottom_right in ROI_RECTANGLES:
            frame = cv.rectangle(frame, top_left, bottom_right, (0, 255, 255), thickness=2)

        # Print the predicted class and its score into the frame; an index
        # outside the table leaves the frame without a label.
        overlay = CLASS_OVERLAY.get(int(predict_class))
        if overlay is not None:
            label, color = overlay
            text = f'{label}: {np.round(prediction*100,2)}%'
            frame = cv.putText(frame, text, org=(10,35), fontFace=fontFace, fontScale=text_scale, thickness=2, color=color, lineType=cv.LINE_AA)

        # video output
        cv.imshow("Prediction", frame)

        # out.write(frame) #saving the video output
        key = cv.waitKey(1)
        if key == ord('q'):
            break
finally:
    # Always close the writer and the windows, even when the loop is
    # aborted by an exception or a kernel interrupt.
    out.release()
    cv.destroyAllWindows()

In [15]:
# Release the capture device held by `cap` now that classification is finished.
cap.release()