## Preparing a dataset from online images for a custom YOLO model

In [1]:
from google_images_download import google_images_download
import os
# Search phrases to download; later cells read the images from data_path/<phrase>.
search_queries = ['picture with dogs']

# Directory the notebook was started from; later cells chdir back to it.
original_path = os.getcwd()

# Images are stored in a 'downloads' folder that sits next to (not inside)
# the starting directory.
_parent_dir = os.path.dirname(original_path)
data_path = os.path.join(_parent_dir, 'downloads')

# Downloader object shared by downloadimages().
response = google_images_download.googleimagesdownload()

In [2]:
def downloadimages(query, limit=1000, image_format='jpg', chromedriver=None):
    """Download images for one search phrase into ``data_path``.

    Parameters
    ----------
    query : str
        Search keywords handed to the downloader.
    limit : int, optional
        Maximum number of images to request (default 1000, as before).
    image_format : str, optional
        Image format filter (default ``'jpg'``, as before).
    chromedriver : str or None, optional
        Path to the chromedriver executable. When ``None`` the
        ``CHROMEDRIVER_PATH`` environment variable is used, falling back to
        the path that used to be hard-coded here.

    Returns
    -------
    Whatever ``response.download`` returns (previously discarded).
    """
    if chromedriver is None:
        # Keep the historical default so existing calls behave the same,
        # but let other machines override it without editing the notebook.
        chromedriver = os.environ.get(
            'CHROMEDRIVER_PATH',
            'C:\\Users\\sungsooc\\Documents\\chromedriver_win32\\chromedriver.exe')
    arguments = dict(keywords=query,
                     limit=limit,
                     format=image_format,
                     output_directory=data_path,
                     chromedriver=chromedriver,
                     silent_mode=True)
    return response.download(arguments)

In [3]:
# Download every search phrase, then report how many files ended up in the
# folder named after that phrase.
download_msg = []
for query in search_queries:
    downloadimages(query)
    query_dir = os.path.join(data_path, query)
    n_files = len(os.listdir(query_dir))
    download_msg.append('Downloaded {} {} images!'.format(n_files, query))

# Print the summary only after all downloads are finished.
for line in download_msg:
    print(line)

Downloading images for: picture with dogs ...
Getting you a lot of images. This may take a few moments...
Reached end of Page.


Unfortunately all 1000 could not be downloaded because some images were not downloadable. 807 is all we got for this search filter!
Downloaded 807 picture with dogs images!


In [4]:
# Rename every downloaded file to '<phrase without spaces>_<4-digit index><ext>',
# e.g. 'picturewithdogs_0001.jpg'.
working_dir = os.path.join(data_path,'picture with dogs')
name_prefix = "".join('picture with dogs'.split())

os.chdir(working_dir)
try:
    # List once and sort, so the numbering is deterministic
    # (os.listdir returns entries in arbitrary order).
    current_list = sorted(os.listdir(working_dir))
    for i, original_name in enumerate(current_list):
        extension = os.path.splitext(original_name)[-1]
        new_name = name_prefix + '_{:04d}'.format(i+1) + extension
        # Never overwrite an existing file; this also makes re-running the cell harmless.
        if not os.path.exists(new_name):
            os.rename(original_name,new_name)
finally:
    # Always return to the starting directory, even if a rename fails,
    # so later cells that use relative paths keep working.
    os.chdir(original_path)

In [2]:
# dummy

## labelimg_v1.8.0 ([source](https://github.com/tzutalin/labelImg))
<p align="center">
<img src="../Readme_images/hdd_v2_labelling_example.png" width="700">
</p>

## Import modules

In [None]:
from darkflow.net.build import TFNet
import tensorflow as tf
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
import os
import numpy as np
from keras.models import load_model
import pafy
import datetime as dt
import time

## Config custom darkflow

In [None]:
# TensorFlow session settings: log device placement and let GPU memory
# allocation grow instead of being reserved up front.
config = tf.ConfigProto(log_device_placement=True)
config.gpu_options.allow_growth = True

# darkflow options for a training run on the custom dataset.
options = {
    'model': os.path.join('cfg', 'yolov2_hddv2.cfg'),  # custom model definition
    'load': os.path.join('bin', 'yolov2.weights'),
    'epoch': 2,
    'batch': 1,
    'train': True,
    'dataset': os.path.join('..', 'downloads', 'picture with dogs'),
    'annotation': os.path.join('..', 'downloads', 'picture with dogs-annot'),
    'gpu': 0.8
}

# Build the network while a session with the settings above is active.
with tf.Session(config=config) as sess:
    tfnet = TFNet(options)

In [None]:
# Start darkflow training with the options given to TFNet above
# ('train': True, 2 epochs, batch size 1).
tfnet.train()

In [None]:
# darkflow options for inference with the custom model.
# NOTE(review): 'load': -1 presumably selects the most recent checkpoint — confirm
# against the darkflow documentation.
options = {
    'model': os.path.join('cfg', 'yolov2_hddv2.cfg'),  # custom model definition
    'load': -1,
    'gpu': 1.0,
    'threshold': 0.3
}

# Build the inference network while a session with the shared `config` is active.
with tf.Session(config=config) as sess:
    tfnet2 = TFNet(options)

In [None]:
# Restore tfnet2's weights from a saved checkpoint (tfnet2 was built with 'load': -1).
tfnet2.load_from_ckpt()

In [None]:
# Load one test image, convert it from OpenCV's BGR order to RGB and run the
# detector restored from checkpoint (tfnet2).
img_path = 'test_images/3.jpg'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read;
    # fail here with a clear message rather than inside cvtColor.
    raise FileNotFoundError('cannot read image: {}'.format(img_path))
img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
results = tfnet2.return_predict(img)
print(results)

In [None]:
def plot_box(img,prediction):
    """Return a copy of ``img`` with a rectangle drawn for each known detection.

    Parameters
    ----------
    img : numpy array
        Image to annotate. It is NOT modified; drawing happens on a copy.
    prediction : iterable of dict
        Detections with ``'label'``, ``'topleft'`` and ``'bottomright'`` keys,
        the latter two holding ``'x'``/``'y'`` entries.

    Returns
    -------
    numpy array
        The annotated copy. Detections labelled ``'dog'`` are drawn with
        colour (255,0,0), ``'dogff'`` with (0,0,255); other labels are skipped.
    """
    label_colors = {'dog': (255,0,0), 'dogff': (0,0,255)}
    newimg = np.copy(img)
    for r in prediction:
        color = label_colors.get(r['label'])
        if color is None:
            continue
        tl = (r['topleft']['x'], r['topleft']['y'])
        br = (r['bottomright']['x'], r['bottomright']['y'])
        # Draw on the copy: the previous version drew on `img`, which mutated
        # the caller's image and made the copy pointless.
        newimg = cv2.rectangle(newimg,tl,br,color,thickness=4)
    return newimg

In [None]:
# Annotate the test image with its detections and display it without axes.
boxed = plot_box(img, results)
fig, ax = plt.subplots()
ax.imshow(boxed)
ax.axis('off')

## Import hdc_v2 and co-register

In [None]:
# Saved Keras model used by HappyPrediction(); it lives in 'best_models_hdc_v2'.
model_file_name = 'best-wiehgts-Model_2_2-057-0.314-0.870.hdf5'
_model_path = os.path.join('best_models_hdc_v2', model_file_name)
chosen_model = load_model(_model_path)

In [None]:
def HappyPrediction(img, box=None):
    """Classify one detected region of ``img`` as 'happy' or 'sad'.

    Parameters
    ----------
    img : numpy array
        Image to crop from (indexed as rows, columns, ...).
    box : dict or None, optional
        Detection with ``'topleft'`` and ``'bottomright'`` entries, each holding
        ``'x'`` and ``'y'``. When ``None`` the function falls back to the
        module-level ``result[0]``, which is what it always used before;
        pass the detection explicitly to classify any other region.

    Returns
    -------
    (str, number)
        ``('happy', rate)`` when ``chosen_model``'s first output is >= 0.5,
        otherwise ``('sad', rate)``.
    """
    if box is None:
        # Backward-compatible default: first detection of the global `result`.
        box = result[0]
    top, bottom = box['topleft']['y'], box['bottomright']['y']
    left, right = box['topleft']['x'], box['bottomright']['x']
    crop_img = img[top:bottom, left:right]

    img_rows = 224
    img_cols = 224
    # cv2.resize takes the target size as (width, height), i.e. (cols, rows).
    crop_img = cv2.resize(crop_img,(img_cols,img_rows)).astype('float32')
    crop_img /= 255  # scale pixel values to [0, 1]
    crop_img = np.expand_dims(crop_img,axis=0)  # add a leading batch axis

    predict_rate = chosen_model.predict(crop_img)[0][0]
    predict_class = 'happy' if predict_rate >= 0.5 else 'sad'
    return predict_class,predict_rate


In [None]:
# Colours used when annotating detections. They are passed straight to cv2
# drawing calls, so they are interpreted in whatever channel order the target
# image has (the still-image cells convert to RGB first; the video cells draw
# on frames exactly as cv2.VideoCapture returns them).
happy_box_color, sad_box_color = (255, 0, 0), (0, 0, 255)
txt_color = (255, 255, 255)

In [None]:
# Run the detector and the happy/sad classifier on test images 1-6 and show
# the annotated results in a 3x2 grid.
# NOTE(review): HappyPrediction(img) crops with the global `result[0]`, so every
# 'dog' detection in an image is labelled with the first detection's score.
fig, axes = plt.subplots(3, 2,figsize=(8*2, 6*3))
for ax,img_num in zip(axes.flatten(),range(1,7)):
    img_path = os.path.join('custom_darkflow','test_images',str(img_num)+'.jpg')
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise FileNotFoundError('cannot read image: {}'.format(img_path))
    img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
    result = tfnet.return_predict(img)
    # Draw on a copy so the classifier never sees boxes or labels painted for
    # earlier detections; `img` itself stays pristine.
    annotated = np.copy(img)
    for r in result:
        if r['label'] != 'dog':
            continue
        tl = (r['topleft']['x'], r['topleft']['y'])
        br = (r['bottomright']['x'], r['bottomright']['y'])
        predict_class,predict_rate = HappyPrediction(img) # confidence of happy dog classifier
        text = '{}({:.3f})'.format(predict_class,predict_rate)
        (text_width, text_height) = cv2.getTextSize(text, cv2.FONT_HERSHEY_PLAIN,2,5)[0]
        # Filled label background anchored at the detection's top-left corner.
        text_offset_x = tl[0]-3
        text_offset_y = tl[1]
        box_coords = ((text_offset_x, text_offset_y+10), (text_offset_x+text_width,text_offset_y-text_height-10))
        box_color = happy_box_color if predict_class == 'happy' else sad_box_color
        annotated = cv2.rectangle(annotated, box_coords[0], box_coords[1], box_color, cv2.FILLED)
        annotated = cv2.rectangle(annotated,tl,br,box_color,thickness=4)
        annotated = cv2.putText(annotated,text,tl,cv2.FONT_HERSHEY_PLAIN,2,txt_color,3)
    ax.imshow(annotated)
    ax.axis('off')
plt.subplots_adjust(left=0.2, wspace=0)

## single example execution time

In [None]:
%%timeit
# Time the full pipeline (read, detect, classify, annotate, plot) for one image.
# NOTE(review): under %%timeit, names assigned in this cell appear to be local to
# the timed statement, so HappyPrediction would read a global `result` left over
# from an earlier cell — confirm against the IPython docs.
img_path = os.path.join('custom_darkflow','test_images','6.jpg')
img = cv2.imread(img_path)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError('cannot read image: {}'.format(img_path))
img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
result = tfnet.return_predict(img)
# Draw on a copy so the classifier never sees boxes or labels painted for
# earlier detections.
annotated = np.copy(img)
for r in result:
    if r['label'] != 'dog':
        continue
    tl = (r['topleft']['x'], r['topleft']['y'])
    br = (r['bottomright']['x'], r['bottomright']['y'])
    predict_class,predict_rate = HappyPrediction(img) # confidence of happy dog classifier
    text = '{}({:.2f})'.format(predict_class,predict_rate)
    (text_width, text_height) = cv2.getTextSize(text, cv2.FONT_HERSHEY_PLAIN,2,5)[0]
    # Filled label background anchored at the detection's top-left corner.
    text_offset_x = tl[0]-3
    text_offset_y = tl[1]
    box_coords = ((text_offset_x, text_offset_y+10), (text_offset_x+text_width,text_offset_y-text_height-10))
    box_color = happy_box_color if predict_class == 'happy' else sad_box_color
    annotated = cv2.rectangle(annotated, box_coords[0], box_coords[1], box_color, cv2.FILLED)
    annotated = cv2.rectangle(annotated,tl,br,box_color,thickness=4)
    annotated = cv2.putText(annotated,text,tl,cv2.FONT_HERSHEY_PLAIN,2,txt_color,3)
plt.imshow(annotated)
plt.axis('off')
plt.subplots_adjust(left=0.2, wspace=0)

# for video input

In [None]:
## only showing happy dogs ##
## only showing happy dogs ##
# Stream a YouTube video, annotate 'dog' detections classified as happy, show
# the frames live and write them to 'HDD_testing.avi'. Press 'q' to stop.
# NOTE(review): HappyPrediction(frame) crops with the global `result[0]`, so every
# 'dog' detection in a frame is scored from the first detection's region.
url = 'https://www.youtube.com/watch?v=0lEUiQEDUHM'
pa = pafy.new(url)
play = pa.getbest(preftype='webm')
cap = cv2.VideoCapture(play.url)

if not cap.isOpened():
    print('cannot read a video')
width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

fourcc = cv2.VideoWriter_fourcc(*'XVID')
# The writer's frame size must match the frames passed to write(); use the
# stream's real size instead of a hard-coded 640x360 (cap.get returns floats).
out = cv2.VideoWriter('HDD_testing.avi',fourcc,20.0,(int(width),int(height)))

try:
    while cap.isOpened():
        stime = time.time()
        ret,frame = cap.read()
        if not ret:
            break

        frame = np.asarray(frame)
        result = tfnet.return_predict(frame)
        # Annotate a copy; the classifier is always fed the untouched frame.
        new_frame = np.copy(frame)
        for r in result:
            if r['label'] != 'dog':
                continue
            tl = (r['topleft']['x'], r['topleft']['y'])
            br = (r['bottomright']['x'], r['bottomright']['y'])
            predict_class,predict_rate = HappyPrediction(frame) # confidence of happy dog classifier
            if predict_class != 'happy':
                continue
            text = '{}({:.2f})'.format(predict_class,predict_rate)
            box_color = happy_box_color
            (text_width, text_height) = cv2.getTextSize(text, cv2.FONT_HERSHEY_PLAIN,2,5)[0]
            # Filled label background anchored at the detection's top-left corner.
            text_offset_x = tl[0]-3
            text_offset_y = tl[1]
            box_coords = ((text_offset_x, text_offset_y+10), (text_offset_x+text_width,text_offset_y-text_height-10))
            new_frame = cv2.rectangle(new_frame, box_coords[0], box_coords[1], box_color, cv2.FILLED)
            new_frame = cv2.rectangle(new_frame,tl,br,box_color,thickness=4)
            new_frame = cv2.putText(new_frame,text,tl,cv2.FONT_HERSHEY_PLAIN,2,txt_color,3)
        fps = 1/(time.time()-stime)
        new_frame = cv2.putText(new_frame,'fps: '+format(fps, '.2f'),(0,15),cv2.FONT_HERSHEY_PLAIN,1,(0,0,0),2)
        out.write(new_frame)
        cv2.imshow('frame',new_frame)
        if cv2.waitKey(1) & 0xff == ord('q'):
            break
finally:
    # Release the window, capture and writer even if the loop raises or the
    # kernel is interrupted, so the output file is finalised.
    cv2.destroyAllWindows()
    cap.release()
    out.release()


In [None]:
# Stream a YouTube video, annotate every 'dog' detection as happy (with score)
# or sad, show the frames live and write them to 'HDD_testing.avi'.
# Press 'q' to stop.
# NOTE(review): HappyPrediction(frame) crops with the global `result[0]`, so every
# 'dog' detection in a frame is scored from the first detection's region.
url = 'https://www.youtube.com/watch?v=0lEUiQEDUHM'
pa = pafy.new(url)
play = pa.getbest(preftype='webm')
cap = cv2.VideoCapture(play.url)

if not cap.isOpened():
    print('cannot read a video')
width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

fourcc = cv2.VideoWriter_fourcc(*'XVID')
# The writer's frame size must match the frames passed to write(); use the
# stream's real size instead of a hard-coded 640x360 (cap.get returns floats).
out = cv2.VideoWriter('HDD_testing.avi',fourcc,20.0,(int(width),int(height)))

try:
    while cap.isOpened():
        stime = time.time()
        ret,frame = cap.read()
        if not ret:
            break

        frame = np.asarray(frame)
        result = tfnet.return_predict(frame)
        # Annotate a copy; the classifier is always fed the untouched frame.
        new_frame = np.copy(frame)
        for r in result:
            if r['label'] != 'dog':
                continue
            tl = (r['topleft']['x'], r['topleft']['y'])
            br = (r['bottomright']['x'], r['bottomright']['y'])
            predict_class,predict_rate = HappyPrediction(frame) # confidence of happy dog classifier
            if predict_class == 'happy':
                text = '{}({:.2f})'.format(predict_class,predict_rate)
                box_color = happy_box_color
            else:
                # Sad detections are labelled without a score.
                text = '{}'.format(predict_class)
                box_color = sad_box_color
            (text_width, text_height) = cv2.getTextSize(text, cv2.FONT_HERSHEY_PLAIN,2,5)[0]
            # Filled label background anchored at the detection's top-left corner.
            text_offset_x = tl[0]-3
            text_offset_y = tl[1]
            box_coords = ((text_offset_x, text_offset_y+10), (text_offset_x+text_width,text_offset_y-text_height-10))
            new_frame = cv2.rectangle(new_frame, box_coords[0], box_coords[1], box_color, cv2.FILLED)
            new_frame = cv2.rectangle(new_frame,tl,br,box_color,thickness=4)
            new_frame = cv2.putText(new_frame,text,tl,cv2.FONT_HERSHEY_PLAIN,2,txt_color,3)
        fps = 1/(time.time()-stime)
        new_frame = cv2.putText(new_frame,'fps: '+format(fps, '.2f'),(0,15),cv2.FONT_HERSHEY_PLAIN,1,(0,0,0),2)
        out.write(new_frame)
        cv2.imshow('frame',new_frame)
        if cv2.waitKey(1) & 0xff == ord('q'):
            break
finally:
    # Release the window, capture and writer even if the loop raises or the
    # kernel is interrupted, so the output file is finalised.
    cv2.destroyAllWindows()
    cap.release()
    out.release()


## for webcam input

In [None]:
# colors = (tuple(255*np.random.rand(3) for _ in range(10)))

In [None]:
# capture = cv2.VideoCapture(0)
# capture.set(cv2.CAP_PROP_FRAME_WIDTH,1920)
# capture.set(cv2.CAP_PROP_FRAME_HEIGHT,1080)

In [None]:
# while True:
#     stime = time.time()
#     ret,frame = capture.read()
    
#     if ret:
#         results = tfnet.return_predict(frame)
#         for color,r in zip(colors,results):
#             tl = (r['topleft']['x'], r['topleft']['y'])
#             br = (r['bottomright']['x'], r['bottomright']['y'])
#             label = r['label']
#             frame = cv2.rectangle(frame,tl,br,color,5)
#             frame = cv2.putText(frame,label,tl,cv2.FONT_HERSHEY_PLAIN,2,color,3)
#         cv2.imshow('frame',frame)
#         print('FPS {:.1f}'.format(1/(time.time()-stime)))
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break
# capture.release()
# cv2.destroyAllWindows()