<a href="https://colab.research.google.com/github/plvckn/yolo-vgg-binary/blob/main/yolo_vgg_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Setup

In [1]:
# Mount Google Drive and change into the darkflow repo stored on it
from google.colab import drive
drive.mount('/content/drive')
import sys
foldername = 'darkflow'
# Add the repo folder to the module search path
sys.path.append(f'/content/drive/My Drive/custom-yolo/{foldername}')
# Relative paths used by later cells (demo/..., built_graph/...) resolve against this directory
%cd /content/drive/My\ Drive/custom-yolo/$foldername
!ls

Mounted at /content/drive
/content/drive/My Drive/custom-yolo/darkflow
bin		      ckpt	labels-coco.txt  sample_img  video.avi
binaryYoloLabels.txt  darkflow	labels.txt	 setup.py    yolov2-voc-1c.cfg
build		      demo	LICENSE		 test
built_graph	      demo.gif	preview.png	 test_img
cfg		      flow	README.md	 train


In [None]:
# Packages
from PIL import Image
import cv2
import os
from os.path import join
import glob
import json
import numpy as np
%load_ext autoreload
%autoreload 2

In [None]:
# Define paths to save video frames, detection and classification results
SAVE_FOLDER = 'frames_test3'
out_name = 'test-02t-bigcrops-keep-ratio' # processed video filename
imgdir = f'demo/{SAVE_FOLDER}'
# Create every folder the notebook writes into. The classification step saves its
# results under demo/processed/{out_name}, so that folder has to exist as well;
# exist_ok=True makes the cell safe to re-run.
for folder in (imgdir, f'demo/processed/{SAVE_FOLDER}', f'demo/processed/{out_name}'):
  os.makedirs(folder, exist_ok=True)

### Utils

In [None]:
# Various utility and helper functions

def deserialize_json(obj_file):
  """Load the detections stored in a frame's .json file.

  The detection step writes `json.dump(json.dumps(str(results)), f)`, i.e. a
  doubly encoded string holding the Python repr of the results. That format is
  decoded by stripping the inner quotes and turning single quotes into double
  quotes before parsing. Files that already hold plain JSON are returned as
  parsed.

  Args:
    obj_file: path of the .json file to read.

  Returns:
    The decoded object (a list of detection dicts for files written by the
    detection step).
  """
  # Context manager guarantees the handle is closed, even if parsing fails.
  with open(obj_file, 'r') as f:
    payload = json.load(f)
  if not isinstance(payload, str):
    # Plain JSON content: nothing left to unwrap.
    return payload
  # payload looks like '"[{\'label\': ...}]"': drop the surrounding quotes and
  # convert the Python-style single quotes so the text becomes valid JSON.
  return json.loads(payload[1:-1].replace("\'", "\""))

def read_img(img_id):
  """Load the frame image that belongs to a detection file.

  Args:
    img_id: file name of the frame; every '.json' in it is replaced by '.jpg'
      and the result is looked up inside `imgdir`.

  Returns:
    The image as loaded by cv2.imread with a BGR->RGB conversion applied.

  Raises:
    FileNotFoundError: if the image is missing or cannot be decoded.
  """
  img_path = join(imgdir, img_id.replace('.json', '.jpg'))
  img = cv2.imread(img_path)
  # cv2.imread signals failure by returning None instead of raising; fail
  # here with the offending path rather than with an opaque cvtColor error.
  if img is None:
    raise FileNotFoundError(f'could not read image: {img_path}')
  return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

def resize(img, keep_ratio=False):
  """Resize an image with a (224, 224) target size and add a leading axis.

  Args:
    img: image passed to tf.image.resize.
    keep_ratio: forwarded as `preserve_aspect_ratio`.

  Returns:
    The resized image as a numpy array with a new axis of length 1 at position 0.
  """
  resized = tf.image.resize(img, (224,224), preserve_aspect_ratio=keep_ratio)
  as_array = resized.numpy()
  return np.expand_dims(as_array, 0)

def get_crop(obj, img):
  """Cut a detection's bounding box out of an image.

  Args:
    obj: detection dict with 'topleft' and 'bottomright' entries, each holding
      'x' and 'y' coordinates.
    img: image array indexed as [row, column, channel].

  Returns:
    The sub-array covering rows ymin:ymax and columns xmin:xmax.
  """
  top_left, bottom_right = obj['topleft'], obj['bottomright']
  rows = slice(top_left['y'], bottom_right['y'])
  cols = slice(top_left['x'], bottom_right['x'])
  return img[rows, cols, :]

def get_bigger_crop(obj, img, ratio=0.1):
  """Cut out a detection's bounding box enlarged by a margin on every side.

  Args:
    obj: detection dict with 'topleft' and 'bottomright' entries, each holding
      'x' and 'y' coordinates.
    img: 3-dimensional image array indexed as [row, column, channel].
    ratio: margin added on each side, as a fraction of the box width/height.

  Returns:
    The enlarged crop, clipped so it never reaches past the image borders.
  """
  img_h, img_w, _ = img.shape
  left, top = obj['topleft']['x'], obj['topleft']['y']
  right, bottom = obj['bottomright']['x'], obj['bottomright']['y']
  # margin is proportional to the box size and truncated to whole pixels
  pad_x = int((right - left) * ratio)
  pad_y = int((bottom - top) * ratio)
  # clamp the grown box to the image extent
  rows = slice(max(top - pad_y, 0), min(bottom + pad_y, img_h))
  cols = slice(max(left - pad_x, 0), min(right + pad_x, img_w))
  return img[rows, cols, :]

def draw_boxes(objects, image):
  """Draw one annotated bounding box per detection and return the image.

  Detections the classifier agrees with get a black box captioned with the
  YOLO label and confidence; detections the classifier rejects get a box and a
  'CLASSIFIER MISMATCH' caption in colour (0, 0, 255).

  Args:
    objects: detection dicts with 'topleft', 'bottomright', 'label' and
      'confidence', plus the classifier's 'predlabel' and/or 'predscore'.
    image: image array; its first and third channels are swapped before
      drawing (cv2.COLOR_BGR2RGB), so an RGB input such as the one returned by
      read_img comes back in BGR order.

  Returns:
    The channel-swapped image with all boxes and captions drawn on it.
  """
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  for obj in objects:
    tl = (obj['topleft']['x'], obj['topleft']['y'])
    br = (obj['bottomright']['x'], obj['bottomright']['y'])
    # Prefer the stored classifier label: 'predscore' is a string rounded to
    # two decimals, so re-thresholding it can contradict 'predlabel' for
    # scores just above 0.5 (e.g. 0.504 is stored as '0.5').
    predlabel = obj.get('predlabel')
    if predlabel is not None:
      mismatch = predlabel != 'lemon'
    else:
      mismatch = float(obj['predscore']) > 0.5
    if not mismatch:
      # yolo and classifier agree: regular box with class and confidence
      text = '{}: {:.0f}%'.format(obj['label'], obj['confidence'] * 100)
      color = (0, 0, 0)
    else:
      # yolo detected lemon, but classifier predicted not lemon
      text = 'CLASSIFIER MISMATCH'
      color = (0, 0, 255)
    image = cv2.rectangle(image, tl, br, color=color, thickness=5)
    image = cv2.putText(image, text, tl, cv2.FONT_HERSHEY_COMPLEX, 1, color, 2)
  return image

### Load yolo model (detection) 

In [None]:
# Replace any installed TensorFlow with a pinned 1.14 GPU build before importing darkflow
!pip uninstall -y tensorflow
!pip install tensorflow-gpu==1.14 #darkflow needs TF 1.x

In [None]:
from darkflow.net.build import TFNet

In [None]:
# Detection config
DETECTION_THRESHOLD = 0.2
# Settings handed to TFNet when the detector is built
options = dict(
    metaLoad="built_graph/yolov2-voc-1c.meta",
    pbLoad="built_graph/yolov2-voc-1c.pb",
    threshold=DETECTION_THRESHOLD,
    gpu=0.9,
)

In [None]:
# Build the darkflow detector from the detection options
tfnet = TFNet(options)

### Get detections from video (YOLO)

In [None]:
# Steps:
# 1. Iterate through the video frame by frame
# 2. Pass frame to object detection model and get bounding box results as python dict
# 3. Save the frame as .jpg and the corresponding python dict with results as .json file
SAVE_EVERY = 1 # save every n'th video frame as an image and pass it to yolo
video_path = 'demo/live_video/video3.mp4'
vidcap = cv2.VideoCapture(video_path)
# Fail loudly here instead of silently producing no frames at all.
if not vidcap.isOpened():
  raise IOError(f'could not open video: {video_path}')
count = 0
try:
  # Read first, then process, then read the next frame: this way the very
  # first frame of the video is handled too instead of being thrown away.
  success, image = vidcap.read()
  while success:
    if count % SAVE_EVERY == 0:
      image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
      results = tfnet.return_predict(image)
      cv2.imwrite(f'demo/{SAVE_FOLDER}/frame{count}.jpg', image)
      with open(f'demo/{SAVE_FOLDER}/frame{count}.json', 'w') as f:
        # Stored as a doubly encoded string; deserialize_json undoes this format.
        json.dump(json.dumps(str(results)), f)
    count += 1
    success, image = vidcap.read()
finally:
  # Release the capture even when detection or writing fails midway.
  vidcap.release()
cv2.destroyAllWindows()

### Classify image crops (VGG)

In [None]:
#!pip install tensorflow==2.6.0 # needs to be TF 2.X for classification, restart runtime after installing from 1.X to 2.X and reimport packages
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.vgg16 import preprocess_input

In [None]:
# Classification config
# Forwarded to resize() as keep_ratio when crops are prepared for the classifier
keep_aspect_ratio = False
# True: crops are taken with get_bigger_crop(); False: with get_crop()
bigger_crops = True

In [None]:
# Collect the detection files, ordered by the number N in their "frame<N>.json" name
detection_paths = glob.glob(f'demo/{SAVE_FOLDER}/*.json')
json_files = sorted(
    detection_paths,
    key=lambda filepath: int(os.path.basename(filepath).partition('.')[0].split('frame')[1]),
)

# Load the saved classifier from Drive
model_path = r'/content/drive/My Drive/custom-yolo/lemon_binary_classification/vgg16_binary_base'
model = load_model(model_path)

In [None]:
# Steps:
# 1. Iterate over every json file containing yolo detected bounding-boxes.
# 2. Each json file is a list of detected objects in one image frame, containing object coordinates (topleft xy, bottomright xy), confidence, label.
# 3. Extract crops from the image based on json data.
# 4. Pass crops to a binary vgg for classification of lemon/not lemon.
# 5. Draw bounding boxes based on the results of both yolo and vgg:
#   a. If yolo detects a lemon and vgg classifies the crop as 'lemon' draw a regular bounding box displaying class and confidence.
#   b. If yolo detects a lemon but vgg classifies 'not lemon' draw a false bounding box indicating classifier and detector mismatch.
# 6. Save updated results in .jpg (for images) and .json (for detection/classification data)

# Results go to demo/processed/{out_name}; create the folder up front so that
# neither cv2.imwrite nor open() fails on a missing directory.
out_dir = f'demo/processed/{out_name}'
os.makedirs(out_dir, exist_ok=True)
# Pick the cropping strategy once instead of branching for every object.
crop_fn = get_bigger_crop if bigger_crops else get_crop
frame_count = 0
for json_file in json_files:
  img_id = os.path.basename(json_file)
  img = read_img(img_id)
  json_obj = deserialize_json(json_file)
  for obj in json_obj:
    obj_crop = resize(crop_fn(obj, img), keep_ratio=keep_aspect_ratio)
    result = model.predict(preprocess_input(obj_crop.copy()))
    predscore = result[0][0]
    # scores up to 0.5 count as 'lemon', anything above as 'not lemon'
    predlabel = 'lemon' if predscore <= 0.5 else 'not lemon'
    obj.update({'predscore': str(round(predscore,2)), 'predlabel': predlabel})
  img = draw_boxes(json_obj, img)
  # cv2.imwrite reports failure through its return value rather than raising.
  if not cv2.imwrite(f'{out_dir}/frame{frame_count}.jpg', img):
    raise IOError(f'could not write {out_dir}/frame{frame_count}.jpg')
  with open(f'{out_dir}/frame{frame_count}.json', 'w') as f:
    json.dump(json_obj, f)
  frame_count += 1
  if frame_count % 25 == 0:
    print(f'processed {frame_count} frames')

processed 25 frames
processed 50 frames
processed 75 frames
processed 100 frames
processed 125 frames
processed 150 frames
processed 175 frames
processed 200 frames
processed 225 frames
processed 250 frames
processed 275 frames
processed 300 frames
processed 325 frames
processed 350 frames
processed 375 frames
processed 400 frames


### Recreate video from processed frames

In [None]:
# Configure the writer for the output video (XVID codec, 30 fps)
out = out_name+'.avi'
outpath = join('demo/processed', out)
fourcc = cv2.VideoWriter_fourcc(*'XVID')
# The output frame size is taken from the first extracted frame
first_frame = cv2.imread(f'{imgdir}/frame0.jpg')
height, width, depth = first_frame.shape
videoWriter = cv2.VideoWriter(outpath, fourcc, 30, (width,height))

In [None]:
# Collect the processed frames, ordered by the number N in their "frame<N>.jpg" name
frames = sorted(
    glob.glob(f'demo/processed/{out_name}/*.jpg'),
    key=lambda filepath: int(os.path.basename(filepath).partition('.')[0].split('frame')[1]),
)

In [None]:
# Write the processed frames to the video in order, then finalize the file
for frame_path in frames:
  videoWriter.write(cv2.imread(frame_path))
videoWriter.release()
cv2.destroyAllWindows()