In [None]:
import os
import cv2
import json
import matplotlib.pyplot as plt
import tensorflow as tf
from PIL import Image
import fiftyone as fo
import fiftyone.zoo as foz
from pycocotools.coco import COCO
import numpy as np
from sklearn.model_selection import train_test_split

#dataset = foz.load_zoo_dataset("coco-2017")

In [None]:
#Extract video frames
def extract_videoframes(videoPath):
    """Read every frame of a video into a list.

    Args:
        videoPath: Path (or other source accepted by ``cv2.VideoCapture``)
            of the video to decode.

    Returns:
        A list with one entry per decoded frame, in read order. The list is
        empty when the video cannot be opened or yields no frames.
    """
    capture = cv2.VideoCapture(videoPath)
    frames = []
    try:
        if not capture.isOpened():
            print(f"Error: cannot open video: {videoPath}")
            return frames
        while capture.isOpened():
            ret, frame = capture.read()
            if not ret:
                # read() also reports failure at the normal end of the stream,
                # so this alone is not treated as an error.
                break
            frames.append(frame)
    finally:
        # Always free the capture handle, even if decoding raises.
        capture.release()
    if not frames:
        print("Error: cannot receive frame")
    return frames
    

In [None]:
# Root of the local COCO-2017 download. Set the COCO_2017_ROOT environment
# variable to point the notebook at another location without editing it;
# the default keeps the original paths.
coco_root_dir = os.environ.get("COCO_2017_ROOT", "/Users/ivanng/dataset/coco-2017")
coco_images_dir = os.path.join(coco_root_dir, "train", "data")
coco_ann_file_caption = os.path.join(coco_root_dir, "raw", "captions_train2017.json")
coco_ann_file_detection = os.path.join(coco_root_dir, "raw", "instances_train2017.json")

#Load Coco annotations
def loadCocoAnnotations(coco_ann_file_detection):
    """Open a COCO annotation file and build an id -> name category lookup.

    Args:
        coco_ann_file_detection: Path of the annotation JSON handed to ``COCO``.

    Returns:
        A ``(coco, categoryMap)`` tuple: the ``COCO`` object and a dict mapping
        each category ``'id'`` to its ``'name'``.
    """
    cocoApi = COCO(coco_ann_file_detection)
    # Ask for every category id, then resolve them to full category records.
    categoryRecords = cocoApi.loadCats(cocoApi.getCatIds())
    categoryMap = {}
    for record in categoryRecords:
        categoryMap[record['id']] = record['name']
    return cocoApi, categoryMap

#Preprocess images
def preprocessImage(image, targetSize=(128,128)):
    """Resize an image to fit ``targetSize`` while keeping its aspect ratio,
    centre it on a grey canvas, and scale pixel values to [0, 1].

    Args:
        image: Array whose first two dimensions are (height, width); it is
            written into a 3-channel canvas, so 3 channels are expected.
        targetSize: ``(height, width)`` of the output canvas.

    Returns:
        ``(imagePadded, scale, x_offset, y_offset)`` where ``imagePadded`` is
        the padded image divided by 255.0, ``scale`` is the resize factor and
        the offsets locate the top-left corner of the resized image inside
        the canvas.
    """
    height, width = image.shape[:2]
    scale = min(targetSize[0]/height, targetSize[1]/width)
    # int() truncates, so a very thin or very flat image could end up with a
    # zero-sized side, which is not a valid resize target; keep at least 1px.
    scaledHeight = max(1, int(height*scale))
    scaledWidth = max(1, int(width*scale))
    #Resize image (cv2 takes the size as (width, height))
    resizedImage = cv2.resize(image, (scaledWidth, scaledHeight))
    #Padding image to targetSize with mid-grey (128)
    imagePadded = np.full((targetSize[0], targetSize[1], 3), 128, dtype=np.uint8)
    x_offset = (targetSize[1]-scaledWidth)//2
    y_offset = (targetSize[0]-scaledHeight)//2
    imagePadded[y_offset:y_offset+scaledHeight, x_offset:x_offset+scaledWidth] = resizedImage

    #Normalise to [0, 1]
    imagePadded = imagePadded/255.0
    return imagePadded, scale, x_offset, y_offset

#Preprocess bounding boxes
def preprocessBboxes(bboxes, scale, x_offset, y_offset):
    """Map ``[x, y, width, height]`` boxes into the resized, padded image.

    Each box is scaled by ``scale``, shifted by the padding offsets and
    returned in corner form ``[xStart, yStart, xEnd, yEnd]``.
    """
    def toCorners(bbox):
        left, top, boxWidth, boxHeight = bbox
        x1 = left*scale+x_offset
        y1 = top*scale+y_offset
        return [x1, y1, x1+boxWidth*scale, y1+boxHeight*scale]

    return [toCorners(bbox) for bbox in bboxes]

#Get entire preprocess data
def getImageAnnotations(coco, coco_images_dir, categoryMap, targetSize=(128, 128), maxImages=None):
    """Load, preprocess and collect images with their boxes and labels.

    Args:
        coco: Object providing ``getImgIds``, ``loadImgs``, ``getAnnIds`` and
            ``loadAnns`` (as returned by ``loadCocoAnnotations``).
        coco_images_dir: Directory containing the image files.
        categoryMap: Dict mapping category id to category name.
        targetSize: ``(height, width)`` passed on to ``preprocessImage``.
        maxImages: Optional cap on the number of samples collected. Every
            sample is kept in memory, so use this for quick experiments.
            ``None`` (the default) processes every image id.

    Returns:
        A list of ``(preprocessedImage, preprocessedBboxes, labels)`` tuples,
        where ``labels[k]`` always belongs to ``preprocessedBboxes[k]``.
    """
    data = []
    for i, imageID in enumerate(coco.getImgIds()):
        if maxImages is not None and len(data) >= maxImages:
            break
        imageInfo = coco.loadImgs(imageID)[0]
        #Load image
        imagePath = os.path.join(coco_images_dir, imageInfo['file_name'])
        image = cv2.imread(imagePath)
        # imread returns None for missing/unreadable files; skip those.
        if image is None:
            continue
        annotations = coco.loadAnns(coco.getAnnIds(imgIds=imageID))
        # Keep only annotations that have BOTH fields so labels and boxes
        # stay index-aligned (filtering them separately could shift them).
        usable = [ann for ann in annotations if 'category_id' in ann and 'bbox' in ann]
        labels = [categoryMap[ann['category_id']] for ann in usable]
        bboxes = [ann['bbox'] for ann in usable]
        #Invoke image preprocess
        preprocessedImage, scale, x_offset, y_offset = preprocessImage(image, targetSize)
        #Invoke bboxes preprocess
        preprocessedBboxes = preprocessBboxes(bboxes, scale, x_offset, y_offset)

        data.append((preprocessedImage, preprocessedBboxes, labels))

        # Sanity-check preview for image ids at positions 0-4 (skipped images
        # still count towards the position).
        if i < 5:
            visualise(preprocessedImage, preprocessedBboxes, labels)
    return data

#Temporary visualisation of bboxes and labels on images
def visualise(image, bboxes, labels):
    """Draw boxes and their labels on an image and show it with matplotlib.

    Args:
        image: Image with values in [0, 1]; it is multiplied by 255 and cast
            to uint8 before drawing, so the caller's array is not modified.
            The channels are converted BGR -> RGB for display.
        bboxes: Iterable of ``[x1, y1, x2, y2]`` corner boxes.
        labels: Text for each box, matched by position. Boxes without a
            matching label are still drawn, just without text.
    """
    image = (image*255).astype(np.uint8)
    for i, box in enumerate(bboxes):
        x1, y1, x2, y2 = map(int, box)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 1)
        # Tolerate fewer labels than boxes instead of raising IndexError.
        if i < len(labels):
            cv2.putText(image, labels[i], (x1, y1-2), cv2.FONT_HERSHEY_SIMPLEX, 0.2, (0, 255, 0), 1)
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.axis("off")
    plt.show()

In [None]:
#Detection model framework
def detectionModel(numClasses, inputShape=(128,128,3)):
    """Build a small CNN with a bounding-box head and a classification head.

    The network is two Conv2D + MaxPooling2D stages, a Flatten, a 128-unit
    dense layer, and two output layers fed from it.

    Args:
        numClasses: Number of units in the softmax classification head.
        inputShape: Shape of one input image.

    Returns:
        A ``tf.keras.Model`` whose outputs are ``[outputBbox, outputClass]``.
    """
    inputs = tf.keras.Input(shape=inputShape)
    x = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    x = tf.keras.layers.MaxPooling2D((2,2))(x)
    x = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = tf.keras.layers.MaxPooling2D((2,2))(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(128, activation='relu')(x)
    # NOTE(review): the sigmoid keeps box outputs in (0, 1), while
    # preprocessBboxes in this notebook produces pixel coordinates; box
    # targets presumably need normalising before training - confirm.
    outputBbox = tf.keras.layers.Dense(4, activation='sigmoid')(x)
    outputClass = tf.keras.layers.Dense(numClasses, activation='softmax')(x)
    model = tf.keras.Model(inputs=inputs, outputs=[outputBbox, outputClass])
    return model
#Model training
def modelTraining(model, data, num_classes, epochs=10):
    """Train the detection model (not implemented yet).

    The original definition was left unfinished (``epochs=`` had no default
    value), which is a syntax error that prevents the whole cell from
    running. This stub keeps the cell importable and fails loudly if called.

    Args:
        model: Model to train.
        data: Training samples.
        num_classes: Number of target classes.
        epochs: Number of training epochs. The default of 10 is a placeholder
            - TODO confirm the intended value.

    Raises:
        NotImplementedError: Always, until the training loop is written.
    """
    # TODO: build image/box/label arrays from `data`, compile and fit `model`.
    raise NotImplementedError("modelTraining is not implemented yet")

In [None]:
#Temporary execution
# Load the detection annotations, then preprocess every image with its
# boxes and labels using the default target size.
coco, categoryMap = loadCocoAnnotations(
    coco_ann_file_detection,
)
data = getImageAnnotations(
    coco=coco,
    coco_images_dir=coco_images_dir,
    categoryMap=categoryMap,
)