<a href="https://colab.research.google.com/github/mralamdari/Computer-Vision-Projects/blob/main/Arthropod_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## [Arthropod Taxonomy Orders Object Detection Dataset](https://www.kaggle.com/datasets/mistag/arthropod-taxonomy-orders-object-detection-dataset/)

In [25]:
import numpy as np
import pandas as pd
import tensorflow as tf
# import utils_box as box
import matplotlib as mpl
import concurrent.futures
from threading import Lock
import json, math, random, os, pprint
from matplotlib import pyplot as plt
# Short alias for tf.data's AUTOTUNE constant.
AUTO = tf.data.AUTOTUNE
# Shared pretty-printer instance for inspecting nested structures.
pp = pprint.PrettyPrinter()

# from tensorflow_models.vision import box_ops as boxutils

In [None]:
# Tell the Kaggle CLI which directory to use as its configuration directory.
# NOTE(review): this assumes Google Drive is already mounted at /content/drive
# and that the Kaggle credentials live in MyDrive — no mount step is visible
# in this notebook; confirm before a fresh "Run All".
os.environ['KAGGLE_CONFIG_DIR']='/content/drive/MyDrive/'
# Download the ArTaxOr dataset archive into the current working directory.
!kaggle datasets download -d mistag/arthropod-taxonomy-orders-object-detection-dataset
# Extract every zip in the working directory; the archives are removed only
# when extraction succeeds (&&).
!unzip *.zip && rm *.zip

In [5]:
# NOTE(review): images_per_shard and target_width are not referenced in the
# visible part of the notebook — presumably consumed by later cells; confirm.
images_per_shard = 481
target_width = 1024

# Names of the seven arthropod orders; display code indexes into a list like
# this one to turn a class id into a label.
CLASSES = ['Lepidoptera', 'Hymenoptera', 'Hemiptera', 'Odonata', 'Diptera', 'Araneae', 'Coleoptera']

# CLASSES followed by three extra underscore-prefixed tags.
RAW_CLASSES = CLASSES + ['_truncated', '_blurred', '_occluded']

In [57]:
class Progress:
    """Text mode progress bar.

    Usage:
            p = Progress(30)
            p.step()
            p.step()
            p.step(reset=True) # to restart from 0%

    The progress bar displays a new header at each restart.
    Calls to ``step`` are serialized with a lock, so one instance can be
    stepped from several threads.
    """
    def __init__(self, maxi, size=100, msg=""):
        """
        :param maxi: the number of steps required to reach 100%
        :param size: the number of characters taken on the screen by the progress bar
        :param msg: the message displayed in the header of the progress bar
        """
        self.maxi = maxi
        self.size = size
        self.msg = msg
        # Created exactly once: a restart must never replace the lock, or a
        # thread holding the old lock and a thread acquiring the new one
        # would run step() concurrently.
        self.lock = Lock()
        self.__restart()

    def __restart(self):
        """Rewind the bar to 0% and arrange for the header to be printed again."""
        self.p = self.__start_progress(self.maxi)()  # () to get the iterator from the generator
        self.header_printed = False

    def step(self, reset=False):
        """Advance the bar by one step, printing the header first if needed.

        :param reset: when True, restart from 0% (with a new header) before
                      taking this step
        """
        with self.lock:
            if reset:
                self.__restart()
            if not self.header_printed:
                self.__print_header()
            # The default keeps extra calls past `maxi` harmless instead of
            # leaking StopIteration into the caller.
            next(self.p, None)

    def __print_header(self):
        """Print the "0% <msg> 100%" ruler that sits above the bar."""
        print()
        format_string = "0%{: ^" + str(self.size - 6) + "}100%"
        print(format_string.format(self.msg))
        self.header_printed = True

    def __start_progress(self, maxi):
        """Return a generator function that prints the bar incrementally."""
        def print_progress():
            # Bresenham's algorithm. Yields the number of characters printed
            # by the current step. Over `maxi` invocations this prints
            # `self.size` characters in total.
            dx = maxi
            dy = self.size
            d = dy - dx
            for x in range(maxi):
                k = 0
                while d >= 0:
                    print('=', end="", flush=True)
                    k += 1
                    d -= dx
                d += dy
                yield k

        return print_progress

    
def no_decorations(ax):
    """Hide the x/y axis rulers and all four border spines of ``ax``."""
    host = ax.axes
    host.get_xaxis().set_visible(False)
    host.get_yaxis().set_visible(False)
    # Same order as the frame is usually listed: top, right, bottom, left.
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)
    

def display_detections(images, offsets, resizes, detections, classnames, ground_truth_boxes=[]):
    """Map detections back to original image coordinates and display them.

    ``detections`` holds one list per image; every entry is a 7-field record
    whose first field is ignored, followed by x, y, w, h, score and class.
    For each image the matching entry of ``offsets`` is subtracted from the
    box origin (index 1 from x, index 0 from y) and then all four box values
    are multiplied by the matching entry of ``resizes``.

    NOTE(review): the third and fourth box values are called w/h here and
    are not shifted by the offset, yet display_with_boxes reads box[2] and
    box[3] as x2/y2 corner coordinates — confirm which layout the detector
    actually emits.
    """
    # shift to the un-padded origin, then scale back to the original size
    boxes = []
    for detection_list, ofs, rsz in zip(detections, offsets, resizes):
        boxes.append([
            ((x - ofs[1]) * rsz, (y - ofs[0]) * rsz, w * rsz, h * rsz)
            for _, x, y, w, h, _score, _klass in detection_list
        ])

    classes = []
    scores = []
    for detection_list in detections:
        classes.append([int(klass) for _, _x, _y, _w, _h, _score, klass in detection_list])
        scores.append([score for _, _x, _y, _w, _h, score, _klass in detection_list])

    display_with_boxes(images, boxes, classes, scores, classnames, ground_truth_boxes)
    
    
# images, boxes and classes must have the same number of elements
# scores can be an empty list []. If it is not empty, it must also
# have the same number of elements.
# classnames is the list of possible classes (strings)
def display_with_boxes(images, boxes, classes, scores, classnames, ground_truth_boxes=[]):
    """Show images in a square grid with labelled boxes drawn on top.

    :param images: sequence of image arrays (each must expose ``.shape``)
    :param boxes: per image, a list of boxes given as (x1, y1, x2, y2)
    :param classes: per image, a list of 0-based indices into ``classnames``
    :param scores: per image, a list of scores appended to the label as an
                   integer percentage; may be empty or None to omit them
    :param classnames: list of class name strings
    :param ground_truth_boxes: optional per-image lists of (x1, y1, x2, y2)
                   boxes, drawn as translucent white rectangles

    Does nothing when ``images`` is empty.
    """
    N = len(images)
    if N == 0:
        # Nothing to draw; also avoids dividing by zero in the mean below.
        return
    sqrtN = int(math.ceil(math.sqrt(N)))
    aspect = sum(im.shape[1]/im.shape[0] for im in images)/N # mean aspect ratio of images
    plt.figure(figsize=(15,15/aspect), frameon=False)

    # len() instead of truthiness so that numpy arrays are accepted too
    # (their truth value is ambiguous and raises).
    has_ground_truth = ground_truth_boxes is not None and len(ground_truth_boxes) > 0
    has_scores = scores is not None and len(scores) > 0

    for k in range(N):
        ax = plt.subplot(sqrtN, sqrtN, k+1)
        no_decorations(ax)
        plt.imshow(images[k])

        if has_ground_truth:
            for box in ground_truth_boxes[k]:
                x, y, w, h = (box[0], box[1], box[2]-box[0], box[3]-box[1]) # convert x1 y1 x2 y2 into xywh
                rect = mpl.patches.Rectangle((x, y),w,h,linewidth=4,edgecolor='#FFFFFFA0',facecolor='none')
                ax.add_patch(rect)

        for i, (box, klass) in enumerate(zip(boxes[k], classes[k])):
            x, y, w, h = (box[0], box[1], box[2]-box[0], box[3]-box[1]) # convert x1 y1 x2 y2 into xywh
            label = classnames[klass] # class ids are used as 0-based indices
            if has_scores:
                label += ' ' + str(int(scores[k][i]*100)) + '%'
            # wide dark outline first, thin yellow outline on top of it
            rect = mpl.patches.Rectangle((x, y),w,h,linewidth=4,edgecolor='#00000080',facecolor='none')
            ax.add_patch(rect)
            rect = mpl.patches.Rectangle((x, y),w,h,linewidth=2,edgecolor='#FFFF00FF',facecolor='none')
            ax.add_patch(rect)
            # the label gets the same two-layer treatment as the rectangle
            plt.text(x, y, label, size=16, ha="left", va="top", color='#FFFF00FF',
                     bbox=dict(boxstyle="round", ec='#00000080', fc='#0000004E', linewidth=3) )
            plt.text(x, y, label, size=16, ha="left", va="top", color='#FFFF00FF',
                     bbox=dict(boxstyle="round", ec='#FFFF00FF', fc='#0000004E', linewidth=1.5) )
    plt.tight_layout()
    plt.subplots_adjust(wspace=0.02, hspace=0.02)
    plt.show()

In [58]:
# load all the metadata
# NOTE(review): DATA_PATH points at a single class folder (Araneae), so only
# that folder is scanned even though CLASSES lists seven orders — confirm
# whether the other folders are meant to be loaded as well.
DATA_PATH = '/content/ArTaxOr/Araneae'
# Both glob patterns are derived from DATA_PATH so the directory is spelled once.
json_filename_pattern = f'{DATA_PATH}/annotations/*.json'
jpeg_filename_pattern = f'{DATA_PATH}/*.jpg'

def load_json(filename, p):
    """Advance the progress bar ``p`` by one step, then parse ``filename`` as JSON.

    The file is opened in text mode through ``tf.io.gfile.GFile`` and the
    decoded JSON document is returned.
    """
    p.step()
    with tf.io.gfile.GFile(filename, 'r') as json_file:
        parsed = json.load(json_file)
    return parsed
    
def filename_key(filename):
    """Reduce a path to ``<parent directory name>/<file name>``.

    Only the last two path components are kept, joined with ``os.path.join``.
    """
    parent_name = os.path.basename(os.path.dirname(filename))
    base_name = os.path.basename(filename)
    return os.path.join(parent_name, base_name)
    
def load_metadata(filename_pattern, jpeg_filename_pattern):
    """Load every JSON annotation matching a pattern and list the image files.

    :param filename_pattern: glob pattern of the JSON annotation files
    :param jpeg_filename_pattern: glob pattern of the image files
    :return: ``(data, jpeg_filenames)`` where ``data`` maps
             ``filename_key(record['asset']['path'])`` to the parsed JSON
             record, and ``jpeg_filenames`` is the list of matching images
    """
    print("Scanning directory...", end=' ')
    # Use the pattern that was passed in rather than a module-level global
    # of a similar name.
    json_filenames = tf.io.gfile.glob(filename_pattern)
    jpeg_filenames = tf.io.gfile.glob(jpeg_filename_pattern)
    print(f"found {len(json_filenames)} metadata files and {len(jpeg_filenames)} image files.")
    print("Loading metadata")
    p = Progress(len(json_filenames))
    with concurrent.futures.ThreadPoolExecutor() as exe:
        # Collect the results while the pool is still open so a failed load
        # is raised here.
        records = list(exe.map(lambda x: load_json(x,p), json_filenames))
    # data as a dictionary for easier cross-referencing
    data = {filename_key(d['asset']['path']):d for d in records}
    return data, jpeg_filenames

# Run the scan once at cell level: RAW_METADATA is the dictionary of parsed
# annotation records and JPEG_FILENAMES the list of image paths that matched.
RAW_METADATA, JPEG_FILENAMES = load_metadata(json_filename_pattern, jpeg_filename_pattern)

Scanning directory... found 2418 metadata files and 2418 image files.
Loading metadata

0%                                                                                              100%

In [59]:
RAW_METADATA

{'Araneae/ba547bd149dd.jpg': {'asset': {'format': 'jpg',
   'id': '82cb5c20e0fc1269cda1631e74af6364',
   'name': 'ba547bd149dd.jpg',
   'path': 'file:F:/ArTaxOr/Araneae/ba547bd149dd.jpg',
   'size': {'width': 2048, 'height': 1536},
   'state': 2,
   'type': 1},
  'regions': [{'id': '8b3eQZuK7',
    'type': 'RECTANGLE',
    'tags': ['Araneae'],
    'boundingBox': {'height': 326.6206896551724,
     'width': 436.9655172413793,
     'left': 728.2758620689655,
     'top': 740.0459770114942},
    'points': [{'x': 728.2758620689655, 'y': 740.0459770114942},
     {'x': 1165.2413793103449, 'y': 740.0459770114942},
     {'x': 1165.2413793103449, 'y': 1066.6666666666667},
     {'x': 728.2758620689655, 'y': 1066.6666666666667}]}],
  'version': '2.1.0'},
 'Araneae/7389e7d53773.jpg': {'asset': {'format': 'jpg',
   'id': '071a47e63206b6c9af56604ca2cbada8',
   'name': '7389e7d53773.jpg',
   'path': 'file:F:/ArTaxOr/Araneae/7389e7d53773.jpg',
   'size': {'width': 1491, 'height': 2048},
   'state': 2,
 