##### Copyright 2018 The TensorFlow Hub Authors.

Licensed under the Apache License, Version 2.0 (the "License");

In [4]:
# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# Object Detection


<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://www.tensorflow.org/hub/tutorials/object_detection"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" />View on TensorFlow.org</a>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/hub/blob/master/examples/colab/object_detection.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/hub/blob/master/examples/colab/object_detection.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View on GitHub</a>
  </td>
  <td>
    <a href="https://storage.googleapis.com/tensorflow_docs/hub/examples/colab/object_detection.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a>
  </td>
  <td>
    <a href="https://tfhub.dev/s?q=google%2Ffaster_rcnn%2Fopenimages_v4%2Finception_resnet_v2%2F1%20OR%20google%2Ffaster_rcnn%2Fopenimages_v4%2Finception_resnet_v2%2F1"><img src="https://www.tensorflow.org/images/hub_logo_32px.png" />See TF Hub models</a>
  </td>
</table>

This Colab demonstrates use of a TF-Hub module trained to perform object detection.

## Setup


In [5]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [None]:
#@title Imports and function definitions

# For running inference on the TF-Hub module.
import tensorflow as tf

import tensorflow_hub as hub

# For downloading the image.
import matplotlib.pyplot as plt
import tempfile
from six.moves.urllib.request import urlopen
from six import BytesIO

# For drawing onto the image.
import numpy as np
from PIL import Image
from PIL import ImageColor
from PIL import ImageDraw
from PIL import ImageFont
from PIL import ImageOps

# For measuring the inference time.
import time
import json

import os

# Print Tensorflow version
print(tf.__version__)

# Check available GPU devices.
print("The following GPU devices are available: %s" % tf.test.gpu_device_name())

## Example use

### Helper functions for downloading images and for visualization.

Visualization code adapted from [TF object detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py) for the simplest required functionality.

In [None]:
def display_image(image):
  fig = plt.figure(figsize=(20, 15))
  plt.grid(False)
  plt.imshow(image)


def download_and_resize_image(url, new_width=256, new_height=256,
                              display=False):
  # _, filename = tempfile.mkstemp(suffix=".jpg")
  # response = urlopen(url)
  # image_data = response.read()
  # image_data = BytesIO(image_data)
  pil_image = Image.open(url)
  pil_image = ImageOps.fit(pil_image, (new_width, new_height), Image.ANTIALIAS)
  pil_image_rgb = pil_image.convert("RGB")
  pil_image_rgb.save("resized.jpeg", format="JPEG", quality=90)
  print("Image downloaded to %s." % "resized.jpeg")
  if display:
    display_image(pil_image)
  return "resized.jpeg"


def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               color,
                               font,
                               thickness=4,
                               display_str_list=()):
  """Adds a bounding box to an image."""
  draw = ImageDraw.Draw(image)
  im_width, im_height = image.size
  (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                ymin * im_height, ymax * im_height)
  #using left,right,top,bottom, calculate the area of the box
  area = abs(right - left) * abs(bottom - top)

  draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
             (left, top)],
            width=thickness,
            fill=color)

  # If the total height of the display strings added to the top of the bounding
  # box exceeds the top of the image, stack the strings below the bounding box
  # instead of above.
  display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
  # Each display_str has a top and bottom margin of 0.05x.
  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

  if top > total_display_str_height:
    text_bottom = top
  else:
    text_bottom = top + total_display_str_height
  # Reverse list and print from bottom to top.
  for display_str in display_str_list[::-1]:
    text_width, text_height = font.getsize(display_str)
    margin = np.ceil(0.05 * text_height)
    draw.rectangle([(left, text_bottom - text_height - 2 * margin),
                    (left + text_width, text_bottom)],
                   fill=color)
    draw.text((left + margin, text_bottom - text_height - margin),
              display_str,
              fill="black",
              font=font)
    text_bottom -= text_height - 2 * margin
  return area

def draw_boxes(image, boxes, class_names, scores, median_index, max_boxes=10, min_score=0.1):
  """Overlay labeled boxes on an image with formatted scores and label names."""
  colors = list(ImageColor.colormap.values())

  try:
    font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf",
                              25)
  except IOError:
    print("Font not found, using default font.")
    font = ImageFont.load_default()

  box_areas = []
  for i in range(min(boxes.shape[0], max_boxes)):
    if i == median_index:
    #if scores[i] >= min_score:
      ymin, xmin, ymax, xmax = tuple(boxes[i])
      display_str = "{}: {}%".format(class_names[i].decode("ascii"),
                                     int(100 * scores[i]))
      color = colors[hash(class_names[i]) % len(colors)]
      image_pil = Image.fromarray(np.uint8(image)).convert("RGB")
      area = draw_bounding_box_on_image(
          image_pil,
          ymin,
          xmin,
          ymax,
          xmax,
          color,
          font,
          display_str_list=[display_str])
      box_areas.append(area)
      np.copyto(image, np.array(image_pil))
  return box_areas, image

## Apply module

Load a public image from Open Images v4, save locally, and display.

In [None]:
# By Heiko Gorski, Source: https://commons.wikimedia.org/wiki/File:Naxos_Taverna.jpg
#image_url = "/content/drive/MyDrive/AI/Disco_Diffusion/images_out/x10_ant_long3/x10_ant_long3(-1)_0969.png"  #@param
#downloaded_image_path = download_and_resize_image(image_url, 1280, 856, True)

Pick an object detection module and apply on the downloaded image. Modules:
* **FasterRCNN+InceptionResNet V2**: high accuracy,
* **ssd+mobilenet V2**: small and fast.

In [None]:
module_handle = "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1" #@param ["https://tfhub.dev/google/openimages_v4/ssd/mobilenet_v2/1", "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1"]

detector = hub.load(module_handle).signatures['default']


In [None]:
def load_img(path):
  img = tf.io.read_file(path)
  img = tf.image.decode_jpeg(img, channels=3)
  return img

In [None]:
def run_detector(detector, path):
  img = load_img(path)

  converted_img  = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]
  start_time = time.time()
  result = detector(converted_img)
  end_time = time.time()

  result = {key:value.numpy() for key,value in result.items()}

  print("Found %d objects." % len(result["detection_scores"]))
  print("Inference time: ", end_time-start_time)
  image = img.numpy()
  image_pil = Image.fromarray(np.uint8(image)).convert("RGB")
  im_width, im_height = image_pil.size
  result["box_areas"] = []
  for i in range(min(result["detection_boxes"].shape[0], 10)):
    if result["detection_scores"][i] >= 0.1:
      ymin, xmin, ymax, xmax = result["detection_boxes"][i]
      (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                ymin * im_height, ymax * im_height)
      result["box_areas"].append(abs(left - right) * abs(top - bottom))
  import numpy
  median_index = numpy.argsort(result["box_areas"])[len(result["box_areas"])//2]

  ymin, xmin, ymax, xmax = result["detection_boxes"][i]
  (left, right, top, bottom) = (xmin * im_width, xmax * im_width, ymin * im_height, ymax * im_height)
  real_left = min(left, right)
  real_right = max(left,right)
  real_top = min(bottom, top)
  real_bottom = max(bottom,top)
  box_width = real_right - real_left
  box_height = real_top - real_bottom
  hor_center = real_left + (box_width//2)
  ver_center = real_top + (box_height//2)
  print('im_height', im_height, im_width)
  print('winner', result["detection_scores"][median_index], result['box_areas'][median_index], real_left,real_right,real_top,real_bottom, hor_center, ver_center)

  coord_file = open("/content/drive/MyDrive/AI/Object_Detection/request/center_coordinates.txt", "w")
  coord_file.write(str(real_left) +','+ str(real_right) +','+str(real_top) +','+str(real_bottom))
  coord_file.close()

  box_areas, image_with_boxes = draw_boxes(
      image, result["detection_boxes"],
      result["detection_class_entities"], result["detection_scores"],median_index)

  display_image(image_with_boxes)

# Setup Antarctic Prompts

In [None]:

antarctic_number_of_captions =  2#@param {type: 'number'}

!git clone https://github.com/dzryk/antarctic-captions.git
%cd antarctic-captions/
!git clone https://github.com/openai/CLIP
!pip3 install gdown
!pip3 install ftfy
!pip3 install transformers
!pip3 install git+https://github.com/PyTorchLightning/pytorch-lightning
# Download models and cache

!wget -m -np -c -U "eye02" -w 2 -P "/content/drive/MyDrive/AI/models/antarctic-captions/" -R "index.html*" "https://the-eye.eu/public/AI/models/antarctic-captions/"
import argparse
import io
import numpy as np
import torch
import torch.nn as nn
import requests
import pytorch_lightning as pl
import matplotlib.pyplot as plt
import torchvision.transforms.functional as TVTFF

from CLIP import clip
from PIL import Image
from pytorch_lightning.callbacks import ModelCheckpoint
from torchvision.utils import make_grid

import model
import utils
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
# Helper functions
def fetch(url_or_path):
    if str(url_or_path).startswith('http://') or str(url_or_path).startswith('https://'):
        r = requests.get(url_or_path)
        r.raise_for_status()
        fd = io.BytesIO()
        fd.write(r.content)
        fd.seek(0)
        return fd
    return open(url_or_path, 'rb')

def load_image(img, preprocess):
    img = Image.open(fetch(img))
    return img, preprocess(img).unsqueeze(0).to(device)

def show(imgs):
    if not isinstance(imgs, list):
        imgs = [imgs]
    fix, axs = plt.subplots(ncols=len(imgs), squeeze=False)
    for i, img in enumerate(imgs):
        img = img.detach()
        img = TVTFF.to_pil_image(img)
        axs[0, i].imshow(np.asarray(img))
        axs[0, i].set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])

def display_grid(imgs):
    reshaped = [TVTFF.to_tensor(x.resize((256, 256))) for x in imgs]
    show(make_grid(reshaped))
    
def clip_rescoring(args, net, candidates, x):
    textemb = net.perceiver.encode_text(
        clip.tokenize(candidates).to(args.device)).float()
    textemb /= textemb.norm(dim=-1, keepdim=True)
    similarity = (100.0 * x @ textemb.T).softmax(dim=-1)
    _, indices = similarity[0].topk(args.num_return_sequences)
    return [candidates[idx] for idx in indices[0]]

def loader(args):
    cache = []
    with open(args.textfile) as f:
        for line in f:
            cache.append(line.strip())
    cache_emb = np.load(args.embfile)
    net = utils.load_ckpt(args)
    net.cache = cache
    net.cache_emb = torch.tensor(cache_emb).to(args.device)
    preprocess = clip.load(args.clip_model, jit=False)[1]
    return net, preprocess
    
def caption_image(path, args, net, preprocess):
    print('in caption_image')
    captions = []
    img, mat = load_image(path, preprocess)
    table, x = utils.build_table(mat.to(device), 
                        perceiver=net.perceiver,
                        cache=net.cache,
                        cache_emb=net.cache_emb,
                        topk=args.topk,
                        return_images=True)
    table = net.tokenizer.encode(table[0], return_tensors='pt').to(device)
    out = net.model.generate(table,
                            do_sample=args.do_sample,
                            num_beams=args.num_beams,
                            temperature=args.temperature,
                            top_p=args.top_p,
                            num_return_sequences=args.num_return_sequences)
    candidates = []
    for seq in out:
        candidates.append(net.tokenizer.decode(seq, skip_special_tokens=True))
    captions = clip_rescoring(args, net, candidates, x[None,:])
    for c in captions[:args.display]:
        print(c)
    #display_grid([img])
    return captions
# Settings
antarctic_filedir='/content/drive/MyDrive/AI/models/antarctic-captions/the-eye.eu/public/AI/models/antarctic-captions'
antarctic_args = argparse.Namespace(
    ckpt=f'{antarctic_filedir}/-epoch=05-vloss=2.163.ckpt',
    textfile=f'{antarctic_filedir}/postcache.txt',
    embfile=f'{antarctic_filedir}/postcache.npy',
    clip_model='ViT-B/16',
    topk=10,
    num_return_sequences=1000,
    num_beams=1,
    temperature=1.0,
    top_p=1.0,
    display=antarctic_number_of_captions,
    do_sample=True,
    device=device
)
# Load checkpoint and preprocessor
antarctic_net, antarctic_preprocess = loader(antarctic_args)
%cd ..



# Run the main loop for Object Detection And Antarctic Prompts

In [None]:
import time
import sys 
timeout_minutes=120 #@param {type:"integer"}
#how to count the time that spends on while loop
start_time = time.time()

object_detection_request_directory = '/content/drive/MyDrive/AI/Object_Detection/request'
antarctic_prompt_request_directory = '/content/drive/MyDrive/AI/antarctic_prompt/request/'

#if request_directory does not exist, create it
if not os.path.exists(object_detection_request_directory):
    os.makedirs(object_detection_request_directory)
if not os.path.exists(antarctic_prompt_request_directory):
    os.makedirs(antarctic_prompt_request_directory)
while True:
  #while request_directory is empty
  while len(os.listdir(object_detection_request_directory)) == 0 and len(os.listdir(antarctic_prompt_request_directory)) == 0:
      time.sleep(1)
      if (time.time() - start_time) > timeout_minutes*60:
          print('timeout')
          sys.exit()

  #check for an image to be object_detected
  if len(os.listdir(object_detection_request_directory)) > 0:
    for file in os.listdir(object_detection_request_directory):
      if file.endswith('.png'):
        file_name = file
        resized = download_and_resize_image(f'/content/drive/MyDrive/AI/Object_Detection/request/{file_name}', 256, 256, False)
        run_detector(detector, resized)
        #delete the file in the request_directory
        os.remove(f'/content/drive/MyDrive/AI/Object_Detection/request/{file_name}')
        start_time = time.time()

  #check for an image to have a prompt written for it
  if len(os.listdir(antarctic_prompt_request_directory)) > 0:
    for file in os.listdir(antarctic_prompt_request_directory):
      if file.endswith('.png'):
        init_image = antarctic_prompt_request_directory + 'init_image.png'
        new_antarctic_prompts = caption_image(init_image, antarctic_args, antarctic_net, antarctic_preprocess)[:antarctic_number_of_captions]
        #create a text file in the antarctic_prompt_request_directory with the new antarctic prompts
        #join new_anarctic_prompts with a ; and print it
        print(';'.join(new_antarctic_prompts))
        with open(antarctic_prompt_request_directory + 'antarctic_prompts.txt', 'w') as f:
          for prompt in new_antarctic_prompts:
            f.write(prompt)
        os.remove(antarctic_prompt_request_directory + 'init_image.png')
        #testing
        start_time = time.time()


NameError: name 'object_detection_request_directory' is not defined