In [None]:
!pip install pyparsing==2.4.2
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
sns.set_style('darkgrid')

from PIL import Image, ImageDraw
# import tensorflow as tf

import os
import ast
import sys
import time

import warnings
warnings.filterwarnings('ignore')

import greatbarrierreef


The code below sets up the TensorFlow Object Detection API; part of the setup continues in the next notebook. For details, see the installation guide (https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/install.html). Cheers!

In [None]:
# data imports
# Every competition file is resolved from DATA_PATH, so the loads do not depend
# on the kernel's working directory (a later cell changes it with os.chdir).
DATA_PATH = '/kaggle/input/tensorflow-great-barrier-reef'
images_path = os.path.join(DATA_PATH,'train_images')
df_test = pd.read_csv(os.path.join(DATA_PATH, "test.csv"))
df_train = pd.read_csv(os.path.join(DATA_PATH, "train.csv"))
sample_submission = pd.read_csv(os.path.join(DATA_PATH, "example_sample_submission.csv"))
example = np.load(os.path.join(DATA_PATH, "example_test.npy"))

In [None]:
# Path of every frame: <train_images>/video_<video_id>/<video_frame>.jpg
df_train['img_path'] = (
    '../input/tensorflow-great-barrier-reef/train_images'
    + "/video_" + df_train['video_id'].astype(str)
    + "/" + df_train['video_frame'].astype(str)
    + ".jpg"
)
# The annotations column is read as text; turn each entry into a Python object.
df_train['annotations'] = df_train['annotations'].map(ast.literal_eval)
# Number of annotation entries per frame.
df_train['Number_bbox'] = df_train['annotations'].map(len)

In [None]:
def bbox_areas(annotations):
    """Return the area (width * height) of every bounding box.

    Parameters
    ----------
    annotations : sequence of dict
        Boxes that each provide the 'width' and 'height' keys.

    Returns
    -------
    list
        One area per box, in input order. A falsy input (empty list, None)
        yields ``[0]`` so that max()/min() over the result never receive an
        empty list.
    """
    if annotations:
        return [box['width'] * box['height'] for box in annotations]
    return [0]
# Per-frame list of box areas, plus the largest and smallest value of that list
# (both are 0 for frames without boxes, since bbox_areas returns [0] for them).
df_train["bbox_area"] = df_train["annotations"].map(bbox_areas)
df_train["max_area"] = df_train["bbox_area"].map(max)
df_train["min_area"] = df_train["bbox_area"].map(min)
df_train.head()


In [None]:
def img_viz(df_train, id):
    """Display one frame with its annotation boxes outlined in red.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Needs an 'img_path' column and an 'annotations' column whose entries
        are iterables of dicts with 'x', 'y', 'width' and 'height' keys.
    id : index label
        Row to show; looked up as ``df_train[column][id]``.
    """
    frame = Image.open(df_train['img_path'][id])

    for ann in df_train['annotations'][id]:
        left, top = ann['x'], ann['y']
        right, bottom = left + ann['width'], top + ann['height']
        # Rectangle given by its two opposite corners: [x0, y0, x1, y1].
        ImageDraw.Draw(frame).rectangle([left, top, right, bottom], outline ="red", width=3)
    display(frame)
# Peek at the five frames whose max_area (largest box area in the frame) is highest.
df_train.sort_values("max_area", ascending=False).head()


In [None]:
# data imports
# NOTE(review): this cell repeats the earlier load cell. Re-reading train.csv
# rebinds df_train to the raw file contents, so columns added to it in previous
# cells are no longer present from here on; the following cells work on `data`.
# Paths are built from DATA_PATH so the loads do not depend on the kernel's
# working directory (a later cell changes it with os.chdir).
DATA_PATH = '/kaggle/input/tensorflow-great-barrier-reef'
images_path = os.path.join(DATA_PATH,'train_images')
df_test = pd.read_csv(os.path.join(DATA_PATH, "test.csv"))
df_train = pd.read_csv(os.path.join(DATA_PATH, "train.csv"))
sample_submission = pd.read_csv(os.path.join(DATA_PATH, "example_sample_submission.csv"))
example = np.load(os.path.join(DATA_PATH, "example_test.npy"))
# data.csv lives in a sibling input dataset: /kaggle/input/datagreatbarrier/data.csv
data = pd.read_csv(os.path.join(os.path.dirname(DATA_PATH), "datagreatbarrier", "data.csv"))

In [None]:
# Display the DataFrame read from data.csv.
data

In [None]:
# Keep only the rows whose Number_bbox is positive.
actual_train_data = data[data["Number_bbox"] > 0]

In [None]:
# Clone the tensorflow/models repository (creates ./models in the current directory).
!git clone https://github.com/tensorflow/models.git
# Manual setup commands kept for reference only (not executed); later cells
# perform the directory change and the protoc compilation.
# !cd models/research
# !export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
# !protoc object_detection/protos/*.proto --python_out=.

In [None]:
# Download the protoc 3.19.0 linux-x86_64 release archive as protobuf.zip (-q: quiet).
!wget -O protobuf.zip https://github.com/protocolbuffers/protobuf/releases/download/v3.19.0/protoc-3.19.0-linux-x86_64.zip -q
# Extract into the current directory, overwriting existing files without prompting (-o).
!unzip -o protobuf.zip
# The archive is not needed once it has been extracted.
!rm protobuf.zip

In [None]:
# Print the working directory before and after a shell `cd`.
# NOTE(review): each `!` line is run as its own shell command, so this `cd`
# should not change the kernel's working directory (both `pwd` calls are
# expected to print the same path) — the next cell uses os.chdir for that.
!pwd
!cd models/research
!pwd

In [None]:
# %bash cd models/research
os.chdir('models/research')
# !pwd
!protoc object_detection/protos/*.proto --python_out=.

In [None]:
import os

# Silence AutoGraph and TensorFlow C++ logging.
os.environ['AUTOGRAPH_VERBOSITY'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Append the research and slim directories to PYTHONPATH.
# - .get() avoids a KeyError when PYTHONPATH is not set at all.
# - Entries already present are skipped, so re-running the cell does not keep
#   growing the variable.
# NOTE(review): these paths are under /kaggle/models, while the repository was
# cloned into the kernel's working directory — confirm they point at the clone.
_extra_paths = ['/kaggle/models/research/slim', '/kaggle/models/research']
_current_paths = [p for p in os.environ.get('PYTHONPATH', '').split(':') if p]
os.environ['PYTHONPATH'] = ':'.join(
    _current_paths + [p for p in _extra_paths if p not in _current_paths]
)
os.environ['PYTHONPATH']



In [None]:
# Copy the TF2 packaging script to the current directory so that pip can install from `.`.
!cp object_detection/packages/tf2/setup.py .
# Install the package described by that setup.py into the current Python environment.
!python -m pip install --use-feature=2020-resolver .

In [None]:
# Print the working directory, then run the model builder test script
# (presumably a sanity check that the installation works — confirm).
!pwd
!python object_detection/builders/model_builder_tf2_test.py

In [None]:
# Parse the stringified annotation lists that come out of the CSV.
# ast.literal_eval only accepts Python literals, whereas eval would execute any
# expression found in the file. Values that are no longer strings are passed
# through unchanged, so re-running this cell does not raise.
data['annotations'] = data['annotations'].apply(
    lambda raw: ast.literal_eval(raw) if isinstance(raw, str) else raw
)


In [None]:
os.chdir("/kaggle/working")
# os.makedirs(..., exist_ok=True) creates workspace/ and both image folders and
# is a no-op when they already exist, unlike a plain `mkdir`, which reports an
# error on every re-run.
for _workspace_dir in ("workspace/train_images", "workspace/test_images"):
    os.makedirs(_workspace_dir, exist_ok=True)

In [None]:
# Label map with a single class: id 1, name 'starfish'.
label_map = """item {
    id: 1
    name: 'starfish'
}"""
# NOTE(review): the target is a file literally named "annotations" inside the
# workspace folder (no extension) — confirm this is the path the consumer expects.
with open("/kaggle/working/workspace/annotations", "w") as label_file:
    label_file.write(label_map)
# Leaving the with-block has already closed label_file; no explicit close() is needed.
    

In [None]:
# !cp /kaggle/input/reef-labels/labelmap.pbtxt /kaggle/working/workspace/annotations

In [None]:
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from PIL import ImageDraw
from PIL import Image
import pandas as pd
import numpy as np
import json
import copy
import os
import cv2
import ast

# functions


In [None]:
# Map each image path to its annotations, then keep only the images whose
# annotation container is non-empty.
image_annotation_dict = dict(zip(data["img_path"], data["annotations"]))
annotated_images = {
    path: boxes
    for path, boxes in image_annotation_dict.items()
    if len(boxes) > 0
}

In [None]:
def get_xml_template(image_path, folder="/kaggle/working/workspace/train_images"):
    """Build the image-level fields of an annotation XML as a nested dict.

    Parameters
    ----------
    image_path : str
        '/'-separated path of the source image; it is opened to read its size.
    folder : str
        Destination directory; its last path component becomes the "folder"
        field and it prefixes the "path" field.

    Returns
    -------
    dict
        Keys "folder", "filename", "path", "source", "size" and "segmented".
        "filename" joins the last two components of `image_path` with '_'
        (e.g. '.../video_0/12.jpg' -> 'video_0_12.jpg'). Width and height are
        stored as strings; "depth" is always "3".
    """
    # Only the dimensions are needed, so release the file handle immediately
    # instead of leaving it open until the Image object is garbage collected.
    with Image.open(image_path) as img:
        img_width, img_height = img.size
    file_name = '_'.join(image_path.split('/')[-2:])
    image_info_dict = {
    "folder": folder.split('/')[-1],
    "filename": file_name,
    "path": folder + '/' + file_name,
    "source":{"database": "Unknown"},
    "size":{"width": str(img_width), "height": str(img_height), "depth":"3"},
    "segmented": "0",
        }
    return image_info_dict

def get_annotation_template(annotation):
    """Describe one bounding box as a nested dict of strings.

    Parameters
    ----------
    annotation : dict
        Must provide 'x', 'y', 'width' and 'height'.

    Returns
    -------
    dict
        "name" ("starfish"), "pose" ("unknown"), "difficult" ("0") and a
        "bbox" dict with "xmin", "xmax", "ymin", "ymax" (in that order), where
        xmax = x + width and ymax = y + height, each converted with str().
    """
    x_min = annotation['x']
    x_max = annotation['x'] + annotation['width']
    y_min = annotation['y']
    y_max = annotation['y'] + annotation['height']
    corners = {
        "xmin": str(x_min),
        "xmax": str(x_max),
        "ymin": str(y_min),
        "ymax": str(y_max),
    }
    return {
        "name": "starfish",
        "pose": "unknown",
        "difficult": "0",
        "bbox": corners,
    }

In [None]:
import xml.etree.ElementTree as ET

def add_child(key, value, parent):
    """Append an element named `key` under `parent`, recursing into dicts.

    Parameters
    ----------
    key : str
        Tag of the new element.
    value : dict or scalar
        A dict produces one nested child element per item; any other value
        becomes the element's text.
    parent : xml.etree.ElementTree.Element
        Element that receives the new child.

    Returns
    -------
    None
        The tree is modified in place.
    """
    child = ET.SubElement(parent, key)
    if isinstance(value, dict):
        for key_child, value_child in value.items():
            add_child(key_child, value_child, child)
    elif value is None or isinstance(value, str):
        child.text = value
    else:
        # ElementTree can only serialise string text. Coerce numbers and the
        # like here rather than failing later, when the tree is written out.
        child.text = str(value)

def get_xml(image_data,folder="/kaggle/working/workspace/train_images"):
    """Build the annotation XML tree for one image, plus its output paths.

    Parameters
    ----------
    image_data : sequence
        ``image_data[0]`` is the source image path and ``image_data[1]`` the
        iterable of annotation dicts for that image.
    folder : str
        Destination directory that the returned paths are built under.

    Returns
    -------
    tuple
        ``(root, file_path, xml_file_path)``: the root "annotations" element,
        the destination image path, and the same path with its extension
        replaced by ``.xml``.
    """
    image_info = get_xml_template(image_data[0], folder=folder)
    file_path = folder+'/'+image_info["filename"]
    # Replace only the final extension. Cutting at the first '.' would truncate
    # the path whenever a directory or file name contains another dot
    # (e.g. a relative '../...' folder).
    xml_file_path = os.path.splitext(file_path)[0] + '.xml'
    annotation_list = image_data[1]
    root = ET.Element("annotations")
    # Image-level fields first ...
    for k,v in image_info.items():
        add_child(k,v,root)
    # ... then one <object> element per bounding box.
    for annotation in annotation_list:
        annotation_root = ET.SubElement(root, "object")
        annotation_info = get_annotation_template(annotation)
        for k,v in annotation_info.items():
            add_child(k,v,annotation_root)
    return root,file_path,xml_file_path



In [None]:
from sklearn.model_selection import train_test_split
import shutil
# Hold out 10% of the rows of `data`; the fixed random_state makes the split repeatable.
train_data, test_data = train_test_split(data, test_size=0.1, random_state=22)

In [None]:
def transfer_images_xml(image_data_list, folder="/kaggle/working/workspace/train_images"):
    """Copy each image into `folder` and write its annotation XML beside it.

    Parameters
    ----------
    image_data_list : iterable
        Items whose first element is the source image path and whose second
        element is that image's annotations (the shape get_xml consumes).
    folder : str
        Destination directory; created if it does not exist yet.
    """
    # Create the destination up front so the first copy cannot fail on a
    # missing directory.
    os.makedirs(folder, exist_ok=True)
    for image_data in tqdm(image_data_list):
        xml_details, image_path, xml_path = get_xml(image_data, folder=folder)
        shutil.copy(image_data[0], image_path)
        xml_tree = ET.ElementTree(xml_details)
        xml_tree.write(xml_path)

In [None]:
# (image path, annotations) pairs for the held-out split. Despite the name,
# this is a tuple of pairs, not a dict.
test_image_annotation_dict = tuple(zip(test_data["img_path"], test_data["annotations"]))


In [None]:
# Copy the held-out images and write their XML files into workspace/test_images.
transfer_images_xml(test_image_annotation_dict, folder="/kaggle/working/workspace/test_images")

In [None]:
# !rm /kaggle/working/workspace/test_images/*
# !rm /kaggle/working/workspace/train_images/*
