# YOLO v1 

# Setup

In [1]:
import os
import sys
import json
import pathlib
from typing import (
    List,
    Dict,
    Tuple
)

import numpy as np
import tensorflow as tf
from tensorflow import keras   # MUST to make sure of using tensorflow.keras, not keras
from tensorflow.keras.models import (
    Model,
)
from tensorflow.keras.layers import (
    Layer,
    Normalization,
)
import tensorflow_datasets as tfds

## PYTHONPATH

In [2]:
path_to_lib: str = str(pathlib.Path(os.path.join(os.getcwd(), "../../..")).resolve())
assert isinstance(path_to_lib, str)
sys.path.append(path_to_lib)

In [3]:
%load_ext autoreload
%autoreload 2

from util_opencv.image import (
    get_image,
    show_image,
)
from util_tf.nn import (
    train,
    get_early_stopping_callback,
    get_tensorboard_callback,
)
from util_tf.tfds.voc import (
    generate_yolo_v1_label_from_pascal_voc,
    generate_yolo_v1_data_from_pascal_voc,
    _generate_yolo_v1_data_from_pascal_voc,
    PASCAL_VOC_CLASSES,
)

from constant import (
    DEBUG_LEVEL,
    TYPE_FLOAT,
    YOLO_V1_PREDICTION_NUM_CLASSES,
)
from model import (
    layers_config,
    input_shape,
    YOLOModel,
)

---
# Data

## TFDS PASCAL VOC

In [4]:
voc, info = tfds.load(
    name='voc', 
    # split='train',
    data_dir="/Volumes/SSD/data/tfds/",
    with_info=True,
)
info

INFO:absl:No config specified, defaulting to config: voc/2007
INFO:absl:Load dataset info from /Volumes/SSD/data/tfds/voc/2007/4.0.0
INFO:absl:Reusing dataset voc (/Volumes/SSD/data/tfds/voc/2007/4.0.0)
INFO:absl:Constructing tf.data.Dataset voc for split None, from /Volumes/SSD/data/tfds/voc/2007/4.0.0


tfds.core.DatasetInfo(
    name='voc',
    full_name='voc/2007/4.0.0',
    description="""
    This dataset contains the data from the PASCAL Visual Object Classes Challenge,
    corresponding to the Classification and Detection competitions.
    
    In the Classification competition, the goal is to predict the set of labels
    contained in the image, while in the Detection competition the goal is to
    predict the bounding box and label of each individual object.
    annotations.
    """,
    config_description="""
    This dataset contains the data from the PASCAL Visual Object Classes Challenge
    2007, a.k.a. VOC2007.
    
    A total of 9963 images are included in this dataset, where each image
    contains a set of objects, out of 20 different classes, making a total of
    24640 annotated objects.
    
    """,
    homepage='http://host.robots.ox.ac.uk/pascal/VOC/voc2007/',
    data_path='/Volumes/SSD/data/tfds/voc/2007/4.0.0',
    file_format=tfrecord,
    download_size=868

## YOLO v1 dataset

Dataset in ```(input,label)``` format where input is ```(448,448,3)``` RGB image and label is YOLO v1 label in ```(C,P)``` format where ```P=(cp,x,y,w,h)```.

In [5]:
tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()
# train = voc['train'].flat_map(generate_yolo_v1_data_from_pascal_voc)
train = voc['train'].flat_map(generate_yolo_v1_label_from_pascal_voc)

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [6]:
for i in train:
    print(i)

2023-03-09 23:32:40.043172: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-03-09 23:32:40.142526: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at scatter_nd_op.cc:218 : INVALID_ARGUMENT: indices[0,0] = [0, -1] does not index into shape [1,20]
2023-03-09 23:32:40.146662: W tensorflow/core/framework/op_kernel.cc:1818] UNKNOWN: InvalidArgumentError: {{function_node __wrapped__TensorScatterUpdate_device_/job:localhost/replica:0/task:0/device:CPU:0}} indices[0,0] = [0, -1] does not index into shape [1,20] [Op:TensorScatterUpdate]
Traceback (most recent call last):

  File "/Users/oonisim/venv/tf/lib/python3.9/site-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
    return func(device, token, args)

  File "/Users/oonisim/venv/tf/lib/python3.9/site-packages/tensorflow/python/ops/script_ops.py", line 147, in __call__
    outputs = self._call(device, args)

  File "/Users/oonisim/venv/tf/lib/python3.9/s

tf.Tensor(
[[[0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  ...
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]]

 [[0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  ...
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]
  [0.         0.         0.         ... 0.         0.         0.        ]]

 [[0.         0.         0.         ... 0.         0.         0.        ]
  [0.      

UnknownError: {{function_node __wrapped__IteratorGetNext_output_types_1_device_/job:localhost/replica:0/task:0/device:CPU:0}} InvalidArgumentError: {{function_node __wrapped__TensorScatterUpdate_device_/job:localhost/replica:0/task:0/device:CPU:0}} indices[0,0] = [0, -1] does not index into shape [1,20] [Op:TensorScatterUpdate]
Traceback (most recent call last):

  File "/Users/oonisim/venv/tf/lib/python3.9/site-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
    return func(device, token, args)

  File "/Users/oonisim/venv/tf/lib/python3.9/site-packages/tensorflow/python/ops/script_ops.py", line 147, in __call__
    outputs = self._call(device, args)

  File "/Users/oonisim/venv/tf/lib/python3.9/site-packages/tensorflow/python/ops/script_ops.py", line 154, in _call
    ret = self._func(*args)

  File "/Users/oonisim/venv/tf/lib/python3.9/site-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
    return func(*args, **kwargs)

  File "/Users/oonisim/venv/tf/lib/python3.9/site-packages/tensorflow/python/data/ops/structured_function.py", line 212, in py_function_wrapper
    ret = self._func(*nested_args)

  File "/Users/oonisim/home/repository/git/oonisim/python-programs/lib/util_tf/tfds/voc.py", line 355, in generate_yolo_v1_label_from_pascal_voc
    label = _generate_yolo_v1_labels_from_pascal_voc(

  File "/Users/oonisim/home/repository/git/oonisim/python-programs/lib/util_tf/tfds/voc.py", line 320, in _generate_yolo_v1_labels_from_pascal_voc
    classes = generate_yolo_v1_class_predictions(labels=indices, dtype=dtype)

  File "/Users/oonisim/home/repository/git/oonisim/python-programs/lib/util_tf/tfds/voc.py", line 186, in generate_yolo_v1_class_predictions
    classes = tf.tensor_scatter_nd_update(

  File "/Users/oonisim/venv/tf/lib/python3.9/site-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler
    raise e.with_traceback(filtered_tb) from None

  File "/Users/oonisim/venv/tf/lib/python3.9/site-packages/tensorflow/python/framework/ops.py", line 7215, in raise_from_not_ok_status
    raise core._status_to_exception(e) from None  # pylint: disable=protected-access

tensorflow.python.framework.errors_impl.InvalidArgumentError: {{function_node __wrapped__TensorScatterUpdate_device_/job:localhost/replica:0/task:0/device:CPU:0}} indices[0,0] = [0, -1] does not index into shape [1,20] [Op:TensorScatterUpdate]


	 [[{{node EagerPyFunc}}]] [Op:IteratorGetNext]

In [None]:
for _ in range(20):
    next(x)

In [None]:
i = 0
while True:
    i += 1
    example = next(x)
    print()
    image = example[0]
    label = example[1]

### Input Image

In [None]:
print(image.shape)
show_image(image=image.astype(np.uint8), bgr_to_rgb=False)

### Label

In [None]:
PASCAL_VOC_CLASSES[np.argmax(label[:YOLO_V1_PREDICTION_NUM_CLASSES]==1)]

---

# Model

<img src="./image/architecture.png" align="left"/>


* yolo.cfg

```
[net]
batch=64
subdivisions=64
height=448
width=448
channels=3
momentum=0.9
decay=0.0005

learning_rate=0.001
policy=steps
steps=200,400,600,20000,30000
scales=2.5,2,2,.1,.1
max_batches = 40000

[crop]
crop_width=448
crop_height=448
flip=0
angle=0
saturation = 1.5
exposure = 1.5

[convolutional]
filters=64
size=7
stride=2
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
filters=192
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=512
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=1024
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=1024
size=3
stride=1
pad=1
activation=leaky

[convolutional]
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=1024
size=3
stride=1
pad=1
activation=leaky

#######

[convolutional]
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
size=3
stride=2
pad=1
filters=1024
activation=leaky

[convolutional]
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
size=3
stride=1
pad=1
filters=1024
activation=leaky

[connected]
output=4096
activation=leaky

[dropout]
probability=.5

[connected]
output= 1470
activation=linear

[detection]
classes=20
coords=4
rescore=1
side=7
num=2
softmax=0
sqrt=1
jitter=.2

object_scale=1
noobject_scale=.5
class_scale=1
coord_scale=5
```

In [None]:
input_shape

In [None]:
model: Model = YOLOModel()

# Normalize

In [None]:
norm: Layer = model.get_layer(name="norm")

In [None]:
norm.axis   # standardize along axis=-1 (depth wise standardization where each pixel is a feature)

In [None]:
def f(*args):
    return args[0]

In [None]:
X_train = train.map(f)
for i in X_train.take(2):
    print(i.shape)

In [None]:
norm.adapt(data=X_train.take(20))

In [None]:
def f(*args):
    return args[0]

In [None]:
for i in train.take(2).map(f): 
    print(i.shape)

In [None]:
tf.config.run_functions_eagerly(True)

In [None]:
def g(*args):
    print(type(args))
    return args

In [None]:
norm = tf.keras.layers.Normalization(input_shape=(448,448,3))

In [None]:
for i in train.take(21).map(g):
    print(type(i))

In [None]:
def f(record):
    return record['image']

In [None]:
norm.adapt(voc['train'].map(f))

In [None]:
norm.weights

In [None]:
S = 7
C = 20
P = 5
box = tf.constant(np.random.random(size=(5,2)))

In [None]:
tf.math.floor(box * 7)

In [None]:
def fn(xy):
    grid_row = tf.cast(tf.math.floor(S * xy[1]), dtype=tf.int32)   # y
    grid_col = tf.cast(tf.math.floor(S * xy[0]), dtype=tf.int32)   # x
    return tf.stack([grid_row, grid_col], axis=-1)

update_indices = tf.map_fn(
    fn=fn,
    elems=tf.stack([box[..., 0], box[..., 1]], axis=-1),
    fn_output_signature=tf.TensorSpec(
        shape=(2,),
        dtype=tf.dtypes.int32,
        name=None
    )
)

In [None]:
update_indices

In [None]:
result = tf.zeros(shape=(S, S, C+P), dtype=tf.float32)
tf.tensor_scatter_nd_update(
    tensor=result,
    indices=update_indices,
    updates=updates
)[1,3]

In [None]:
updates = tf.constant([
    1.0,
    0.0,
    0.0,
    0.0,
    0.0,
    0.0,
    0.0,
    0.0,
    0.0,
    0.0,
    0.0,
    0.0,
    0.0,
    0.0,
    0.0,
    0.0,
    0.0,
    0.0,
    0.0,
    0.0,
    1.0,
    0.1,
    0.2,
    0.3,
    0.4
] * box.shape[0])

In [None]:
updates = tf.reshape(updates, (box.shape[0],(C+P)))