In [16]:
import tensorflow as tf
from tensorflow import keras
from keras.layers import Conv2D, Flatten, GlobalAveragePooling2D, MaxPooling2D
from keras.models import Model
from keras.callbacks import CSVLogger, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from models.model import DARKNET19_ARCHITECTURE, INPUT_SIZE, build_model
from keras.utils import np_utils
import albumentations
from sklearn.model_selection import train_test_split
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import json
import os

ImportError: cannot import name 'SoftmaxLoss' from 'keras.losses' (/home/mushihuahua/.local/lib/python3.10/site-packages/keras/losses.py)

In [2]:
# Target image resolution (pixels) used when resizing every input image.
WIDTH = HEIGHT = 224

# Training hyper-parameters.
BATCH_SIZE = 32
TEST_SPLIT = 0.2        # fraction of samples held out by train_test_split
LEARNING_RATE = 1e-3    # initial learning rate handed to Adam
EPOCHS = 10

# Seed both TensorFlow and NumPy global generators so runs are repeatable.
tf.random.set_seed(42)
np.random.seed(42)

## 1. Data

In [6]:
# Load the annotation file; the resulting frame is indexed by image file name
# and has a "bbox" and a "label" column.
with open("bboxes.json", "r") as f:
    data = json.load(f)

df = pd.DataFrame.from_dict(data).T

# Encode the class names as integers: 'palmar left' -> 0, 'palmar right' -> 1.
label_names = ['palmar left', 'palmar right']
label_ids = {name: idx for idx, name in enumerate(label_names)}

# Fail early with a readable message instead of letting an unexpected class
# name slip through to the one-hot encoding step below.
unknown_labels = set(df["label"]) - set(label_names)
if unknown_labels:
    raise ValueError(f"Unexpected labels in bboxes.json: {sorted(unknown_labels)}")
df["label"] = df["label"].map(label_ids)

# Split the data into training and testing sets. An explicit random_state keeps
# the split identical no matter which cells were executed before this one
# (42 matches the seed used for NumPy/TensorFlow in the configuration cell).
train, test = train_test_split(df, test_size=TEST_SPLIT, random_state=42)
print(train.shape, test.shape)

train_images, train_bboxes, train_labels = train.index.to_numpy(), train["bbox"].to_numpy(), train["label"].to_numpy()
test_images, test_bboxes, test_labels = test.index.to_numpy(), test["bbox"].to_numpy(), test["label"].to_numpy()

# One-hot encode the two classes.
train_labels = np_utils.to_categorical(train_labels, 2)
test_labels = np_utils.to_categorical(test_labels, 2)


(4247, 2) (1062, 2)


## 2. Data Preprocessing
### 2.1 Resizing & Normalisation

In [7]:
def read_image(path, bbox, label):
    """Load one image and return the resized, normalised training sample.

    Called through ``tf.numpy_function`` (see ``parse``), so every argument
    arrives as a NumPy value and every return value should be a NumPy array.

    Args:
        path: File name of the image as UTF-8 encoded bytes, relative to the
            ``data/`` directory.
        bbox: Bounding box in COCO format ``[x_min, y_min, width, height]``,
            in pixels of the original image.
        label: Label for the image; returned unchanged.

    Returns:
        A tuple ``(norm_image, norm_bbox, label)`` where ``norm_image`` is a
        float32 RGB image of shape ``(HEIGHT, WIDTH, 3)`` scaled to ``[0, 1]``
        and ``norm_bbox`` is a float32 array ``[x_min, y_min, width, height]``
        with each entry divided by the resized image's width or height.

    Raises:
        FileNotFoundError: If the image cannot be read from disk.
    """
    filename = path.decode("utf-8")
    image_path = f"data/{filename}"

    # cv2.imread signals failure by returning None rather than raising, which
    # would otherwise surface as an obscure error inside cv2.rotate.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # NOTE(review): the image is rotated by 180 degrees but the bounding box is
    # not; presumably the annotations were made on the rotated orientation --
    # confirm against the labelling tool.
    image = cv2.rotate(image, cv2.ROTATE_180)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    transform = albumentations.Compose(
        [albumentations.Resize(height=HEIGHT, width=WIDTH, always_apply=True)],
        bbox_params=albumentations.BboxParams(format='coco'))

    # A dummy class id (0) is appended as the fifth bbox element because no
    # separate label field is configured in BboxParams.
    transformed = transform(image=image, bboxes=[np.concatenate([bbox, [0]])])
    resized_image, resized_bbox = transformed["image"], transformed["bboxes"][0]

    # Stay in NumPy: this function is a numpy_function callback.
    norm_image = resized_image.astype(np.float32) / 255.0

    # COCO format: the last two entries are width and height, not a second corner.
    x_min, y_min, box_w, box_h = resized_bbox[:4]
    norm_bbox = np.array(
        [x_min / WIDTH, y_min / HEIGHT, box_w / WIDTH, box_h / HEIGHT],
        dtype=np.float32)

    return norm_image, norm_bbox, label


### 2.2 Transforming data to Tensors

In [29]:
def parse(image, bbox, label):
    """Map one (file name, bbox, label) element to model-ready tensors.

    Wraps ``read_image`` in ``tf.numpy_function`` and restores the static
    shapes that are lost when crossing into NumPy code.

    Args:
        image: Scalar string tensor holding the image file name.
        bbox: Tensor with the four bounding-box values.
        label: Tensor with the one-hot label (two classes).

    Returns:
        ``(image, (bbox, label))`` -- the float32 image tensor as model input
        and the pair of float32 targets.
    """
    image, bbox, label = tf.numpy_function(
        read_image, [image, bbox, label], [tf.float32, tf.float32, tf.float32])
    # Image tensors are laid out (rows, cols, channels) = (HEIGHT, WIDTH, 3).
    image.set_shape((HEIGHT, WIDTH, 3))
    bbox.set_shape((4,))
    label.set_shape((2,))
    return image, (bbox, label)

def make_dataset(images, bboxes, labels, batch_size, shuffle=False):
    """Build a batched ``tf.data`` pipeline that decodes samples with ``parse``.

    Args:
        images: Array of image file names.
        bboxes: Sequence of bounding boxes, one per image.
        labels: Array of one-hot labels, one per image.
        batch_size: Number of samples per batch.
        shuffle: When True, reshuffle the samples on every pass over the data.

    Returns:
        A ``tf.data.Dataset`` yielding ``(image, (bbox, label))`` batches.
    """
    dataset = tf.data.Dataset.from_tensor_slices((images, list(bboxes), labels))
    if shuffle:
        # Shuffle before decoding so the buffer only holds file names, bboxes
        # and labels, never decoded images.
        dataset = dataset.shuffle(buffer_size=len(images))
    # Prefetch lets image decoding overlap with the training step.
    return dataset.map(parse).batch(batch_size).prefetch(tf.data.AUTOTUNE)


# NOTE(review): batches are built with 5 samples while BATCH_SIZE in the
# configuration cell is 32 and unused here -- confirm which one is intended.
train_dataset = make_dataset(train_images, train_bboxes, train_labels, batch_size=5, shuffle=True)
test_dataset = make_dataset(test_images, test_bboxes, test_labels, batch_size=5)

# Sanity check: print the shapes of a single training batch.
for X, (bbox, label) in train_dataset.take(1):
    print(X.shape, bbox.shape, label.shape)


(5, 224, 224, 3) (5, 4) (5, 2)


2023-05-01 15:25:41.201842: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype float and shape [4247,2]
	 [[{{node Placeholder/_2}}]]


## 3. The CNN Model

In [30]:
# Build the network and train it on two targets at once: the hand label
# (classification) and the bounding box (regression).
model = build_model(DARKNET19_ARCHITECTURE, INPUT_SIZE)

# NOTE(review): the labels are one-hot over two classes; if the "label" head is
# a 2-way softmax, categorical_crossentropy would be the conventional loss --
# confirm against build_model.
model.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE),
    loss={
        "label": "binary_crossentropy",
        "bbox": "mean_squared_error",
    },
    # Accuracy is only meaningful for the classification head, not for the
    # bounding-box regression output.
    metrics={"label": "accuracy"},
)

# "models" is also the Python package imported at the top of the notebook, so
# the checkpoint and the CSV log each get their own path inside it instead of
# both pointing at the directory itself.
os.makedirs("models", exist_ok=True)
checkpoint_path = os.path.join("models", "checkpoint")
log_path = os.path.join("models", "training_log.csv")

callbacks = [
    ModelCheckpoint(checkpoint_path, verbose=1, save_best_only=True),
    CSVLogger(log_path, append=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-7, verbose=1),
    # NOTE(review): patience=20 is larger than EPOCHS (10), so this callback
    # cannot stop training early with the current configuration.
    EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=False),
]

model.fit(train_dataset, epochs=EPOCHS, validation_data=test_dataset, callbacks=callbacks)

Epoch 1/10
 19/850 [..............................] - ETA: 26:33 - loss: 63.7923 - bbox_loss: 3.0946 - label_loss: 0.5936

KeyboardInterrupt: 