In [33]:
import numpy as np
import gc

import tf2onnx

from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import (
    Dense,
    Flatten,
    Conv2D,
    MaxPooling2D,
    Dropout,
    Input,
    BatchNormalization,
    Concatenate,
    GlobalAveragePooling2D,
    Conv2DTranspose,
    concatenate,
)

from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import Sequence, plot_model

from tensorflow.keras.regularizers import l2

from tensorflow.keras.preprocessing.image import (
    load_img,
    img_to_array,
    ImageDataGenerator,
)

from tensorflow.keras.datasets import mnist
import tensorflow_datasets as tfds
import tensorflow as tf
import tensorflow_addons as tfa

import pandas as pd
import matplotlib.pyplot as plt

import os

from sklearn.metrics import jaccard_score
from sklearn.model_selection import train_test_split

import cv2

from PIL import Image

from glob import glob
from pathlib import Path
from typing import Generator

import random

random_state = 123456
random.seed(random_state)

In [34]:
def smooth_data(data_frame, window_size=5):

    window = 2 * window_size + 1  
    return data_frame.rolling(window=window, min_periods=1, center=True).mean()

In [35]:
def load_data(csv_paths) -> Generator[np.array, float, float]:
    for csv_path in csv_paths:
        csv_data = pd.read_csv(csv_path, header=None)
        csv_data.columns = ["id", "forward", "left"]
        
        csv_data = smooth_data(csv_data)
        
        image_paths = glob(f"{csv_path.removesuffix('.csv')}/*.jpg")
        random.shuffle(image_paths)
        
        for image_path in image_paths:
            image_data = load_img(image_path)
            image_number = int(Path(image_path).name.removesuffix(".jpg"))
            row = csv_data[csv_data["id"]==image_number]
            if row.empty:
                continue
            yield image_data, row["forward"].values[0], row["left"].values[0]
        

In [36]:
def load_data_preprocess(paths, image_size, batch_size):
    def preprocess(image, image_size):
        image = img_to_array(image).astype(np.uint8)
        
        image = cv2.resize(image, (image_size, image_size))
        
        img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        img_clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(img_gray)
        img_blurred = cv2.GaussianBlur(img_clahe, (3, 3), 0)
        
        img_crop = img_blurred[image_size//2:image_size, 0:image_size]
        img_crop = cv2.resize(img_crop, (image_size, image_size))
        
        pil_image = Image.fromarray(img_crop.astype(np.uint8), "L")
        
        return pil_image
    
    def augment(image, forward, left):
        image_flip = image.transpose(Image.FLIP_LEFT_RIGHT)
        left_flipped = -left
        forward_flipped = forward
        return image_flip, forward_flipped, left_flipped

    data_loader = load_data(paths)
    batch_images = []
    batch_labels = []

    for image, forward, left in data_loader:

        image = preprocess(image, image_size)
        
        image_flipped, forward_flipped, left_flipped = augment(image, forward, left)
        
        image = img_to_array(image).astype(np.float32) / 255.0
        image_flipped = img_to_array(image_flipped).astype(np.float32) / 255.0
        
        batch_images.append(image_flipped)
        batch_labels.append([forward_flipped, left_flipped])
    
        batch_images.append(image)
        batch_labels.append([forward, left])

        if len(batch_images) == 2 * batch_size:
            yield np.array(batch_images), np.array(batch_labels)
            batch_images = []
            batch_labels = []
            
    if batch_images:
        yield np.array(batch_images), np.array(batch_labels)

        

In [37]:
def generator_wrapper(paths):
    for batch_images, batch_labels in load_data_preprocess(paths, 64, 32):
        yield batch_images, batch_labels

In [38]:
csv_paths = glob("../dataset/*.csv")
random.shuffle(csv_paths)


test_paths = csv_paths[:3]
train_paths = csv_paths[3:]

#val_path = train_val_paths[0]        
#train_paths = train_val_paths[1:] 

img_size = 64
batch_size = 16

test_loader = load_data_preprocess(test_paths, img_size, batch_size)
train_loader = load_data_preprocess(train_paths, img_size, batch_size)


train_dataset = tf.data.Dataset.from_generator(
    lambda: generator_wrapper(train_paths),
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, 64, 64, 1], [None, 2])
).repeat()

# val_dataset = tf.data.Dataset.from_generator(
#     lambda: generator_wrapper([val_path]),
#     output_types=(tf.float32, tf.float32),
#     output_shapes=([None, 64, 64, 1], [None, 2])
# ).repeat()

test_dataset = tf.data.Dataset.from_generator(
    lambda: generator_wrapper(test_paths),
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, 64, 64, 1], [None, 2])
).repeat()


In [39]:
total_train_samples = sum(
    [sum(1 for _ in load_data_preprocess([path], 64, batch_size)) * batch_size * 2 for path in train_paths]
)
steps_per_epoch = total_train_samples // batch_size

In [40]:
total_val_samples = sum(
    [sum(1 for _ in load_data_preprocess([path], 64, batch_size)) * batch_size * 2 for path in [val_path]]
)
val_steps = total_val_samples // batch_size

In [41]:
total_test_samples = sum(
    [sum(1 for _ in load_data_preprocess([path], 64, batch_size)) * batch_size * 2 for path in test_paths]
)
test_steps = total_test_samples // batch_size

In [42]:
def weighted_mse_loss(forward_weight=3.0, left_weight=7.0):
    def loss(y_true, y_pred):
        forward_true = y_true[:, 0]
        left_true = y_true[:, 1]

        forward_pred = y_pred[:, 0]
        left_pred = y_pred[:, 1]

        forward_mse = tf.reduce_mean(tf.square(forward_true - forward_pred))
        left_mse = tf.reduce_mean(tf.square(left_true - left_pred))

        return forward_weight * forward_mse + left_weight * left_mse

    return loss

In [43]:
def build_model(image_size):
    inputs = Input(shape=(image_size, image_size, 1))
    x = Conv2D(16, (3, 3), activation="relu", padding="same",
           kernel_regularizer=l2(1e-4))(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    
    x = Conv2D(32, (3, 3), activation="relu", padding="same")(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    
    x = Flatten()(x)
    x = Dense(64, activation="relu", kernel_regularizer=l2(1e-4))(x)
    x = Dropout(0.5)(x)  
    x = Dense(32, activation="relu")(x)
    x = Dropout(0.3)(x)
    outputs = Dense(2, activation="tanh")(x)

    return Model(inputs = inputs, outputs = outputs)


In [44]:
def mse_forward(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_true[:, 0] - y_pred[:, 0]))

def mse_left(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_true[:, 1] - y_pred[:, 1]))

In [45]:
model = build_model(image_size=img_size)
model.summary()


Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 64, 64, 1)]       0         
                                                                 
 conv2d_4 (Conv2D)           (None, 64, 64, 16)        160       
                                                                 
 batch_normalization_4 (Bat  (None, 64, 64, 16)        64        
 chNormalization)                                                
                                                                 
 max_pooling2d_4 (MaxPoolin  (None, 32, 32, 16)        0         
 g2D)                                                            
                                                                 
 conv2d_5 (Conv2D)           (None, 32, 32, 32)        4640      
                                                                 
 batch_normalization_5 (Bat  (None, 32, 32, 32)        128 

In [46]:
model.compile(
    optimizer='adam',
    loss=weighted_mse_loss(forward_weight=2.0, left_weight=3.0),
    metrics=['mse', mse_forward, mse_left],
)

In [48]:
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=20,
    steps_per_epoch=steps_per_epoch,
    
    validation_steps=test_steps,
    #callbacks=[early_stopping]
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7f4788374ac0>

In [49]:
spec = (tf.TensorSpec((None, 64, 64, 1), tf.float32, name="input"),)

onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature=spec, opset=11)

2025-05-20 20:54:46.978165: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-05-20 20:54:46.978422: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2025-05-20 20:54:46.979216: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2025-05-20 20:54:46.980103: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-05-20 20:54:46.980351: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-05-20 20:54:46.980424: I tensor

In [50]:
with open("model_opset11_v4.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())

In [51]:
results = model.evaluate(test_loader, return_dict=True)

print("Test Results:")
for name, value in results.items():
    print(f"{name}: {value:.4f}")

     92/Unknown - 11s 120ms/step - loss: 0.2428 - mse: 0.0417 - mse_forward: 0.0300 - mse_left: 0.0533

KeyboardInterrupt: 

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error

# Collect the true labels and predictions
y_true_list = []
y_pred_list = []

for batch_x, batch_y in test_dataset.take(test_steps*batch_size):  
    preds = model.predict(batch_x)
    y_pred_list.append(preds)
    y_true_list.append(batch_y.numpy())

# Convert lists to arrays
y_pred_all = np.vstack(y_pred_list)
y_true_all = np.vstack(y_true_list)

# Compute total MSE
total_mse = mean_squared_error(y_true_all, y_pred_all)

# Compute per-output MSE
mse_forward = mean_squared_error(y_true_all[:, 0], y_pred_all[:, 0])
mse_left = mean_squared_error(y_true_all[:, 1], y_pred_all[:, 1])

print(f"Total MSE: {total_mse:.4f}")
print(f"MSE Forward: {mse_forward:.4f}")
print(f"MSE Left: {mse_left:.4f}")

