In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import pytesseract as pt
import plotly.express as px
import matplotlib.pyplot as plt
import xml.etree.ElementTree as xet

from glob import glob
from skimage import io
from shutil import copy
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import TensorBoard
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.layers import Dense, Flatten, Input, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def generate_preprocess(csv_path='/kaggle/input/car-licence/Dataset/train_labels.csv'):
    """Load the bounding-box label table and add a bare image file name column.

    Args:
        csv_path: Location of the label CSV. It must contain a ``filename``
            column. Defaults to the Kaggle dataset path used by this notebook.

    Returns:
        DataFrame sorted by the new ``image_name`` column (the base name of
        ``filename``). The original row index is kept, so it is not 0..n-1
        after sorting.
    """
    df = pd.read_csv(csv_path)
    # The generators look files up relative to the image directory, so only the
    # file name part of the stored path is needed.
    df['image_name'] = df['filename'].apply(os.path.basename)
    return df.sort_values(by='image_name')
   
def build_object_detection_model():
    """Assemble and compile the bounding-box regression network.

    An ImageNet-initialised InceptionResNetV2 (without its classifier, fed with
    224x224x3 inputs) is followed by a small dense head that ends in four
    sigmoid units. The model is compiled with MSE loss and Adam (lr 1e-4).

    Returns:
        The compiled Keras ``Model``.
    """
    backbone = InceptionResNetV2(
        weights="imagenet",
        include_top=False,
        input_tensor=Input(shape=(224, 224, 3)),
    )

    # Head: flatten, then two Dense+Dropout stages of decreasing width
    features = Flatten()(backbone.output)
    for width in (512, 256):
        features = Dense(width, activation="relu")(features)
        features = Dropout(0.25)(features)
    box_output = Dense(4, activation='sigmoid')(features)

    detector = Model(inputs=backbone.input, outputs=box_output)
    detector.compile(
        loss='mse',
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    )
    return detector

def train_object_detection_model(model, df, epochs=100, batch_size=16):
    """Fit ``model`` on the images and box labels listed in ``df``.

    Args:
        model: Compiled Keras model that takes 224x224 RGB images and outputs
            four values per image.
        df: Label table with an ``image_name`` column and ``xmin``, ``ymin``,
            ``xmax``, ``ymax`` target columns.
        epochs: Number of training epochs.
        batch_size: Number of images per generator batch.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # 10% of the rows are held out for validation by the generator itself
    datagen = ImageDataGenerator(rescale=1./255, validation_split=0.1)

    def make_generator(subset):
        # Both subsets share every setting except which split they draw from
        return datagen.flow_from_dataframe(
            df,
            directory="/kaggle/input/car-licence/Dataset/Cars",
            x_col="image_name",
            y_col=["xmin", "ymin", "xmax", "ymax"],
            target_size=(224, 224),
            batch_size=batch_size,
            class_mode='raw',
            subset=subset)

    train_generator = make_generator("training")
    validation_generator = make_generator("validation")

    tfb = TensorBoard('object_detection')
    # The generators already yield batches, so `batch_size` must not be passed
    # to fit() as well.
    history = model.fit(train_generator, epochs=epochs,
                        validation_data=validation_generator, callbacks=[tfb])

    return history

def object_detection(path, model, target_size=(224, 224)):
    """Predict the licence-plate box for one image and draw it on the image.

    Args:
        path: Path of the image file.
        model: Object whose ``predict`` returns, per image, four values in
            [0, 1] ordered ``(xmin, ymin, xmax, ymax)`` - the ``y_col`` order
            the detector is trained with in this notebook.
        target_size: ``(height, width)`` the image is resized to before it is
            passed to the model.

    Returns:
        ``(image, coords)`` where ``image`` is the full-resolution uint8 RGB
        array with the box drawn in green, and ``coords`` is an int32 array of
        shape (1, 4) in pixels ordered ``(xmin, xmax, ymin, ymax)``, which is
        the order callers unpack it in.
    """
    image = np.array(load_img(path), dtype=np.uint8)
    h, w = image.shape[:2]

    # load_img returns a PIL image; convert it to an array before scaling
    resized = load_img(path, target_size=target_size)
    test_arr = np.expand_dims(img_to_array(resized) / 255.0, axis=0)

    prediction = np.asarray(model.predict(test_arr), dtype=np.float64).reshape(-1, 4)

    # Scale x values by the width and y values by the height, following the
    # model's (xmin, ymin, xmax, ymax) output order, then keep them in bounds.
    denorm = np.array([w, h, w, h])
    upper = np.array([w - 1, h - 1, w - 1, h - 1])
    pixel_boxes = np.clip(prediction * denorm, 0, upper).astype(np.int32)

    # Reorder to the (xmin, xmax, ymin, ymax) layout used by callers
    coords = pixel_boxes[:, [0, 2, 1, 3]]

    # OpenCV expects plain Python ints for point coordinates
    xmin, xmax, ymin, ymax = (int(value) for value in coords[0])
    pt1 = (xmin, ymin)
    pt2 = (xmax, ymax)
    print("Bounding Box Coordinates:", pt1, pt2)
    cv2.rectangle(image, pt1, pt2, (0, 255, 0), 3)

    return image, coords

def extract_text_from_image(image, cods):
    """Run OCR on the boxed region of ``image``.

    Args:
        image: Image array indexed as ``[row, column, ...]``.
        cods: Sequence whose first element holds the pixel box ordered
            ``(xmin, xmax, ymin, ymax)``.

    Returns:
        The text recognised by Tesseract, or an empty string when the box has
        no area (for example ``xmax <= xmin``), since there is nothing to read.
    """
    xmin, xmax, ymin, ymax = (int(value) for value in cods[0])
    roi = image[ymin:ymax, xmin:xmax]

    # A degenerate prediction yields an empty crop; skip OCR instead of
    # handing Tesseract an image with no pixels.
    if roi.size == 0:
        return ""

    return pt.image_to_string(roi)

In [None]:
# Load the label table (generate_preprocess adds `image_name` and sorts by it)
# and preview its first rows.
df = generate_preprocess()
df.head()

In [None]:
image_folder = "/kaggle/input/car-licence/Dataset/Cars/"


def _read_image_size(image_name):
    """Return ``(width, height)`` in pixels of one image in ``image_folder``."""
    image = cv2.imread(os.path.join(image_folder, image_name))
    if image is None:
        # cv2.imread reports an unreadable file by returning None
        raise FileNotFoundError(f"Could not read image {image_name!r} in {image_folder}")
    height, width = image.shape[:2]
    return width, height


# Rebuild `df` from the raw pixel labels instead of dividing it in place, so
# re-running this cell cannot normalise the boxes a second time.
labels_px = generate_preprocess()

# Read each distinct image once, even if it has several label rows
image_sizes = {name: _read_image_size(name) for name in labels_px["image_name"].unique()}
widths = labels_px["image_name"].map(lambda name: image_sizes[name][0]).astype("float64")
heights = labels_px["image_name"].map(lambda name: image_sizes[name][1]).astype("float64")

# Express every box corner as a fraction of the image width / height
df = labels_px.assign(
    xmin=labels_px["xmin"] / widths,
    ymin=labels_px["ymin"] / heights,
    xmax=labels_px["xmax"] / widths,
    ymax=labels_px["ymax"] / heights,
)

df.head()

In [None]:
# Build and compile the detector returned by build_object_detection_model
model = build_object_detection_model()

In [None]:
# Train the detector on the label table and keep the object returned by the training helper
history = train_object_detection_model(model, df)

In [None]:
# Run the detector on a single image from the dataset folder
test_image_path = '/kaggle/input/car-licence/Dataset/Cars/101.jpg'
detected_image, bounding_box_coords = object_detection(test_image_path, model)

# OCR the region described by the returned box coordinates
extracted_text = extract_text_from_image(detected_image, bounding_box_coords)
print("Extracted Text:", extracted_text)

In [None]:
# Full image with the predicted box drawn on it. It is shown explicitly because
# a cell only renders its last expression automatically.
detection_fig = px.imshow(detected_image)
detection_fig.update_layout(width=700, height=500, margin=dict(l=10, r=10, b=10, t=10), xaxis_title='Figure 14')
detection_fig.show()

# Crop the boxed region from a freshly loaded copy of the image, which has no
# rectangle drawn on it.
img = np.array(load_img(test_image_path))
xmin, xmax, ymin, ymax = bounding_box_coords[0]
roi = img[ymin:ymax, xmin:xmax]
fig = px.imshow(roi)
fig.update_layout(width=350, height=250, margin=dict(l=10, r=10, b=10, t=10), xaxis_title='Cropped image')

## **Here I tried Bayesian optimization, but it took too long to run**

In [None]:
import os
import cv2
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.layers import Input, Flatten, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from bayes_opt import BayesianOptimization
from functools import partial

# Step 1: Let TensorFlow allocate GPU memory on demand instead of reserving it all up front
physical_devices = tf.config.experimental.list_physical_devices("GPU")
try:
    # Apply the setting to every visible GPU, not only the first one
    for device in physical_devices:
        tf.config.experimental.set_memory_growth(device, True)
except (RuntimeError, ValueError) as error:
    # Memory growth can only be changed before the GPUs are initialised; earlier
    # cells in this notebook already build and train a model, so report the
    # problem instead of aborting the cell.
    print(f"Could not enable GPU memory growth: {error}")

def load_and_extract_labels():
    """Load the labels and images and build in-memory train/test arrays.

    Every box is normalised exactly once by its own image's width and height.
    Rows whose image cannot be read are reported and left out of all outputs.

    Returns:
        ``(df, x_train, x_test, y_train, y_test)`` where
        - ``df`` is the label table sorted by ``image_name`` with ``xmin``,
          ``ymin``, ``xmax``, ``ymax`` as float fractions of the image size;
        - ``x_*`` are float32 arrays of 299x299 RGB images scaled to [0, 1];
        - ``y_*`` are float32 arrays of normalised boxes ordered
          ``(xmin, ymin, xmax, ymax)``, the same order the generators use as
          training targets and ``calculate_iou`` expects.
    """
    image_folder = "/kaggle/input/car-licence/Dataset/Cars/"
    box_columns = ['xmin', 'ymin', 'xmax', 'ymax']

    df = pd.read_csv('/kaggle/input/car-licence/Dataset/train_labels.csv')
    df['image_name'] = df['filename'].apply(lambda link: os.path.basename(link))
    df = df.sort_values(by='image_name')

    data = []
    output = []
    readable = []

    # One pass over the rows keeps each image paired with its own label,
    # regardless of how the index was permuted by the sort.
    for index, row in df.iterrows():
        image = os.path.join(image_folder, row["image_name"])
        img_arr = cv2.imread(image)
        if img_arr is None:
            print(f"Error: Could not read image {image}")
            continue
        h, w = img_arr.shape[:2]

        # Preprocessing
        load_image = load_img(image, target_size=(299, 299))
        data.append(img_to_array(load_image) / 255.0)

        # Normalise the pixel box by the original image size
        output.append((row["xmin"] / w, row["ymin"] / h,
                       row["xmax"] / w, row["ymax"] / h))
        readable.append(index)

    # Keep only rows that were normalised, and store the boxes as floats
    df = df.loc[readable].astype({column: np.float64 for column in box_columns})
    df[box_columns] = np.asarray(output, dtype=np.float64).reshape(-1, 4)

    X = np.array(data, dtype=np.float32)
    y = np.array(output, dtype=np.float32)

    x_train, x_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=0)
    return df, x_train, x_test, y_train, y_test

def build_object_detection_model(learning_rate, dense_units, l2_reg):
    """Build and compile the tunable bounding-box regressor.

    NOTE: this definition replaces the zero-argument function of the same name
    defined earlier in the notebook once this cell has run.

    Args:
        learning_rate: Adam learning rate.
        dense_units: Width of the hidden dense layer. Floats (as proposed by
            the Bayesian optimiser) are truncated to an integer.
        l2_reg: L2 penalty factor applied to the hidden layer's kernel.

    Returns:
        Compiled Keras ``Model`` taking 299x299x3 images and producing four
        sigmoid outputs, trained with ``iou_loss``.
    """
    # Truncate the same way optimize_hyperparameters does for the final model,
    # so the model evaluated during the search matches the one trained afterwards.
    dense_units = int(dense_units)

    inception_resnet = InceptionResNetV2(weights="imagenet", include_top=False, input_tensor=Input(shape=(299, 299, 3)))
    headmodel = inception_resnet.output
    headmodel = Flatten()(headmodel)
    headmodel = Dense(dense_units, activation="relu", kernel_regularizer=tf.keras.regularizers.l2(l2_reg))(headmodel)
    headmodel = Dense(4, activation='sigmoid')(headmodel)

    model = Model(inputs=inception_resnet.input, outputs=headmodel)

    model.compile(loss=iou_loss, optimizer=Adam(learning_rate=learning_rate))
    return model

# Step 2: Objective function - train one model for a given set of hyperparameters
def train_evaluate(learning_rate, batch_size, x_train, y_train, x_test, y_test, dense_units, l2_reg, df):
    """Train a detector with the given hyperparameters and report its validation loss.

    Args:
        learning_rate: Adam learning rate.
        batch_size: Generator batch size; truncated to an integer.
        x_train, y_train, x_test, y_test: Accepted for call compatibility but
            not used; the data is streamed from ``df`` instead.
        dense_units: Width of the hidden dense layer.
        l2_reg: L2 penalty factor for the hidden layer.
        df: Label table with ``image_name`` and normalised ``xmin``, ``ymin``,
            ``xmax``, ``ymax`` columns.

    Returns:
        Validation loss after the last epoch (lower is better).

    Raises:
        ValueError: If either the training or the validation split is empty.
    """
    batch_size = int(batch_size)

    model = build_object_detection_model(learning_rate, dense_units, l2_reg)

    # Only rescale the pixels. Rotations, shifts, shears, zooms and flips move
    # the plate inside the image, but with class_mode='raw' the box targets are
    # passed through unchanged, so such augmentation would make the labels wrong
    # (and would distort the validation images as well).
    datagen = ImageDataGenerator(rescale=1. / 255, validation_split=0.1)

    def make_generator(subset):
        # The two splits differ only in the subset they draw from
        return datagen.flow_from_dataframe(
            df,
            directory="/kaggle/input/car-licence/Dataset/Cars/",
            x_col="image_name",
            y_col=["xmin", "ymin", "xmax", "ymax"],
            target_size=(299, 299),
            batch_size=batch_size,
            class_mode='raw',
            subset=subset,
            validate_filenames=True  # drop rows whose image file is missing
        )

    train_generator = make_generator("training")
    validation_generator = make_generator("validation")

    if len(train_generator) == 0:
        raise ValueError("No valid images found for training. Check the 'image_name' column in the DataFrame.")

    if len(validation_generator) == 0:
        raise ValueError("No valid images found for validation. Check the 'image_name' column in the DataFrame.")

    # workers / use_multiprocessing parallelise batch loading on the CPU side;
    # they do not change whether the GPU is used.
    history = model.fit(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=20,
        validation_data=validation_generator,
        verbose=1,
        use_multiprocessing=True,
        workers=2
    )

    return history.history["val_loss"][-1]


def calculate_iou(box1, box2):
    """Intersection over union of two axis-aligned boxes.

    Args:
        box1: ``(x_min, y_min, x_max, y_max)`` of the first box.
        box2: ``(x_min, y_min, x_max, y_max)`` of the second box.

    Returns:
        IoU as a float in [0, 1]. Boxes with inverted corners (max < min) are
        treated as having zero area, and 0.0 is returned when the union has no
        area, so the function never divides by zero.
    """
    x1_min, y1_min, x1_max, y1_max = box1
    x2_min, y2_min, x2_max, y2_max = box2

    # Overlap extent along each axis, clamped at zero when the boxes are apart
    inter_width = max(0.0, min(x1_max, x2_max) - max(x1_min, x2_min))
    inter_height = max(0.0, min(y1_max, y2_max) - max(y1_min, y2_min))
    inter_area = inter_width * inter_height

    # Clamp each side so an inverted (e.g. freshly predicted) box cannot
    # contribute a negative area to the union.
    box1_area = max(0.0, x1_max - x1_min) * max(0.0, y1_max - y1_min)
    box2_area = max(0.0, x2_max - x2_min) * max(0.0, y2_max - y2_min)

    union_area = box1_area + box2_area - inter_area
    if union_area <= 0:
        return 0.0

    return float(inter_area / union_area)

def iou_loss(y_true, y_pred):
    """Per-sample ``1 - IoU`` loss between target and predicted boxes.

    Both tensors are flattened to one row per sample; columns 0-1 are read as
    the minimum (x, y) corner and columns 2-3 as the maximum (x, y) corner.

    Args:
        y_true: Target boxes.
        y_pred: Predicted boxes.

    Returns:
        Tensor with one loss value per sample.
    """
    def _corners(boxes):
        # Collapse everything after the batch axis, then pick the four columns
        rows = tf.reshape(boxes, (tf.shape(boxes)[0], -1))
        return rows[..., 0], rows[..., 1], rows[..., 2], rows[..., 3]

    true_x1, true_y1, true_x2, true_y2 = _corners(y_true)
    pred_x1, pred_y1, pred_x2, pred_y2 = _corners(y_pred)

    # Overlap rectangle: the larger of the minima up to the smaller of the maxima
    overlap_width = tf.maximum(0.0, tf.minimum(true_x2, pred_x2) - tf.maximum(true_x1, pred_x1))
    overlap_height = tf.maximum(0.0, tf.minimum(true_y2, pred_y2) - tf.maximum(true_y1, pred_y1))
    intersection_area = overlap_width * overlap_height

    true_area = (true_x2 - true_x1) * (true_y2 - true_y1)
    pred_area = (pred_x2 - pred_x1) * (pred_y2 - pred_y1)
    union_area = true_area + pred_area - intersection_area

    # epsilon keeps the division defined when the union is zero
    return 1.0 - intersection_area / (union_area + tf.keras.backend.epsilon())

# Step 3: Search learning_rate, batch_size, dense_units and l2_reg with Bayesian optimisation
def optimize_hyperparameters(df, x_train, y_train, x_test, y_test, init_points=5, n_iter=5):
    """Find the hyperparameters that give the lowest validation loss.

    Args:
        df: Label table forwarded to ``train_evaluate``.
        x_train, y_train, x_test, y_test: Forwarded to ``train_evaluate``.
        init_points: Number of random configurations evaluated before the
            guided search starts.
        n_iter: Number of guided search steps. Every evaluation trains a full
            model, so ``init_points + n_iter`` trainings are run in total.

    Returns:
        Dict with the best ``learning_rate``, ``batch_size``, ``dense_units``
        and ``l2_reg``; ``batch_size`` and ``dense_units`` are truncated to int.
    """
    def negative_val_loss(**hyperparameters):
        # BayesianOptimization maximises its target, so the loss is negated to
        # make the search prefer models with a LOWER validation loss.
        return -train_evaluate(x_train=x_train, y_train=y_train, x_test=x_test,
                               y_test=y_test, df=df, **hyperparameters)

    optimizer = BayesianOptimization(
        f=negative_val_loss,
        pbounds={'learning_rate': (1e-5, 1e-2), 'batch_size': (16, 64), 'dense_units': (64, 512), 'l2_reg': (0.01, 0.1)},
        random_state=0,
    )

    optimizer.maximize(init_points=init_points, n_iter=n_iter)

    best_params = optimizer.max['params']
    best_params['batch_size'] = int(best_params['batch_size'])
    best_params['dense_units'] = int(best_params['dense_units'])

    return best_params

In [None]:
# Load the data once, then run the hyperparameter search on it
df, x_train, x_test, y_train, y_test = load_and_extract_labels()
best_params = optimize_hyperparameters(df, x_train, y_train, x_test, y_test)
print("Best Hyperparameters:", best_params)

In [None]:
# Copy the tuned values from `best_params` into the notebook-level names that
# the following cells read.
learning_rate, batch_size, dense_units, l2_reg = (
    best_params[key]
    for key in ('learning_rate', 'batch_size', 'dense_units', 'l2_reg')
)

# Create a fresh, untrained model configured with those hyperparameters
model = build_object_detection_model(
    learning_rate=learning_rate,
    dense_units=dense_units,
    l2_reg=l2_reg,
)

In [None]:
 datagen = ImageDataGenerator(
        rescale=1. / 255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        validation_split=0.1
    )

    train_generator = datagen.flow_from_dataframe(
        df,
        directory="/kaggle/input/car-licence/Dataset/Cars/",
        x_col="image_name",
        y_col=["xmin", "ymin", "xmax", "ymax"],
        target_size=(299, 299),
        batch_size=batch_size,
        class_mode='raw',
        subset="training",
        validate_filenames=True  # Add this option to validate image filenames
    )

    validation_generator = datagen.flow_from_dataframe(
        df,
        directory="/kaggle/input/car-licence/Dataset/Cars/",
        x_col="image_name",
        y_col=["xmin", "ymin", "xmax", "ymax"],
        target_size=(299, 299),
        batch_size=batch_size,
        class_mode='raw',
        subset="validation",
        validate_filenames=True  # Add this option to validate image filenames
    )

In [None]:
# Train the tuned model for 50 epochs on the generators built in the previous cell.
# NOTE(review): `use_multiprocessing` and `workers` are passed straight to fit();
# confirm that the installed Keras version still accepts them.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=50,
    validation_data=validation_generator,
    verbose=1,
    use_multiprocessing=True,
    workers=2
)

In [None]:
# Predict a box for every held-out image, score each prediction against its
# ground-truth box, and report the average.
pred_coords = model.predict(x_test)
iou_list = list(map(calculate_iou, y_test, pred_coords))
mean_iou = np.mean(iou_list)
print(f"Mean IoU on test set: {mean_iou}")