In [1]:
import tensorflow as tf
import os 
import xml.etree.ElementTree as ET
import numpy as np
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
from tensorflow.keras.layers import Input,GlobalMaxPooling2D,Dense


In [25]:
image_directory='images'
annotation_dir='Annotations'
classes={'english_springer':0,'westhighland_terrier':1,'irish_wolfhound':2,'bull_mastiff':3,'whippet':4}

In [28]:
def parse_voc_annotations(annotation_dir,image_directory,classes):
    img_path=[]
    bbox_data=[]
    class_labels=[]
    for xml_file in sorted(os.listdir(annotation_dir)):
        if not xml_file.endswith('.xml'):
            # print(xml_file)
            continue
        
        tree=ET.parse(os.path.join(annotation_dir,xml_file))
        # print(tree)
        root= tree.getroot()
        # print(root)
        img_file_name=root.find('filename').text
        # print(img_file_name)
        path=os.path.join(image_directory,img_file_name)
        # print(path)
        size=root.find('size')
        # print(size)
        img_width=int(size.find('width').text)
        # print(img_width)
        img_height=int(size.find('height').text)

        objects=root.find('object')
        # print(objects)
        if objects is not None:
            class_name=objects.find('name').text
            # print(class_name)
            if class_name not in classes:
                continue
            # img_path.append(class_name)
            class_id=classes[class_name]
            # print(class_id)

            bnbox=objects.find('bndbox')
            xmin=float(bnbox.find('xmin').text) / img_width
            ymin=float(bnbox.find('ymin').text) / img_height
            xmax=float(bnbox.find('xmax').text) / img_width
            ymax=float(bnbox.find('ymax').text) / img_height
            img_path.append(path)
            bbox_data.append([xmin, ymin, xmax, ymax])
            class_labels.append(class_id)
    return img_path,bbox_data,class_labels



In [29]:
parse_voc_annotations(annotation_dir=annotation_dir,image_directory=image_directory,classes=classes)

(['images\\0123c356-irish_wolfhound_20.jpg',
  'images\\02f775b8-english_springer_6.jpg',
  'images\\0348655e-english_springer_27.jpg',
  'images\\03cdf529-irish_wolfhound_16.jpg',
  'images\\054b16ed-english_springer_31.jpg',
  'images\\06d7520a-bull_mastiff_6.jpg',
  'images\\08a05225-irish_wolfhound_17.jpg',
  'images\\08cc69b3-english_springer_3.jpg',
  'images\\093a451b-whippet_77.jpg',
  'images\\09e772ce-english_springer_24.jpg',
  'images\\0b3e051b-whippet_93.jpg',
  'images\\0cb2a82d-whippet_92.jpg',
  'images\\0f72e9ec-english_springer_35.jpg',
  'images\\146a3c29-whippet_99.jpg',
  'images\\16207c28-irish_wolfhound_23.jpg',
  'images\\1d530f65-irish_wolfhound_37.jpg',
  'images\\1dc190c7-whippet_83.jpg',
  'images\\1e7b8aab-english_springer_42.jpg',
  'images\\1eaa750f-bull_mastiff_39.jpg',
  'images\\1ef0ed4f-english_springer_26.jpg',
  'images\\2194e49a-bull_mastiff_33.jpg',
  'images\\21b8ba9e-english_springer_28.jpg',
  'images\\25190a2d-english_springer_22.jpg',
  'imag

In [30]:
img_path,bnbox_data,class_labels=parse_voc_annotations(annotation_dir=annotation_dir,image_directory=image_directory,classes=classes)
img_path=tf.constant(img_path)
bnbox_data=tf.constant(bnbox_data,dtype=tf.float32)
class_labels=tf.constant(class_labels,dtype=tf.int32)

In [31]:
img_path

<tf.Tensor: shape=(160,), dtype=string, numpy=
array([b'images\\0123c356-irish_wolfhound_20.jpg',
       b'images\\02f775b8-english_springer_6.jpg',
       b'images\\0348655e-english_springer_27.jpg',
       b'images\\03cdf529-irish_wolfhound_16.jpg',
       b'images\\054b16ed-english_springer_31.jpg',
       b'images\\06d7520a-bull_mastiff_6.jpg',
       b'images\\08a05225-irish_wolfhound_17.jpg',
       b'images\\08cc69b3-english_springer_3.jpg',
       b'images\\093a451b-whippet_77.jpg',
       b'images\\09e772ce-english_springer_24.jpg',
       b'images\\0b3e051b-whippet_93.jpg',
       b'images\\0cb2a82d-whippet_92.jpg',
       b'images\\0f72e9ec-english_springer_35.jpg',
       b'images\\146a3c29-whippet_99.jpg',
       b'images\\16207c28-irish_wolfhound_23.jpg',
       b'images\\1d530f65-irish_wolfhound_37.jpg',
       b'images\\1dc190c7-whippet_83.jpg',
       b'images\\1e7b8aab-english_springer_42.jpg',
       b'images\\1eaa750f-bull_mastiff_39.jpg',
       b'images\\1ef0ed4f-

In [39]:
def load_and_preprocess_image(path, bbox, label):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image.set_shape([None, None, 3])
    image = tf.image.resize(image, [128, 128])
    label_encode = tf.one_hot(label, depth=len(classes))
    image = image / 255.0
    return image, {"class_output": label_encode, "box_output": bbox}

dataset= tf.data.Dataset.from_tensor_slices((img_path, bnbox_data, class_labels))
dataset = dataset.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)


dataset = dataset.shuffle(buffer_size=196).batch(10).prefetch(tf.data.AUTOTUNE)


DATASET_SIZE = len(img_path)
train_size = int(0.8 * DATASET_SIZE)
train_ds = dataset.take(train_size)
val_ds = dataset.take(train_size)

In [40]:
print(f"img_path length: {len(img_path)}")
print(f"bnbox_data length: {len(bnbox_data)}")
print(f"class_labels length: {len(class_labels)}")


img_path length: 160
bnbox_data length: 160
class_labels length: 160


In [41]:
base_model=tf.keras.applications.MobileNetV2(
    input_shape=(128,128,3),
    include_top=False,
    weights='imagenet'
)
base_model.trainable=False

In [42]:
input=Input(shape=(128,128,3))

# x=tf.keras.layers.RandomZoom(0.1)
# x=tf.keras.layers.RandomBrightness(0.2)(x)
# x=tf.keras.layers.RandomFlip('vertical and horizontal')(x)
# x=tf.keras.layers.Rescaling(1./255)(x)
x=base_model(input)

x=GlobalMaxPooling2D()(x)


class_output=tf.keras.layers.Dense(30,activation='relu')(x)
class_output=tf.keras.layers.Dense(5,activation='softmax',name='class_output')(class_output)

box_output=tf.keras.layers.Dense(30,activation='relu')(x)
box_output=tf.keras.layers.Dense(4,activation='sigmoid',name='box_output')(box_output)

detect_model=tf.keras.models.Model(inputs=input,outputs=[class_output,box_output])

In [44]:
detect_model.compile(
    optimizer='adam',
    loss={
        
        'class_output':'categorical_crossentropy',
        'box_output':'mse'
    },
    metrics={
        'class_output':'accuracy',
        'box_output':'mse'
    }
)

In [45]:
detect_model.fit(
    train_ds,
    epochs=10,
    validation_data=val_ds,
    callbacks=[
        EarlyStopping(monitor='val_class_output_accuracy',patience=3,restore_best_weights=True,mode='max'),
        ModelCheckpoint('dog_types.keras',monitor='val_class_output_loss',save_best_only=True,mode='min',verbose=1)
    ]
    
)

Epoch 1/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step - box_output_loss: 0.1077 - box_output_mse: 0.1077 - class_output_accuracy: 0.4555 - class_output_loss: 2.2193 - loss: 2.3269
Epoch 1: val_class_output_loss improved from inf to 0.50703, saving model to dog_types.keras
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 295ms/step - box_output_loss: 0.1048 - box_output_mse: 0.1048 - class_output_accuracy: 0.4648 - class_output_loss: 2.1726 - loss: 2.2774 - val_box_output_loss: 0.0336 - val_box_output_mse: 0.0336 - val_class_output_accuracy: 0.8500 - val_class_output_loss: 0.5070 - val_loss: 0.5407
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - box_output_loss: 0.0354 - box_output_mse: 0.0354 - class_output_accuracy: 0.8415 - class_output_loss: 0.5903 - loss: 0.6257
Epoch 2: val_class_output_loss improved from 0.50703 to 0.10347, saving model to dog_types.keras
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[

<keras.src.callbacks.history.History at 0x171800fe270>

In [13]:
from tensorflow.keras.utils import load_img, img_to_array
import numpy as np


In [14]:
# img="datasets/bullmastiff1.jpg"
# print(img)

In [15]:
# image = load_img(img, target_size=(128,128))

In [16]:
# img_array=tf.keras.utils.img_to_array(image)
# norm=img_array/255.0
# img_batch=np.expand_dims(norm,axis=0)

In [17]:
# dog_type=detect_model.predict(img_batch)
# print(dog_type)
# # kelvin=np.argmax(dog_type)
# # kelvin

In [18]:
classe=['english_springer','westhighland_terrier','irish_wolfhound','bull_mastiff','whippet']

In [19]:
# predicted_classes = [np.argmax(p) for p in dog_type]
# predicted=predicted_classes[0]
# final_output=classe[predicted]
# print(final_output)


In [20]:
import gradio as gr

In [46]:
from PIL import Image


In [None]:
from PIL import Image


In [50]:
def dog_type_prediction(imge):
    img=imge.resize((128,128))
    img_array=tf.keras.utils.img_to_array(img)
    norm=img_array/255
    img_batch=np.expand_dims(norm,axis=0)

    dog_type=detect_model.predict(img_batch)
    predicted_classes=[np.argmax(p) for p in dog_type]
    predicted=predicted_classes[0]
    return classe[predicted]
    # return final_output

interface=gr.Interface(
    fn=dog_type_prediction,
    inputs=gr.Image(type='pil'),
    outputs=gr.Label(),
    title='dog_type prediction model',
    description='upload the dog image to see the output'
)
interface.launch(share=True)
    


* Running on local URL:  http://127.0.0.1:7865
* Running on public URL: https://793ff99d8b27e9ea19.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 264ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 271ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 256ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step


In [None]:
# def prediction(image):
#     img=image.resize((128,128,3))
#     img_array=tf.keras.utils.img_to_array(img)
#     img_batch=np.expand_dims(img_array,axis=0)
#     predictions=detect_model.predict(img_batch)
#     index=np.argmax(predictions)
#     return classe[index]
# interface=gr.Interface(
#     fn=prediction,
#     inputs=gr.Image(type='pil'),
#     outputs=gr.Label(),
#     title='dog type prediction',
#     description='upload an image to see'
# )
# interface.launch(share=True)

