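# **Image Captioning Demo**

This notebook loads a saved caption-generation model, extracts image features with a pretrained VGG16 network, and serves caption prediction through a Gradio web interface.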

# **Mount to Gdrive**

In [1]:
# Mount Google Drive at /content/gdrive; the pickle and model files loaded
# further down are read from paths under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


# **Import required libraries**

In [2]:
import os # used to handle files using system commands.
import pickle # used to store numpy features extracted
import numpy as np  # used to perform a wide variety of mathematical operations 
                    # on arrays
from tqdm.notebook import tqdm  # progress bar decorator for iterators. 
                                # Includes a default range iterator printing to 
                                # stderr.

from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input 
    # imported modules for feature extraction from the image data
from tensorflow.keras.preprocessing.image import load_img, img_to_array
    # used for loading the image and converting the image to a numpy array
from tensorflow.keras.preprocessing.text import Tokenizer
    # used to convert text into sequences of integer tokens
from tensorflow.keras.preprocessing.sequence import pad_sequences
    # used to bring all sequences to the same length by filling the 
    # remaining positions with zeros
from tensorflow.keras.models import Model
    # used to build a model from given input and output tensors
from tensorflow.keras.utils import to_categorical, plot_model
    # to_categorical one-hot encodes class indices; plot_model draws the 
    # architecture of the model as an image
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout, add
    # operation for advanced model option

In [3]:
! pip install gradio

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gradio
  Downloading gradio-3.18.0-py3-none-any.whl (14.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.2/14.2 MB[0m [31m51.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting markdown-it-py[linkify,plugins]>=2.0.0
  Downloading markdown_it_py-2.1.0-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.5/84.5 KB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
Collecting aiofiles
  Downloading aiofiles-23.1.0-py3-none-any.whl (14 kB)
Collecting orjson
  Downloading orjson-3.8.6-cp38-cp38-manylinux_2_28_x86_64.whl (140 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m140.7/140.7 KB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting uvicorn
  Downloading uvicorn-0.20.0-py3-none-any.whl (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.9/56.9 KB[0m [31m7.5 MB/s[0m eta [

# **Prepare for Demo**

## **Load the image feature-extraction model**
Pretrained VGG16 without its final fully connected layer (the 1000-class output).

In [4]:
# Instantiate VGG16 with its default constructor arguments (the cell output
# shows the pretrained weight file being downloaded).
vgg_model = VGG16() 
# restructure the model: keep the same inputs but expose the output of the
# second-to-last layer, so predict() returns that layer's activations instead
# of the final layer's output
vgg_model = Model(inputs=vgg_model.inputs,             
                  outputs=vgg_model.layers[-2].output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5


## **Prepare vocab**

### **Load Captions with imgID**

In [5]:
# Load the pickled `mapping` object; the next cell iterates over its keys and
# treats each value as an iterable of captions.
# NOTE(review): pickle.load can execute arbitrary code -- only open files you trust.
with open('/content/gdrive/MyDrive/Đồ án/Machine learning/Image captioning/Caption with imgID/caption_with_imgID_flickr.pkl', 'rb') as f:
    mapping = pickle.load(f)

### **Flatten the mapping into a list of captions**

In [6]:
# Flatten `mapping` into one list: walk the keys in iteration order and
# collect every caption stored under each key.
all_captions = [
    text
    for img_id in mapping
    for text in mapping[img_id]
]

### **Create a vocabulary**

In [7]:
# tokenize the text: fit the tokenizer on every caption so that
# tokenizer.word_index holds the word -> integer index mapping
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_captions)
# number of distinct words plus one -- presumably because Keras word indices
# start at 1 and index 0 is left for padding (TODO confirm against training code)
vocab_size = len(tokenizer.word_index) + 1

### **Get max_length**

In [8]:
# get maximum length of the caption available: the largest whitespace-separated
# word count over all captions. predict_caption uses this value both as the
# padding length and as the cap on the number of generated words.
max_length = max(len(caption.split()) for caption in all_captions)
max_length  # bare expression so the notebook displays the value

34

## **Load model**

In [9]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [10]:
# Load the saved Keras model (.h5 file) from Drive; predict_caption later calls
# it with the pair [image features, padded token sequence].
model = tf.keras.models.load_model('/content/gdrive/MyDrive/Đồ án/Machine learning/Image captioning/model/best_model_15_02_2023_20Epoches.h5')

## **Prediction functions**

In [11]:
# decode from integer to word

def idx_to_word(integer, tokenizer):
    """Return the word whose vocabulary index equals ``integer``.

    Args:
        integer: Index to decode (e.g. the argmax of the model output).
        tokenizer: Object exposing a ``word_index`` dict (word -> index) and,
            optionally, the reverse ``index_word`` dict (index -> word) that a
            fitted Keras ``Tokenizer`` provides.

    Returns:
        The matching word, or ``None`` when no word has that index.
    """
    # Fast path: a single dict lookup instead of scanning the whole vocabulary
    # for every generated token.
    index_word = getattr(tokenizer, 'index_word', None)
    if index_word:
        return index_word.get(integer)
    # Fallback for tokenizer-like objects that only carry word_index.
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None

In [12]:
# generate caption for an image
def predict_caption(model, image, tokenizer, max_length):
  """Greedily generate a caption, one word per model call.

  Args:
    model: Object whose ``predict([image, sequence], verbose=0)`` returns
      scores over the vocabulary; the argmax is taken as the next word index.
    image: First model input, passed through unchanged on every step.
    tokenizer: Tokenizer used to encode the text generated so far and to
      decode the predicted index back into a word.
    max_length: Padding length of the encoded sequence and upper bound on
      the number of generation steps.

  Returns:
    The generated text as a single space-separated string. It starts with
    'startseq' and ends with 'endseq' unless ``max_length`` steps elapse
    before the end tag is produced.
  """
  # the caption is collected word by word, beginning with the start tag
  words = ['startseq']

  for _ in range(max_length):
    # encode everything generated so far and pad it to max_length
    token_ids = tokenizer.texts_to_sequences([' '.join(words)])[0]
    padded = pad_sequences([token_ids], max_length)
    # index with the highest score -> word
    scores = model.predict([image, padded], verbose=0)
    next_word = idx_to_word(np.argmax(scores), tokenizer)

    # unknown index: close the caption with the end tag and stop
    if next_word is None:
      words.append('endseq')
      break
    # otherwise keep the word; it becomes part of the next step's input
    words.append(next_word)
    # the model itself emitted the end tag
    if next_word == 'endseq':
      break
  return ' '.join(words)

## **Demo function**

In [13]:
def image_captioning(image_path, model=model, tokenizer=tokenizer, max_length=max_length):
  """Produce a caption for the image file at ``image_path``.

  The defaults for ``model``, ``tokenizer`` and ``max_length`` are the
  notebook-level objects as they exist when this cell is executed.

  Args:
    image_path: Path of the image file to caption.
    model: Caption model handed to ``predict_caption``.
    tokenizer: Tokenizer handed to ``predict_caption``.
    max_length: Maximum sequence length handed to ``predict_caption``.

  Returns:
    The string returned by ``predict_caption``.
  """
  # read the file resized to 224x224 and turn it into a pixel array
  pixels = img_to_array(load_img(image_path, target_size=(224, 224)))
  # add a leading batch axis of size 1, then apply the VGG16 preprocessing
  batch = preprocess_input(pixels.reshape((1,) + pixels.shape))
  # run the truncated VGG16 to obtain the image features
  feature = vgg_model.predict(batch, verbose=0)
  # let the caption model turn the features into text
  return predict_caption(model, feature, tokenizer, max_length)

# **Demo**

In [14]:
import numpy as np
import gradio as gr

# Wrap image_captioning in a Gradio UI: the uploaded image is handed to the
# function as a file path (type='filepath') and the returned string is shown
# as text.
demo = gr.Interface(image_captioning, inputs=gr.Image(type='filepath'), outputs='text')
# share=True publishes the app on a temporary public URL (see the cell output),
# so anyone holding the link can call the model while the cell is running.
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://ad9885e3-d79b-4878.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces


