# Imports


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import pickle
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical, plot_model

from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout, add


In [3]:
TEST_PATH = '/content/drive/Shareddrives/ImageClef-2024/FullData/ROCOv2/path/test'
WORKING_PATH = '/content/drive/Shareddrives/ImageClef-2024/Final'

# Loading the Model


In [4]:
from keras.models import load_model
model = load_model(WORKING_PATH + '/task2Model.h5')

# Loading Train Images and Captions


In [5]:
#load train features from pickle

with open (WORKING_PATH + '/features.pkl', 'rb') as f:
    features_train = pickle.load(f)

In [6]:
##Load the captions data for train dataset

with open(WORKING_PATH + '/train_captions.csv', 'r') as f:
  next(f)
  captions_doc = f.read()

In [7]:
#After mapping images and captions, function to clean the captions

#cleaning all the captions
#preprocessing text

import tensorflow as tf

from tensorflow.keras.layers import Conv2D
def clean(mapping):
    for key, captions in mapping.items():
        for i in range (len(captions)):
            caption = captions[i]

            #preprocessing

            caption = caption.lower()
            caption = caption.replace('[^A-Za-z]', '')  #except mentioned, replace all
            caption = caption.replace('\s+', ' ') #replaces multiple space to single space

            caption = 'startseq ' + "  ".join([word for word in caption.split() if len(word)>1]) + ' endseq'
            captions[i] = caption


In [8]:
#mappings between images and captions
from tqdm import tqdm
mapping_train = {}

#process lines

for line in tqdm(captions_doc.split('\n')):
    #split line into image and caption (, split)
    tokens = line.split(',')
    if len(line) < 2:
        continue
    image_id, caption = tokens[0], tokens[1:]

    #remove jpg from image id
    image_id = image_id.split('.')[0]

    #convert caption list to a string
    caption = "".join(caption)

    if image_id not in mapping_train:
        mapping_train[image_id] = []
    mapping_train[image_id].append(caption)

100%|██████████| 60164/60164 [00:00<00:00, 167197.01it/s]


In [9]:
#Cleaning the train mapping and its captions

#preprocessing function called for train captions
clean(mapping_train)

In [10]:
#list of all captions available
all_captions = []
for key in mapping_train:
  for caption in mapping_train[key]:
    all_captions.append(caption)

In [11]:
## whats the point of combining all captions together, how do we know which for which image?
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_captions)
vocab_size = len(tokenizer.word_index) + 1

In [12]:
#Number of vocabularies obtained

vocab_size

29156

In [13]:
# get maximum length of caption available

max_length = max(len(caption.split()) for caption in all_captions)
max_length

404

# Loading Test from Drive and saving features

In [None]:
## load vgg16 model

modelvgg = VGG16()

#  restructuring the model
modelvgg = Model(inputs=modelvgg.inputs, outputs=modelvgg.layers[-2].output)

#summarize
print(modelvgg.summary())


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     14758

In [None]:
directory_test = '/content/drive/Shareddrives/ImageClef-2024/test2024/path/test/'

In [None]:
print(len(os.listdir(directory_test)))

17237


In [None]:
#extract features
from IPython.display import clear_output

features_test = {}


cnt = 0
tot = 0
for img_name in os.listdir(directory_test):
    cnt += 1
    tot += 1
    # Construct the full path to the image
    img_path = os.path.join(directory_test, img_name)

    # Open the image
    image = tf.keras.utils.load_img(img_path, target_size=(224,224))

    # Convert pixels to numpy array
    image = tf.keras.utils.img_to_array(image)

    # Reshape the image
    image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))

    # Preprocess the image for VGG
    image = preprocess_input(image)

    # Get features using the pre-trained model
    feature = modelvgg.predict(image)

    # Get image ID
    image_id = img_name.split('.')[0]

    # Store feature
    features_test[image_id] = feature
    print(image_id)
    if(cnt == 20):
        cnt=0
        clear_output()
        print(tot)


17220
ImageCLEFmedical_Caption_2024_test_000981
ImageCLEFmedical_Caption_2024_test_006096
ImageCLEFmedical_Caption_2024_test_005757
ImageCLEFmedical_Caption_2024_test_005758
ImageCLEFmedical_Caption_2024_test_005531
ImageCLEFmedical_Caption_2024_test_011780
ImageCLEFmedical_Caption_2024_test_005979
ImageCLEFmedical_Caption_2024_test_005977
ImageCLEFmedical_Caption_2024_test_005978
ImageCLEFmedical_Caption_2024_test_013503
ImageCLEFmedical_Caption_2024_test_013707
ImageCLEFmedical_Caption_2024_test_005579
ImageCLEFmedical_Caption_2024_test_016173
ImageCLEFmedical_Caption_2024_test_016170
ImageCLEFmedical_Caption_2024_test_016172
ImageCLEFmedical_Caption_2024_test_016169
ImageCLEFmedical_Caption_2024_test_016171


In [None]:
import pickle

# Assuming 'features_test' is the object you want to pickle
with open('/content/drive/Shareddrives/ImageClef-2024/Final/featuresTest24.pkl', 'wb') as f:
    pickle.dump(features_test, f)

# Loading Test Images and Captions

In [14]:
#load test features from pickle

with open (WORKING_PATH + '/featuresTest24.pkl', 'rb') as f:
    features_test = pickle.load(f)

In [15]:
#Predictions are indices, which can be converted to tokens/words
def idx_to_word(integer, tokenizer):
  for word, index in tokenizer.word_index.items():
    if index == integer:
      return word

  return None

In [16]:
#generating the actual caption

def predict_caption(model, image, tokenizer, max_length):
  in_text = 'startseq'
  for i in range(max_length):
    #encode input sequence
    sequence = tokenizer.texts_to_sequences([in_text])[0]
    sequence = pad_sequences([sequence], max_length)

    yhat = model.predict([image, sequence], verbose=0)
    #gives probability of the 8000 columns words
    #we obtain indices of higher probability

    yhat = np.argmax(yhat)
    #indices to word
    word = idx_to_word(yhat, tokenizer)

    #stop when no more words found
    if word is None:
      break
    in_text += " " + word

    #stop on reaching end tag
    if word == 'endseq':
      break

  return in_text

In [17]:
# #Load the test captions

# with open(WORKING_PATH+'/test_captions.csv', 'r') as f:
#     next(f)
#     captions_test_doc = f.read()

In [18]:
# print(captions_test_doc)

In [19]:
# #mappings between images and captions
# from tqdm import tqdm
# mapping_test = {}

# #process lines

# for line in tqdm(captions_test_doc.split('\n')):
#     #split line into image and caption (, split)
#     tokens = line.split(',')
#     if len(line) < 2:
#         continue
#     image_id, caption = tokens[0], tokens[1:]

#     #remove jpg from image id
#     image_id = image_id.split('.')[0]

#     #convert caption list to a string
#     caption = "".join(caption)

#     if image_id not in mapping_test:
#         mapping_test[image_id] = []
#     mapping_test[image_id].append(caption)


In [20]:
# print(mapping_test['ROCOv2_2023_test_000001'])

In [21]:
print(list(features_test.keys())[0])

ImageCLEFmedical_Caption_2024_test_006629


# Predicting the results


In [None]:
# from nltk.translate.bleu_score import corpus_bleu
# import tqdm
# #validate with test dataCUI_train
# actual, predicted = list(), list()

# for key, value in features_test.items():
#   # captions = mapping_test[key]
#   y_pred = predict_caption(model, value, tokenizer, max_length)
#   # actual_captions = [caption.split() for caption in captions]
#   # actual.append(actual_captions)
#   predicted.append(y_pred.split())
#   break


In [None]:
# from nltk.translate.bleu_score import corpus_bleu
# import tqdm
# #validate with test dataCUI_train
# actual, predicted = {}, {}

# cnt=0
# for key, value in features_test.items():
#   # captions = mapping_test[key]
#   y_pred = predict_caption(model, value, tokenizer, max_length)
#   # actual_captions = [caption.split() for caption in captions]
#   # actual[key] = actual_captions
#   predicted[key]= y_pred.split()
#   cnt+=1
#   if(cnt==10):
#     break


In [None]:
# predicted_strings = {}
# actual_strings = {}

In [None]:
# for key in predicted.keys():
#   cap = ""
#   lst=predicted[key]
#   for word in lst[1:-1]:
#     cap += word + " "
#   predicted_strings[key]=cap

# for key in actual.keys():
#   cap = ""
#   lst=actual[key]
#   for line in lst:
#     for word in line:
#       cap += word + " "
#   actual_strings[key] = cap

In [None]:
# predicted_strings

{'ImageCLEFmedical_Caption_2024_test_006629': 'ct scan of the abdomen and pelvis showing the presence of the mass in the right lobe of the liver ',
 'ImageCLEFmedical_Caption_2024_test_006509': 'mri of the brain showing the right sided clival mass ',
 'ImageCLEFmedical_Caption_2024_test_006832': 'ct scan of the chest showing the presence of the presence of the mass in the right upper lobe of the right lung ',
 'ImageCLEFmedical_Caption_2024_test_007820': 'ercp showing the distal metal stent in the distal common bile duct ',
 'ImageCLEFmedical_Caption_2024_test_007477': 'panoramic radiograph of the patient ',
 'ImageCLEFmedical_Caption_2024_test_009547': 'coronal view of the right hand showing the presence of the bone marrow edema in the distal phalanx of the left femur ',
 'ImageCLEFmedical_Caption_2024_test_008397': 'mri of the brain showing the right sided intraconal hematoma ',
 'ImageCLEFmedical_Caption_2024_test_007124': 'ct scan of the chest showing bilateral patchy ground glass 

In [None]:
# # prompt: write a dictionary to csv file that will be saved in drive. the key is 1st value in csv. the value is followed by the key in csv

# import csv

# with open('/content/drive/Shareddrives/ImageClef-2024/Final/Task2Results.csv', 'w', newline='') as csvfile:
#   writer = csv.writer(csvfile, delimiter=',')
#   for key, value in predicted_strings.items():
#     writer.writerow([key, value])
# with open('/content/drive/Shareddrives/ImageClef-2024/Final/Task2Actuals.csv', 'w', newline='') as csvfile:
#   writer = csv.writer(csvfile, delimiter=',')
#   for key, value in actual_strings.items():
#     writer.writerow([key, value])


In [22]:
# prompt: sort a dictionary based on key

def sort_dict_by_key(dict_data):
    sorted_dict = {}
    ccnt = 0
    for key in sorted(dict_data.keys()):
        ccnt += 1
        if(704 <= ccnt and ccnt <= 6000):
            sorted_dict[key] = dict_data[key]
        else:
          continue

    return sorted_dict


In [23]:
features_sorted_test = sort_dict_by_key(features_test)

In [24]:
print(len(features_sorted_test.keys()))

5297


In [30]:
features_sorted_test['ImageCLEFmedical_Caption_2024_test_006009']

array([[0.        , 0.        , 1.1651884 , ..., 0.        , 0.45411587,
        3.3696191 ]], dtype=float32)

In [None]:
# prompt: open 2 above files at the same time to write
from nltk.translate.bleu_score import corpus_bleu
import tqdm
import csv
from IPython.display import clear_output

actual, predicted = {}, {}
predicted_strings = {}
with open('/content/drive/Shareddrives/ImageClef-2024/Final/Task2Results.csv', 'a', newline='') as results_file:
  results_writer = csv.writer(results_file, delimiter=',')

  cnt = 0
  for key in features_sorted_test.keys():
    cnt += 1
    print(key)

    y_pred = predict_caption(model, features_sorted_test[key], tokenizer, max_length)
    predicted[key]= y_pred.split()

    cap=""
    lst=predicted[key]
    for word in lst[1:-1]:
      cap += word + " "
    predicted_strings[key]=cap

    results_writer.writerow([key, predicted_strings[key]])
    if(cnt%20 == 0):
      clear_output()
      print(cnt)

3880
ImageCLEFmedical_Caption_2024_test_004592
ImageCLEFmedical_Caption_2024_test_004593
ImageCLEFmedical_Caption_2024_test_004594


In [None]:
# calculate BLEU score

print("BLEU-1: %f" % corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))
print("BLEU-2: %f" % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0)))

BLEU-1: 0.133395
BLEU-2: 0.073330


In [None]:
# !pip install evaluate
from evaluate import load
bertscore = load("bertscore")
results = bertscore.compute(predictions=predicted_strings, references=actual_strings, lang="en")



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
pp = 0
for val in results['precision']:
  pp += val
print(pp/len(results['precision']))

rr = 0
for val in results['recall']:
  rr += val
print(rr/len(results['recall']))

ff = 0
for val in results['f1']:
  ff += val
print(ff/len(results['f1']))

0.8697220772504807
0.8419271880388259
0.8550720202922821


In [None]:
results

{'precision': [0.8737030029296875,
  0.8448029160499573,
  0.8702583312988281,
  0.8588204383850098,
  0.8321487903594971,
  0.8712301850318909,
  0.8872138261795044,
  0.8417867422103882,
  0.8420666456222534,
  0.8447216749191284,
  0.8248202204704285,
  0.8873696327209473,
  0.8625765442848206,
  0.8564962148666382,
  0.9012247323989868,
  0.8185930848121643,
  0.8760736584663391,
  0.862419605255127,
  0.8207155466079712,
  0.8880691528320312,
  0.9215533137321472,
  0.9000866413116455,
  0.6124196648597717,
  0.8650169968605042,
  0.8599019050598145,
  0.8895283341407776,
  0.8971860408782959,
  0.8732478618621826,
  0.8662784695625305,
  0.9134531021118164,
  0.8856464624404907,
  0.9142813682556152,
  0.9263355731964111,
  0.9261306524276733,
  0.8995828032493591,
  0.8310228586196899,
  0.8836182951927185,
  0.9172022342681885,
  0.8670722246170044,
  0.8821762800216675,
  0.8838317394256592,
  0.866588830947876,
  0.9140645861625671,
  0.82408607006073,
  0.8934664130210876,
 