In [1]:
from img_cap_lib import *
# imports
import torch
import torchvision
import torchtext
from torchtext.vocab import vocab, GloVe, Vectors
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import os
from PIL import Image
import string
from collections import OrderedDict, Counter
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time
from sklearn.model_selection import train_test_split
import pickle
import os
# Select the compute device: the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
import nltk

  from .autonotebook import tqdm as notebook_tqdm


# Daten herunterladen

In [2]:
# Fetch the Flickr8k data set into the local "flickr8k" folder.
# NOTE(review): the recorded output suggests the helper skips the download when the folder already exists — confirm in img_cap_lib.
data_download("flickr8k")

Data already exists at flickr8k


# Modell laden

In [3]:
# load model
# The checkpoint is deserialised onto the CPU, independent of the `device` selected in the import cell.
# NOTE(review): the Evaluator moves image batches to `device` before calling the model; confirm that
# `load_captioning_model` places the model on that same device when CUDA is available.
model_stats = torch.load("models/without_normalisation.pt", map_location=torch.device('cpu'))
model = load_captioning_model(model_stats)

# Preprocessing

In [4]:
# caption preprocessing settings
embedding_dim = 300
min_frequency = 1

# read the raw captions and preprocess them with the embedding matrix and
# vocabulary that were stored alongside the loaded model
captions = pd.read_csv("flickr8k/captions.txt")
caption_preprocessor = CaptionPreprocessor(
    embedding=model_stats['embedding'].embedding_matrix,
    vocabulary=model_stats['embedding'].vocabulary,
    captions=captions,
    embedding_dim=embedding_dim,
    min_frequency=min_frequency,
)
caption_preprocessor.preprocess()

# image preprocessing (normalisation switched off)
img_preprocessor = ImagePreprocessor(
    normalize=False,
    image_folder_path="flickr8k",
)
img_preprocessor.preprocess_images()

Shape captions: (40460, 2)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.captions.caption = self.captions.caption.apply(lambda x: x.strip("."))


Shape captions after filtering: (39749, 3)
Removed Captions:  711 , in Percent:  1.76
transformed_images folder already exists. No preprocessing necessary.


# Datensplit und DataLoader

In [5]:
# create split (fixed random_state keeps the train/test partition reproducible)
training_data, test_data = train_test_split(caption_preprocessor.captions, test_size=0.15, random_state=42)

# create datasets; both use the embedding of the loaded model
train_dataset = FlickrDataset(captions=training_data, embedding=model.embedding)
test_dataset = FlickrDataset(captions=test_data, embedding=model.embedding)

# create dataloaders; both loaders read the single `batch_size` setting so they cannot drift apart
batch_size = 4
train_loader = FlickrLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
# NOTE(review): drop_last=True presumably discards a trailing partial batch, which would exclude
# up to batch_size - 1 test captions from evaluation — confirm FlickrLoader semantics.
test_loader = FlickrLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

In [13]:
# inspect the training split (rendered by the notebook as a truncated table)
training_data

Unnamed: 0,image,caption,caption_word_length,vectorized_caption
34401,368954110_821ccf005c.jpg,"['<SOS>', 'three', 'children', 'sit', 'on', 't...",6,"[3, 48, 60, 167, 7, 6, 42, 4, 1, 1, 1, 1, 1, 1..."
13250,2687529141_edee32649e.jpg,"['<SOS>', 'acrobatic', 'entertainers', 'with',...",8,"[3, 2713, 6287, 11, 456, 378, 887, 71, 652, 4,..."
29817,3501206996_477be0f318.jpg,"['<SOS>', 'a', 'child', 'kicks', 'a', 'soccer'...",6,"[3, 2, 43, 585, 2, 106, 39, 4, 1, 1, 1, 1, 1, ..."
350,108898978_7713be88fc.jpg,"['<SOS>', 'skiiers', 'walking', 'up', 'the', '...",8,"[3, 1928, 61, 55, 6, 134, 34, 2, 292, 4, 1, 1,..."
36999,470373679_98dceb19e7.jpg,"['<SOS>', 'sillhouttes', 'of', 'people', 'in',...",14,"[3, 8373, 13, 24, 5, 50, 13, 2, 107, 9, 5, 50,..."
...,...,...,...,...
6391,2231847779_1148d1c919.jpg,"['<SOS>', 'a', 'baby', 'with', 'food', 'smeare...",14,"[3, 2, 144, 11, 447, 3584, 7, 74, 124, 9, 14, ..."
11510,2568417021_afa68423e5.jpg,"['<SOS>', 'a', 'boy', 'gets', 'ready', 'to', '...",8,"[3, 2, 17, 391, 353, 21, 1242, 2, 150, 4, 1, 1..."
38842,576093768_e78f91c176.jpg,"['<SOS>', 'several', 'young', 'boys', 'looking...",8,"[3, 182, 26, 96, 89, 65, 2, 984, 2904, 4, 1, 1..."
879,1211015912_9f3ee3a995.jpg,"['<SOS>', 'children', 'at', 'a', 'park', '<EOS...",4,"[3, 60, 23, 2, 119, 4, 1, 1, 1, 1, 1, 1, 1, 1,..."


In [6]:
class Evaluator:
    '''
    Scores a captioning model with sentence-level BLEU over all batches of a dataloader.

    Params:
    -------
    model: captioning model - Must provide `forward(images)` whose result has a `shape` and can be indexed as `output[j, :]` to obtain the word tokens of sample j, and a `words` lookup that is indexed with the vectorized captions to obtain the reference words.
    dataloader: iterable with a length - Yields `(images, captions, lengths, vectorized_captions)` batches.
    device: torch.device - Device the image batch is moved to before the forward pass.
    '''

    # tokens that carry no content and are stripped before scoring
    SPECIAL_TOKENS = ("<SOS>", "<EOS>", "<PAD>")

    def __init__(self, model, dataloader, device):
        # initiate variables
        self.model = model
        self.dataloader = dataloader
        self.device = device
        # NOTE(review): switching the model to eval mode was left disabled by the author;
        # confirm whether the model contains layers that behave differently in training mode.
        # self.model.eval()
        # assert self.dataloader.batch_size == 1, "Batch size must be 1 for evaluation."

    def evaluate(self):
        '''
        Generates a caption for every image in the dataloader and scores it against its reference caption.

        Returns:
        --------
        mean_score: float - Mean BLEU score over all scored captions (nan when the dataloader yields no samples).
        scores: list of floats - The BLEU score of every individual caption, in dataloader order.
        '''
        scores = []

        for i, (images, captions, lengths, vectorized_captions) in enumerate(self.dataloader):
            # only the images are fed to the model, so only they are moved to the device
            images = images.to(self.device)

            # forward pass; evaluation needs no gradients
            with torch.no_grad():
                output = self.model.forward(images)

            # look up the reference words; the indices are moved to the CPU before indexing `model.words`
            references = self.model.words[vectorized_captions.cpu()]

            for j in range(output.shape[0]):
                candidate = self.output_to_sentence(output[j,:])
                reference = self.output_to_sentence(references[j,:])
                # keyword arguments keep reference and candidate from being swapped
                scores.append(self.bleu_score(reference=reference, candidate=candidate))

            print(f"Batch: {i+1} of {len(self.dataloader)}")

        # avoid numpy's empty-slice warning when nothing was scored
        mean_score = np.mean(scores) if scores else float("nan")
        print(f"Average BLEU score: {mean_score}")
        return mean_score, scores

    @staticmethod
    def output_to_sentence(output:list):
        '''
        Removes the special tokens (<SOS>, <EOS>, <PAD>) from a token sequence.

        Params:
        -------
        output: sequence of strings - The tokens of one caption.

        Returns:
        --------
        output: list of strings - The tokens without special tokens, in their original order.
        '''
        return [token for token in output if token not in Evaluator.SPECIAL_TOKENS]

    @staticmethod
    def bleu_score(reference, candidate):
        '''
        Calculates the BLEU score for a single reference and candidate. Uses the SmoothingFunction for smoothing when no overlap between certain n-grams is found. 

        Params:
        -------
        reference: list of strings - The reference sentence.
        candidate: list of strings - The candidate sentence.

        Returns:
        --------
        bleu_score: float - The BLEU score.
        '''
        reference = list(reference)
        candidate = list(candidate)

        # an empty candidate shares no n-grams with the reference
        if not candidate:
            return 0.0

        # sentence_bleu expects a list of reference sentences, so the single reference is wrapped in a list
        smoothing = nltk.translate.bleu_score.SmoothingFunction().method1
        return nltk.translate.bleu_score.sentence_bleu([reference], candidate, smoothing_function=smoothing)

In [60]:
# Run the BLEU evaluation on the test split only; the corresponding
# training-split evaluation is kept below but commented out.
# train_evaluator = Evaluator(model, train_loader, device)
test_evaluator = Evaluator(model, test_loader, device)

# evaluate() returns the mean BLEU score and the list of per-caption scores
# train_bleu, train_scores = train_evaluator.evaluate()
test_bleu, test_scores = test_evaluator.evaluate()

# print(f"Train BLEU: {train_bleu}")
print(f"Test BLEU: {test_bleu}")

KeyboardInterrupt: 