In [11]:
import nltk
import torch

from img_cap_lib import *

# Modellpfad festlegen

In [4]:
# Path of the saved checkpoint that is read with torch.load in the model-loading cell.
model_path = "models/standart_model_with_normalisation.pt"

# Daten herunterladen

In [5]:
# Make sure the Flickr8k data is available under "flickr8k"
# (the helper only reports that the data exists when it is already there).
data_download("flickr8k")

Data already exists at flickr8k


# Modell laden

In [6]:
# Deserialize the checkpoint with every tensor mapped onto the CPU,
# then rebuild the captioning model from the stored statistics.
model_stats = torch.load(model_path, map_location="cpu")
model = load_captioning_model(model_stats)

# Preprocessing

In [8]:
# --- captions ---
# Settings handed to the caption preprocessor.
embedding_dim = 300
min_frequency = 1

# Raw caption table; the embedding matrix and vocabulary are taken from the
# loaded checkpoint so the preprocessing matches the model being evaluated.
captions = pd.read_csv("flickr8k/captions.txt")
caption_preprocessor = CaptionPreprocessor(
    embedding=model_stats['embedding'].embedding_matrix,
    captions_path="flickr8k/captions.txt",
    vocabulary=model_stats['embedding'].vocabulary,
    captions=captions,
    embedding_dim=embedding_dim,
    min_frequency=min_frequency,
)
caption_preprocessor.preprocess()

# --- images ---
# Preprocess the images found under the dataset folder, with normalisation enabled.
img_preprocessor = ImagePreprocessor(normalize=True, image_folder_path="flickr8k")
img_preprocessor.preprocess_images()

Shape captions: (40460, 2)
Shape captions after filtering: (39749, 3)
Removed Captions:  711 , in Percent:  1.76


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.captions.caption = self.captions.caption.apply(lambda x: x.strip("."))


transformed_images folder already exists. No preprocessing necessary.


# Datensplit und DataLoader

In [9]:
# Split the preprocessed captions into a training and a held-out part
# (fixed random_state keeps the split reproducible).
training_data, test_data = train_test_split(caption_preprocessor.captions, test_size=0.15, random_state=42)

# Wrap both parts in datasets that share the model's embedding.
train_dataset = FlickrDataset(captions=training_data, embedding=model.embedding)
test_dataset = FlickrDataset(captions=test_data, embedding=model.embedding)

# Build the dataloaders.
batch_size = 64
train_loader = FlickrLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
# The held-out loader is only used for scoring: keep every sample (no dropped
# partial batch) and a fixed order so the per-sample scores are reproducible.
# NOTE(review): assumes FlickrLoader follows torch DataLoader semantics for
# `shuffle` / `drop_last` - confirm.
test_loader = FlickrLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=False)

In [16]:
class Evaluator:
    '''
    Scores a captioning model with sentence-level BLEU over a dataloader.

    Params:
    -------
    model: captioning model - Must offer `forward(images)` and a `words` lookup (see `evaluate`).
    dataloader: sized iterable - Yields `(images, captions, lengths, vectorized_captions)` batches.
    device: device identifier - Passed to `Tensor.to` for every image batch.
    '''
    def __init__(self, model, dataloader, device):
        # keep references to everything `evaluate` needs
        self.model = model
        self.dataloader = dataloader
        self.device = device

    def evaluate(self, weight):
        '''
        Runs the model over the whole dataloader and scores every generated caption
        against its reference caption.

        Params:
        -------
        weight: tuple of floats - The n-gram weights forwarded to `bleu_score`.

        Returns:
        --------
        mean_score: float - Mean of all sentence-level BLEU scores.
        scores: list of floats - One BLEU score per evaluated sample.
        '''
        scores = []
        n_batches = len(self.dataloader)

        # inference only: no autograd bookkeeping is needed for the forward passes
        with torch.no_grad():
            for i, (images, _captions, _lengths, vectorized_captions) in enumerate(self.dataloader):
                # only the images are consumed by the model
                images = images.to(self.device)

                # forward pass
                # NOTE(review): assumes the output is a 2-D array of string tokens, one row per image - confirm
                output = self.model.forward(images)
                # map token indices back to words
                # NOTE(review): assumes model.words is a NumPy array indexed by token id - confirm
                references = self.model.words[vectorized_captions.cpu().numpy()]

                for j in range(output.shape[0]):
                    candidate = self.output_to_sentence(output[j, :])
                    reference = self.output_to_sentence(references[j, :])
                    # argument order must match bleu_score(reference, candidate, weights)
                    scores.append(self.bleu_score(reference, candidate, weight))

                print(f"Batch: {i+1} of {n_batches}")

        mean_score = np.mean(scores)
        print(f"Average BLEU score: {mean_score}")
        return mean_score, scores

    @staticmethod
    def output_to_sentence(output:list):
        '''
        Removes the special tokens (<SOS>, <EOS>, <PAD>) from a token sequence.

        Params:
        -------
        output: iterable of strings - The token sequence.

        Returns:
        --------
        tokens: list of strings - The remaining tokens in their original order.
        '''
        special_tokens = ("<SOS>", "<EOS>", "<PAD>")
        return [token for token in output if token not in special_tokens]

    @staticmethod
    def bleu_score(reference, candidate, weights=(0.25, 0.25, 0.25, 0.25)):
        '''
        Calculates the BLEU score for a single reference and candidate. Uses the SmoothingFunction
        (method1) for smoothing when no overlap between certain n-grams is found.

        Params:
        -------
        reference: list of strings - The tokens of the reference sentence.
        candidate: list of strings - The tokens of the candidate sentence.
        weights: tuple of floats - The n-gram weights.

        Returns:
        --------
        bleu_score: float - The BLEU score.
        '''
        # sentence_bleu expects a LIST of reference sentences followed by the hypothesis,
        # so the single reference is wrapped in a list.
        return nltk.translate.bleu_score.sentence_bleu(
            [list(reference)],
            list(candidate),
            weights=weights,
            smoothing_function=nltk.translate.bleu_score.SmoothingFunction().method1,
        )

In [17]:
# One evaluator per data split, both running on the CPU.
evaluator_train = Evaluator(model, train_loader, "cpu")
evaluator_test = Evaluator(model, test_loader, "cpu")

# n-gram weights for BLEU-1 to BLEU-4. Each tuple sums to exactly 1
# (BLEU-3 uses true thirds instead of 0.33, which only summed to 0.99).
weights = [
    (1, 0, 0, 0),
    (0.5, 0.5, 0, 0),
    (1 / 3, 1 / 3, 1 / 3, 0),
    (0.25, 0.25, 0.25, 0.25)
]

train_scores = []
test_scores = []

# Every evaluate() call runs the model over the complete loader again,
# so this loop performs one full pass per weight tuple and split.
for weight in weights:
    train_scores.append(evaluator_train.evaluate(weight))
    test_scores.append(evaluator_test.evaluate(weight))

# Persist the (mean, per-sample scores) results of both splits.
with open("train_scores.pkl", "wb") as f:
    pickle.dump(train_scores, f)

with open("test_scores.pkl", "wb") as f:
    pickle.dump(test_scores, f)

Batch: 1 of 4222
Batch: 2 of 4222
Batch: 3 of 4222
Batch: 4 of 4222
Batch: 5 of 4222
Batch: 6 of 4222


KeyboardInterrupt: 