In [8]:
from transformers import BertTokenizer, BertModel #pip install transformers --user
import torch #pip install torch --user
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np
from framesToStrip import display_images_with_same_height

tokenizer = BertTokenizer.from_pretrained('TurkuNLP/bert-base-finnish-cased-v1')

def preprocess_text(text):
    # Tokenize the text
    tokenized_text = tokenizer.encode(text, add_special_tokens=False)
    # Convert token IDs to strings
    processed_text = tokenizer.decode(tokenized_text)
    return processed_text

Read the csv file and process sentences

In [9]:
csv_file = 'computerVision.csv'
df = pd.read_csv(csv_file)
predefined_sentences =  df['texts'].tolist()

result_dict = dict(zip(df['texts'], df['img']))

# Preprocess each sentence in the predefined list
processed_predefined_sentences = []
for sentence in predefined_sentences:
    input = str(sentence)
    if len(input) != 0:
        p = preprocess_text(input)
        processed_predefined_sentences.append(p)

Model training

In [10]:
model = BertModel.from_pretrained('TurkuNLP/bert-base-finnish-cased-v1')

# Encode input and predefined sentences
with torch.no_grad():
    predefined_ids = tokenizer(processed_predefined_sentences, return_tensors='pt', padding=True, truncation=True)['input_ids']
    predefined_embeddings = model(predefined_ids)[0].mean(dim=1)  # Mean pooling over token embeddings for predefined sentences

In [11]:
# Sanity check on sizes
print(len(predefined_ids))
print(len(processed_predefined_sentences))
print(len(predefined_sentences))
print(len(predefined_embeddings))

1031
1031
1031
1031


In [12]:
input = "tänään on tilipäivä"
processed_input = preprocess_text(input)

with torch.no_grad():
    input_ids = tokenizer(processed_input, return_tensors='pt')['input_ids']
    input_embeddings = model(input_ids)[0].mean(dim=1)  # Mean pooling over token embeddings for input sentence

similarities = cosine_similarity(input_embeddings, predefined_embeddings)


In [13]:
from PIL import Image

def displayComic(imagePaths, stripHeight=300):
    # Calculate the aspect ratio of each image
    aspect_ratios = []
    images = []
    for path in imagePaths:
        img = Image.open(path)
        images.append(img)
        aspect_ratios.append(img.width / img.height)   
    
    # Calculate the width of each image based on the strip height
    imgWidths = [int(stripHeight * aspect_ratio) for aspect_ratio in aspect_ratios]
    
    # Create a new blank image for the comic strip
    stripWidth = sum(imgWidths)
    comicStrip = Image.new('RGB', (stripWidth, stripHeight), color='white')
    
    # Paste images onto the comic strip
    currentX = 0
    for img, width in zip(images, imgWidths):
        resized = img.resize((width, stripHeight), Image.LANCZOS)
        comicStrip.paste(resized, (currentX, 0))
        currentX += width
    
    # Display the comic strip
    comicStrip.show()

In [14]:
numStories = 3
# Find the index of the most similar story
closestStories = np.argpartition(similarities[0], -numStories)[-numStories:]
closestStories = closestStories[np.argsort(similarities[0][closestStories])][::-1]

paths = [result_dict[predefined_sentences[i]] for i in closestStories]
print(paths)
displayComic(paths)

['frameSet/350-0.jpg', 'frameSet/90-0.jpg', 'frameSet/47-1.jpg']
