In [None]:
import os
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from torch.autograd import Variable
from PIL import Image

# Folder holding the poster images; every "*.jpg" file directly inside it is embedded.
image_folder_path = "movie_posters"  # Replace with the path to your "movie_posters" folder

# Load the pre-trained ResNet-50 model.
# Newer torchvision releases deprecate `pretrained=True` in favour of the
# `weights=` enum, so use the enum when this install provides it and fall
# back to the legacy flag otherwise. Both select the same IMAGENET1K_V1
# checkpoint.
if hasattr(models, "ResNet50_Weights"):
    resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
else:
    resnet = models.resnet50(pretrained=True)
resnet.eval()  # Evaluation mode: batch-norm layers use their stored running statistics

# Preprocessing applied to every image before it is fed to the network:
# resize, centre-crop to 224x224, convert to a tensor and normalise each
# channel with the mean/std below.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Containers filled by the later cells.
embeddings = []
name2embedding = {}
id2name = {}

# Collect the poster paths. os.listdir() returns entries in arbitrary order,
# so the list is sorted to make the processing order reproducible between
# runs and machines. The extension check is case-insensitive so that
# "poster.JPG" is not silently skipped.
image_paths = sorted(
    os.path.join(image_folder_path, file_name)
    for file_name in os.listdir(image_folder_path)
    if file_name.lower().endswith(".jpg")
)

# Show a short summary instead of dumping every path into the cell output.
print(f"Found {len(image_paths)} images in {image_folder_path!r}")
print(image_paths[:5])


In [None]:

# Run every poster through the network and keep its output vector twice:
# stacked in processing order (`embeddings`) and keyed by the file name
# without folder and extension (`name2embedding`).
#
# The containers are created here (not only in the setup cell) so the cell can
# be re-run safely: `embeddings` is turned into a tensor at the end, and
# appending to that tensor on a second run would fail.
embedding_batches = []
name2embedding = {}

for image_path in image_paths:
    try:
        # The context manager closes the file handle once the pixels are read.
        # convert("RGB") guarantees three channels; grayscale, palette, RGBA
        # or CMYK files would otherwise not match the 3-channel Normalize step.
        with Image.open(image_path) as poster:
            image = preprocess(poster.convert("RGB"))
        image = image.unsqueeze(0)  # Add batch dimension

        with torch.no_grad():  # inference only, no gradient bookkeeping
            embedding = resnet(image)
    except Exception as error:
        # Best effort: report the file and the reason, then continue with the
        # next poster. `Exception` (rather than a bare except) lets
        # KeyboardInterrupt stop the loop.
        print("Error processing:", image_path, "-", repr(error))
        continue

    embedding_batches.append(embedding)
    # Key = file name without directory and extension. For the default
    # "movie_posters" folder and ".jpg" files this equals the former
    # image_path[14:-4], but it no longer depends on the folder name length.
    poster_name = os.path.splitext(os.path.basename(image_path))[0]
    name2embedding[poster_name] = embedding

# Summary instead of printing the whole dictionary of tensors.
print(f"Embedded {len(embedding_batches)} of {len(image_paths)} images")

if not embedding_batches:
    # torch.cat() rejects an empty list with a less helpful message.
    raise RuntimeError(f"No image could be embedded from {image_folder_path!r}")

# Stack the per-image (1, C) outputs into one (num_images, C) tensor.
embeddings = torch.cat(embedding_batches, 0)

# These are the outputs of the network's final layer.
print(embeddings.shape)  # The shape will be (num_images, 1000) for ResNet-50


In [None]:
import csv
import re

file_path = 'items.csv'

# Read the whole CSV into memory. The `with` block closes the file handle
# (the handle was previously left open), and newline='' is what the csv
# module requires so quoted fields containing line breaks are parsed correctly.
with open(file_path, encoding="utf8", newline='') as file:
    data_reader = csv.reader(file)
    data = [row for row in data_reader]

id2name = {}
moviedesc = []

# Row 0 is skipped as the header; column 5 of every other row is used as the
# movie name. The last six characters are dropped after stripping whitespace
# (presumably a trailing "(YYYY)" release year - TODO confirm against
# items.csv). The slice is kept exactly as-is so the resulting keys keep
# matching the poster file names.
movie_names = [row[5].strip()[:-6].strip() for row in data[1:]]

# Map item index -> "<index>_<name>", with the characters \ / : " * ? < > |
# removed from the name (presumably because the same string is used as the
# poster file name - verify against the poster download step).
for i, movie_name in enumerate(movie_names):
    safe_name = re.sub(r'[\\/:"*?<>|]', '', movie_name)
    id2name[i] = str(i) + "_" + safe_name

print(id2name)


In [None]:
# Order the embeddings by item index so that row i of the final feature
# matrix belongs to item i.
#
# Check for missing posters first: a poster that is absent or failed to
# process would otherwise surface as a bare KeyError on the first miss,
# with no indication of how many items are affected.
missing_posters = [
    id2name[i] for i in range(len(movie_names)) if id2name[i] not in name2embedding
]
if missing_posters:
    raise KeyError(
        f"No embedding found for {len(missing_posters)} item(s), "
        f"e.g. {missing_posters[:10]}"
    )

image_embeddings = [name2embedding[id2name[i]] for i in range(len(movie_names))]
print(f"Collected {len(image_embeddings)} image embeddings")



In [None]:
import numpy as np

# Target length of every saved image feature vector and where it is stored.
IMAGE_FEAT_DIM = 4096
IMAGE_FEAT_PATH = 'data/ml100k/image_feat.npy'

if not image_embeddings:
    raise ValueError("image_embeddings is empty; nothing to resize or save")

print(len(image_embeddings))
print(image_embeddings[0].shape)

# Drop the size-1 dimensions so each embedding becomes a flat vector.
new_image_embeds = [t.squeeze() for t in image_embeddings]

print("size of tensor == ", len(new_image_embeds[0]))
print("size of new_image_embeds== ", len(new_image_embeds))

# Resize every image embedding to IMAGE_FEAT_DIM values by linear
# interpolation. interpolate() expects (batch, channels, length), hence the
# two unsqueeze calls; squeeze() flattens the result again.
# align_corners=False is the documented default behaviour for 'linear';
# it is spelled out to make the choice explicit.
resized_image_embeds = [
    torch.nn.functional.interpolate(
        t.unsqueeze(0).unsqueeze(0),
        size=(IMAGE_FEAT_DIM,),
        mode='linear',
        align_corners=False,
    ).squeeze()
    for t in new_image_embeds
]

print("size of new tensor == ", resized_image_embeds[0].size())
print("size of resized_image_embeds== ", len(resized_image_embeds))

# Convert to NumPy; .cpu() is a no-op for CPU tensors and keeps this working
# if the model is ever moved to a GPU.
numpy_array = [tensor.detach().cpu().numpy() for tensor in resized_image_embeds]

# NOTE: the variable name says "text" for historical reasons; it holds the
# stacked *image* features of shape (num_items, IMAGE_FEAT_DIM).
stacked_resized_text_embeds = np.stack(numpy_array, axis=0)

# np.save() does not create missing directories.
os.makedirs(os.path.dirname(IMAGE_FEAT_PATH), exist_ok=True)
np.save(IMAGE_FEAT_PATH, stacked_resized_text_embeds)
print("saved image embeddings in image_feat.npy")