In [4]:
import os
import tqdm
import torch
import numpy as np
import pickle
from torchvision.models import resnet50

In [None]:
# Build the feature extractor: an ImageNet-pretrained ResNet-50 with its last
# child module removed, so the network outputs features instead of class scores.
backbone = resnet50(pretrained=True)
feature_layers = list(backbone.children())[:-1]
model = torch.nn.Sequential(*feature_layers).cuda()
# Inference mode (affects layers such as batch norm); as the last expression
# this also displays the module in the cell output.
model.eval()

In [None]:
image_folder = "processed_images/"
features = {}

# Inputs are arrays saved as "{id}.npy". List the folder once and keep only
# those files, so stray entries (e.g. ".ipynb_checkpoints") never reach np.load.
image_files = [name for name in os.listdir(image_folder) if name.endswith(".npy")]

# tqdm infers the total from the list length.
for img_file in tqdm.tqdm(image_files, desc="Extracting features"):
	# Strip only the final extension so the id round-trips with f"{id}.npy".
	book_id = os.path.splitext(img_file)[0]
	img_path = os.path.join(image_folder, img_file)
	img = np.load(img_path)
	# Add a leading batch dimension and move the tensor to the GPU.
	img_tensor = torch.from_numpy(img).unsqueeze(0).cuda()

	# Inference only: skip autograd bookkeeping.
	with torch.no_grad():
		feature = model(img_tensor).squeeze()
	# Store as a NumPy array on the CPU so the dict can be pickled later.
	feature = feature.cpu().numpy()
	features[book_id] = feature

In [9]:
# Persist the id -> feature-vector mapping to disk with pickle.
with open("image_features.pkl", "wb") as feature_file:
    pickle.dump(features, feature_file)

In [5]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Reload the pickled id -> feature-vector mapping.
# NOTE: pickle.load executes arbitrary code; only load files you produced yourself.
with open("image_features.pkl", "rb") as feature_file:
    features = pickle.load(feature_file)

# Keep ids and rows aligned: row i of feature_matrix belongs to product_ids[i].
product_ids = [pid for pid in features]
feature_matrix = np.array([features[pid] for pid in product_ids])

# Pairwise cosine similarity between all product feature vectors.
similarity_matrix = cosine_similarity(feature_matrix)

def get_similar_products(product_id, top_k=5):
    """Return the ids of the products most similar to ``product_id``.

    Args:
        product_id: Id to look up in the module-level ``product_ids`` list.
        top_k: Maximum number of similar products to return. Values <= 0
            yield an empty list.

    Returns:
        Up to ``top_k`` product ids ordered from most to least similar,
        never including ``product_id`` itself.

    Raises:
        ValueError: If ``product_id`` is not present in ``product_ids``.
    """
    idx = product_ids.index(product_id)
    ranked = np.argsort(similarity_matrix[idx])[::-1]
    # Remove the query by index instead of assuming it is ranked first: with
    # duplicate (or numerically tied) feature vectors another product can take
    # position 0, and dropping that position would keep the query itself.
    ranked = ranked[ranked != idx]
    return [product_ids[i] for i in ranked[:max(top_k, 0)]]

# Quick check: print the ids returned for product "2" with the default top_k.
print(get_similar_products("2"))


['6676', '821', '12285', '12969', '8456']


In [6]:
import json

# Precompute the similar-product list for every known product id.
recommendations = {pid: get_similar_products(pid) for pid in product_ids}

# Write the id -> similar-ids mapping to disk as JSON.
with open("recommendations.json", "w") as out_file:
    json.dump(recommendations, out_file)

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

def load_image(img_path):
	"""Load the array stored at ``img_path`` and move its first axis to the end.

	The (1, 2, 0) permutation reorders axes (a0, a1, a2) to (a1, a2, a0) --
	presumably channels-first to channels-last so ``imshow`` can render it;
	confirm against the preprocessing step.
	"""
	return np.load(img_path).transpose(1, 2, 0)

def plot_recommendations(product_id, top_k=5):
	"""Show the query product's image next to its most similar products.

	Args:
		product_id: Id of the query product; its image is read from
			``image_folder`` as ``{product_id}.npy``.
		top_k: Maximum number of similar products to display. Defaults to 5,
			matching the default of ``get_similar_products``.

	Returns:
		None. Prints a message and returns early if the query image file
		does not exist.
	"""
	img_path = os.path.join(image_folder, f"{product_id}.npy")

	if not os.path.exists(img_path):
		print(f"Query image not found: Book {product_id} doesn't exist.")
		return

	similar_products = get_similar_products(product_id, top_k=top_k)
	n_panels = len(similar_products) + 1

	# squeeze=False always returns a 2-D array of axes. Without it a single
	# panel (no similar products) comes back as a bare Axes and axes[0] fails.
	# Width scales with the panel count; six panels give the original (15, 5).
	fig, axes = plt.subplots(1, n_panels, figsize=(2.5 * n_panels, 5), squeeze=False)
	axes = axes[0]

	axes[0].imshow(load_image(img_path))
	axes[0].set_title("Query Product")
	axes[0].axis("off")

	for i, similar_id in enumerate(similar_products, start=1):
		similar_path = os.path.join(image_folder, f"{similar_id}.npy")
		axes[i].imshow(load_image(similar_path))
		axes[i].set_title(f"Similar {i}")
		axes[i].axis("off")

	plt.show()

# Visual check: show product "2" next to the products returned as most similar.
plot_recommendations("2")


In [18]:
import pandas as pd

data = pd.read_csv("data/collaborative_filtering.csv")

# Ground truth per user: the distinct books that user rated 4 or higher,
# stored as a list (duplicates removed via set) and indexed by UserId.
liked_rows = data.loc[data['Rating'] >= 4]
ground_truth = liked_rows.groupby('UserId')['BookId'].apply(set).apply(list)

def coverage(recommended_set, ground_truth_set):
    """Return the fraction of ``ground_truth_set`` found in ``recommended_set``.

    Args:
        recommended_set: Set of recommended item ids.
        ground_truth_set: Set of relevant item ids.

    Returns:
        ``len(recommended & ground_truth) / len(ground_truth)`` as a float in
        [0, 1]; 0.0 when ``ground_truth_set`` is empty (nothing to cover).
    """
    if not ground_truth_set:
        # Avoid ZeroDivisionError for a user with no relevant items.
        return 0.0
    return len(recommended_set & ground_truth_set) / len(ground_truth_set)

# Evaluation for all users
def evaluate_model(recommendations, ground_truth = ground_truth, k=5, verbose=True):
    """Compute the average per-user coverage of item-to-item recommendations.

    For each user, the recommendations of every book in that user's ground
    truth are unioned, and ``coverage`` measures how much of the same ground
    truth that union contains.

    Args:
        recommendations: Mapping from book id (str) to a list of similar book
            ids; the ids are converted with ``int`` before comparison.
        ground_truth: Series indexed by user id whose values are lists of
            book ids. Defaults to the module-level ``ground_truth`` as it
            existed when this function was defined.
        k: Use at most the first ``k`` recommendations per book. The default
            of 5 leaves 5-item recommendation lists unchanged.
        verbose: When True (default), print the per-user breakdown.

    Returns:
        The mean coverage over all users, or 0.0 if there are no users.
    """
    coverages = []

    for user in ground_truth.index:
        liked_books = set(ground_truth[user])
        recommended_set = set()
        for book in liked_books:
            # A book with no recommendation entry contributes nothing,
            # instead of aborting the whole evaluation with a KeyError.
            similar = recommendations.get(str(book), [])
            recommended_set.update(int(item) for item in similar[:k])

        coverages.append(coverage(recommended_set, liked_books))
        if verbose:
            print(f"User: {user}")
            print(f"\tRecommendation: {recommended_set}")
            print(f"\tGround Truth: {liked_books}, Coverage: {coverages[-1]}\n")

    # Guard against an empty ground truth (no users to average over).
    avg = sum(coverages) / len(coverages) if coverages else 0.0

    print(f"Average Coverage Rate = {avg:.4f}")
    return avg


In [19]:
# Run the evaluation; as the last expression, the returned average is shown as the cell output.
evaluate_model(recommendations)

User: A01038432MVI9JXYTTK5T
	Recommendation: {10244, 6089, 7440, 12436, 3158, 8027, 2076, 9053, 6685, 5470, 5083, 1761, 8220, 3877, 12325, 2408, 7403, 3691, 10095, 1009, 3251, 12214, 2877, 4606, 6399}
	Ground Truth: {3877, 4423, 7403, 299, 5135, 7440, 11407, 12436, 12214, 5657, 5470}, Coverage: 0.5454545454545454

User: A01254073JW8SSTKH6AIB
	Recommendation: {5568, 68, 6214, 4363, 2828, 3979, 9625, 9121, 3877, 10663, 7403, 4208, 13362, 4790, 13371}
	Ground Truth: {4790, 10852, 8454, 6847}, Coverage: 0.25

User: A035042126FPCW9EUHU1U
	Recommendation: {5568, 9121, 3877, 6214, 4363, 3979, 7403, 4208, 4790, 9625}
	Ground Truth: {4790, 8454, 6847}, Coverage: 0.3333333333333333

User: A0919846H34XADJMF99R
	Recommendation: {10177, 231, 3977, 9674, 7756, 14166, 11032, 858, 4892, 3679}
	Ground Truth: {10177, 2164, 2366, 3679}, Coverage: 0.5

User: A100V1W0C8BWOL
	Recommendation: {3457, 12038, 2184, 6412, 11026, 13718, 9878, 1820, 10656, 8994, 7970, 1575, 9128, 6057, 14251, 13995, 6446, 6191, 13

0.2250336140493406