In [13]:
import numpy as np
import faiss
from openai import OpenAI
from dotenv import load_dotenv

In [14]:
# Load settings from a local .env file into the process environment
# (python-dotenv). Presumably this is where the OpenAI API key comes from --
# confirm against your own .env before running.
load_dotenv()

True

In [15]:
# Shared OpenAI API client, used by the cells below for both the embeddings
# call and the chat-completion call. No key is passed explicitly here, so the
# client is presumably configured from the environment -- verify.
client = OpenAI()

In [16]:
# Sample data: one user's previous reviews of other books, as short free-text
# strings. These are embedded and indexed below. The order of this list
# matters: retrieval maps row positions returned by the index back into this
# list, so it must not be reordered between indexing and retrieval.
reviews = [
    "I hate stories about backpacking. It's boring.",
    "Beautifully descriptive but predictable plot.",
    "Compelling dystopia but overwhelmingly bleak.",
    "Timeless romance with sharp social commentary.",
    "Loved the fast-paced mystery — couldn't put it down!",
    "Too much exposition and not enough action.",
    "Hilarious satire with surprisingly emotional depth.",
    "Characters felt flat despite an intriguing premise.",
    "Incredible world-building, but the pacing dragged.",
    "Poetic writing style, but the story lacked substance.",
    "Heartwarming coming-of-age tale with relatable struggles.",
    "A dense philosophical read — rewarding, but slow.",
    "Surprisingly moving memoir — raw and honest.",
    "Fascinating premise but the ending felt rushed.",
    "Gritty noir thriller with sharp dialogue and tension."
]


In [17]:

def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding vector for ``text`` from the OpenAI embeddings API.

    Args:
        text: The string to embed.
        model: Name of the embedding model to request. Defaults to
            "text-embedding-3-small", the model this notebook has always used.

    Returns:
        The ``embedding`` field of the first (and only) item in the API
        response for the single input string.
    """
    # Collapse real newlines into spaces before embedding. The previous code
    # replaced the two-character string "/n", which never matches a newline,
    # so multi-line text was sent to the API unchanged.
    text = text.replace("\n", " ")
    response = client.embeddings.create(input=[text], model=model)
    return response.data[0].embedding


In [18]:

def index_reviews(reviews):
    """Embed every review and store the vectors in a flat L2 FAISS index.

    Row ``i`` of the index corresponds to the ``i``-th review, so the same
    sequence (in the same order) must be used later to map search results
    back to review text.

    Args:
        reviews: Iterable of review strings to embed and index.

    Returns:
        A ``faiss.IndexFlatL2`` containing one vector per review.

    Raises:
        ValueError: If ``reviews`` is empty; the index dimension is taken
            from the embeddings, so at least one review is required.
    """
    reviews = list(reviews)
    if not reviews:
        raise ValueError("index_reviews() needs at least one review to index")

    # One embedding per review, stacked into a 2D (n_reviews, dim) array.
    # FAISS works with float32 vectors, so request that dtype explicitly
    # instead of relying on NumPy's float64 default for Python floats.
    vectors = np.array(
        [get_embedding(review) for review in reviews],
        dtype=np.float32,
    ).reshape(len(reviews), -1)

    # The index dimension is the embedding length.
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index

In [19]:

def retrieve_reviews(index, query, reviews, k=2):
    """Return up to ``k`` reviews whose embeddings are nearest to ``query``.

    Args:
        index: Index to search; it must expose ``search(vectors, k)`` returning
            ``(distances, indices)``, as the index built by ``index_reviews`` does.
        query: Text to embed and search with.
        reviews: The review strings, in the same order they were indexed.
        k: Maximum number of reviews to return. Defaults to 2.

    Returns:
        A list of review strings, nearest first. It can be shorter than ``k``
        when the index holds fewer than ``k`` vectors, and is empty when
        ``k <= 0``.
    """
    # Nothing to retrieve; skip the (billable) embedding call entirely.
    if k <= 0:
        return []

    # Embed the query as a single-row float32 matrix, the layout FAISS expects.
    query_vector = np.array(get_embedding(query), dtype=np.float32).reshape(1, -1)
    _, indices = index.search(query_vector, k)

    # FAISS pads the result with -1 when fewer than k neighbours exist. Those
    # must be skipped: reviews[-1] would silently return the last review.
    return [reviews[i] for i in indices[0] if i >= 0]

In [21]:
# Candidate book, described in one sentence; this text is the search query.
book = "The Beach by Alex Garland critiques backpacker culture, showcasing the selfishness and moral decay behind their pursuit of an untouchable dream."

# Embed and index the user's past reviews, then pull the ones closest to the
# candidate book's description (default k, i.e. the two nearest).
index = index_reviews(reviews)
related_reviews = retrieve_reviews(index=index, query=book, reviews=reviews)

print(related_reviews)

["I hate stories about backpacking. It's boring.", 'Compelling dystopia but overwhelmingly bleak.']


In [23]:
def predict_rating(book, related_reviews, model="gpt-4o-mini",
                   temperature=0.7, max_tokens=2000):
    """Ask a chat model how likely the user is to enjoy ``book``.

    The prompt contains the book description and the user's related past
    reviews, and asks for a bare number from 1 (worst) to 10 (best).

    Args:
        book: Description of the book being considered.
        related_reviews: Iterable of the user's past review strings to give
            the model as context.
        model: Chat model name. Defaults to "gpt-4o-mini".
        temperature: Sampling temperature passed to the API. Defaults to 0.7.
        max_tokens: Completion token limit passed to the API. Defaults to 2000.

    Returns:
        The model's reply as a string with surrounding whitespace removed
        (e.g. ``'6'``). The reply is not validated as a number; an empty
        string is returned if the API response carries no text content.
    """
    reviews_text = "\n".join(related_reviews)

    prompt = (
        "Here is a book I might want to read:\n" + book + "\n\n"
        "Here are relevant reviews from the past:\n" + reviews_text + "\n\n"
        "On a scale of 1 (worst) to 10 (best), "
        "how likely am I to enjoy this book? "
        "Reply with no explanation, just a number."
    )

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )

    # The message content may be None (e.g. a refusal); normalise to a string
    # and trim stray whitespace/newlines around the number.
    content = response.choices[0].message.content
    return (content or "").strip()


In [24]:
# Ask the model for a 1-10 enjoyment rating of `book`, using the retrieved
# reviews as context. Left as the cell's last expression so the notebook
# displays the returned string.
predict_rating(book, related_reviews)

'6'