In [1]:
import pandas as pd
import numpy as np
from qdrant_client import QdrantClient
# from qdrant_client.http.models import VectorParams, Distance
from openai import OpenAI
import os
import random

In [2]:
# Secrets and the Qdrant endpoint are read from the process environment so that
# nothing sensitive is stored in the notebook. The lookups happen in the order
# listed, and a missing variable raises KeyError.
qdrant_api_key, qdrant_url, openai_api_key = (
    os.environ[name]
    for name in ("QDRANT_API_KEY", "QDRANT_URL", "OPENAI_API_KEY")
)

In [3]:
# OpenAI API client; used by create_embedding() for the embeddings endpoint.
client = OpenAI(api_key=openai_api_key)

In [4]:
# Handle to the Qdrant instance that is searched in the cells below.
qdrant_client = QdrantClient(
    api_key=qdrant_api_key,
    url=qdrant_url,
)

In [5]:
def create_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector for ``text`` from the OpenAI embeddings endpoint.

    Args:
        text: Value forwarded as ``input`` to ``client.embeddings.create``
            (a single query string in this notebook).
        model: Name of the embedding model to request. Defaults to
            ``"text-embedding-ada-002"``, the model this notebook originally
            hard-coded.
            NOTE(review): query vectors should presumably be produced with the
            same model that populated the collection -- confirm before changing.

    Returns:
        The ``embedding`` attribute of the first item in the response's ``data``.

    Errors raised by the module-level ``client`` propagate unchanged.
    """
    response = client.embeddings.create(input=text, model=model)
    # Only the first returned item is used.
    return response.data[0].embedding

In [6]:
# Free-text request used as the running example in the search cells below.
query = "I'm looking for a graphic novel about the monkey king."
# Embed the request; the resulting vector is what gets sent to Qdrant.
query_vector = create_embedding(text=query)

In [7]:
# Query the book collection with the embedded request, asking for at most five hits.
collection_name = "best-book-ever"
search_results = qdrant_client.search(
    limit=5,
    query_vector=query_vector,
    collection_name=collection_name,
)

In [9]:
# Inspect the container type returned by the search call.
type(search_results)

list

In [10]:
# For every hit show its class, its title and its genre list, followed by a
# blank separator line (the empty string plus print's own newline).
for hit in search_results:
    print(type(hit), hit.payload['title'], hit.payload['genres_list'], "", sep="\n")

<class 'qdrant_client.http.models.models.ScoredPoint'>
Monkey: The Journey to the West
['Classics', 'Fiction', 'Fantasy', 'China', 'Mythology', 'Literature', 'Chinese Literature', 'Asia', 'Adventure', 'Novels']

<class 'qdrant_client.http.models.models.ScoredPoint'>
American Born Chinese
['Graphic Novels', 'Young Adult', 'Comics', 'Fiction', 'Fantasy', 'Graphic Novels Comics', 'Teen', 'Coming Of Age', 'School', 'Cultural']

<class 'qdrant_client.http.models.models.ScoredPoint'>
Monkey King
['Fiction', 'China', 'Contemporary']

<class 'qdrant_client.http.models.models.ScoredPoint'>
Red Earth and Pouring Rain
['Fiction', 'India', 'Fantasy', 'Magical Realism', 'Indian Literature', 'Literature', 'Asia', 'Literary Fiction', 'Novels', 'Historical Fiction']

<class 'qdrant_client.http.models.models.ScoredPoint'>
Prince Valiant Vol. 1: The Prophecy
['Comics', 'Fantasy', 'Graphic Novels', 'Fiction']



In [20]:
# Render every hit as a small text block: numbered heading, title, author and
# the payload's pre-formatted "descriptor" text.
# The payload keys use single quotes because reusing the enclosing double quote
# inside an f-string replacement field is a SyntaxError before Python 3.12.
hits = [
    f"<u>Result #{hdx}</u> \nTitle: {hit.payload['title']} \nAuthor: {hit.payload['author']} \n{hit.payload['descriptor']}"
    for hdx, hit in enumerate(search_results, start=1)
]

# Separate results with two blank lines and show the combined text.
hitstr = "\n\n\n".join(hits)
print(hitstr)

<u>Result #1</u> 
Title: Monkey: The Journey to the West 
Author: Wu Cheng'en, Arthur Waley (Translator/Adapter), Hu Shih (Introduction) 
Description: Probably the most popular book in the history of the Far East, this classic sixteenth century novel is a combination of picaresque novel and folk epic that mixes satire, allegory, and history into a rollicking adventure. It is the story of the roguish Monkey and his encounters with major and minor spirits, gods, demigods, demons, ogres, monsters, and fairies. This translation, by the distinguished scholar Arthur Waley, is the first accurate English version; it makes available to the Western reader a faithful reproduction of the spirit and meaning of the original.

Genres: Classics, Fiction, Fantasy, China, Mythology, Literature, Chinese Literature, Asia, Adventure, Novels


<u>Result #2</u> 
Title: American Born Chinese 
Author: Gene Luen Yang (Goodreads Author) 
Description: All Jin Wang wants is to fit in. When his family moves to a ne

In [None]:
# Build a compact, plain-text summary of the search hits.
# NOTE(review): earlier cells read the payload keys 'descriptor' and
# 'genres_list'; confirm that 'description' and 'genres' actually exist in the
# stored payloads, otherwise the fallback texts below are what gets printed.
hits = []
for hdx, hit in enumerate(search_results):
    # A hit may carry no payload at all; treat that as an empty mapping.
    payload = hit.payload or {}

    # `or` instead of a .get() default: a key that is present but holds
    # None/"" must also fall back, otherwise len(description) below raises TypeError.
    title = payload.get("title") or "Unknown title"
    author = payload.get("author") or "Unknown author"
    description = payload.get("description") or "No description available."
    genres = payload.get("genres") or "Unknown genre"

    # Truncate long descriptions for brevity
    if len(description) > 150:
        description = description[:150] + "..."

    # Format hit information
    hitstr = (
        f"Result #{hdx + 1}:\n"
        f"Title: {title}\n"
        f"Author: {author}\n"
        f"Description: {description}\n"
        f"Genres: {genres}\n"
    )
    hits.append(hitstr)

# Combine all hits into a single string
context = "\n".join(hits)
print(context)

Result #1:
Title: Monkey: The Journey to the West
Author: Wu Cheng'en, Arthur Waley (Translator/Adapter), Hu Shih (Introduction)
Description: Description: Probably the most popular book in the history of the Far East, this classic sixteenth century novel is a combination of picaresque novel ...

Result #2:
Title: American Born Chinese
Author: Gene Luen Yang (Goodreads Author)
Description: Description: All Jin Wang wants is to fit in. When his family moves to a new neighborhood, he suddenly finds that he's the only Chinese American stude...

Result #3:
Title: Monkey King
Author: Patricia Chao
Description: Description: Monkey King&nbsp tells the story of 28-year-old Sally Wang, a Chinese-American woman whose mental breakdown and sojourn in a hospital set...

Result #4:
Title: Red Earth and Pouring Rain
Author: Vikram Chandra, Jordan Pavlin (Editor)
Description: Description: Combining Indian myths, epic history, and the story of three college kids in search of America, a narrative include

In [5]:
# First draft of the retriever prompt. The name is rebound further down in this
# same cell, so this text is never the one sent to the model.
system_message = """
You are a retrieval agent for a vector database of books. A user will provide a query that will ask for book recommendations.
If the user's message is not intended to search for a book, you will return an empty string. Otherwise, your job is to extract the key
information from the query to search the vector database. 

The vectors in the database are embeddings of the book descriptions and the genres. You will formulate a text string consisting of a
description and genres and return it.
"""

# Refined by ChatGPT. This rebinding replaces the draft above and is the prompt
# passed as the "system" message in the query loop.
system_message = """
You are a retrieval agent for a vector database of books. YOU ARE NOT A CHATBOT. A user will provide a query to ask for book recommendations.
If the user's query is not related to searching for a book (e.g., casual conversation or off-topic request), respond with an empty string.

Your primary task is to extract the key information from the user's query to perform a search in the vector database. 
The database vectors represent embeddings of book descriptions and genres.

To assist with the search, you will formulate and return a text string combining the extracted description and genres from the query.
If the user's query does not include specific genres or a description, omit those fields from the output but ensure the format remains consistent.

This is the format you will use:

Description: some_description

Genres: list_of_genres
"""

In [6]:
# Earlier probe set (one off-topic question, one genuine request). It is
# superseded by the reassignment below.
queries = [
    "What’s your favorite book?",
    "Can you recommend a fantasy book with dragons and epic battles?",
]

# Active query set: the single request that the query loop sends.
queries = [
    "What are some good books for children about adventure?",
]

In [7]:
# Send each query to the chat model together with the retriever system prompt
# and print the search string the model produces.
for query in queries:
    # Use the OpenAI client created near the top of the notebook. The previous
    # name, `openai_retriever`, is not defined anywhere in the visible notebook,
    # so this cell raised NameError on a fresh kernel.
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": query},
        ]
    )

    # Only the first choice's message text is shown.
    print(f"Query: {query} Response: {response.choices[0].message.content}")

Query: What are some good books for children about adventure? Response: Description: adventure books for children

Genres: children, adventure
