In [2]:
import pandas as pd
from langchain_community.document_loaders import CSVLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings

# Read the book catalogue from CSV and index it in the vector database.
embeddings = OllamaEmbeddings(model="mxbai-embed-large")

# CSVLoader produces one document per CSV row: the page content is a series of
# "column: value" lines, and the metadata carries the source path and row number.
raw_documents = CSVLoader("./data/totally_real_books.csv").load()

# Rows are indexed whole. get_recommendations() unpacks all five fields from a
# single page_content, so the records must not be chunked. The earlier
# CharacterTextSplitter(chunk_size=0, ...) pass produced a list that was never
# handed to the vector store; `documents` is kept as a name for later cells.
documents = raw_documents

# Stable per-row ids: when this cell is run again against the same collection,
# each row is written under the id it already has instead of being added as a
# second copy (which surfaces as the same book appearing twice in the hits).
document_ids = [
    f"{doc.metadata['source']}:{doc.metadata['row']}" for doc in documents
]

books_db = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    ids=document_ids,
)


In [3]:
# Columns expected in each indexed book record, in display order.
_BOOK_FIELDS = ("title", "author", "description", "publisher", "year")


def _parse_book(page_content: str) -> dict:
    """Convert one document's "field: value" lines into a record dict."""
    record = dict.fromkeys(_BOOK_FIELDS, "")
    seen = set()
    current = None
    for line in page_content.split("\n"):
        key, sep, value = line.partition(":")
        if sep and key in record and key not in seen:
            # First occurrence of a known field label starts that field.
            seen.add(key)
            current = key
            # Drop only the single space that follows the label's colon.
            record[key] = value[1:] if value.startswith(" ") else value
        elif current is not None:
            # Any other line is a continuation of a multi-line value.
            record[current] += "\n" + line
    return record


def get_recommendations(
        query: str,
        top_hits: int = 3) -> pd.DataFrame:
    """Run a similarity search on the vector database and tabulate the hits.

    Args:
        query: Free-text description of the kind of book wanted.
        top_hits: Maximum number of results requested from the database.

    Returns:
        A DataFrame with one row per hit and the string columns ``title``,
        ``author``, ``description``, ``publisher`` and ``year``. The columns
        are present even when the search returns no hits.
    """
    results = books_db.similarity_search(query, k=top_hits)

    # Only the leading "field: " label is removed from each value, so text such
    # as "Subtitle: ..." inside a value is preserved, and values that span
    # several lines no longer break the parse.
    search_hits = [_parse_book(hit.page_content) for hit in results]
    return pd.DataFrame(search_hits, columns=list(_BOOK_FIELDS))

# Example query; the returned DataFrame is rendered as the cell output.
get_recommendations(query="A book about Michigan")

Unnamed: 0,title,author,description,publisher,year
0,Historic Michigan: Stories About The Great Lak...,Arthur Ageless,"A book about Michigan, and how its geography s...",HistoryAlivePublishing,1911
1,Historic Michigan: Stories About The Great Lak...,Arthur Ageless,"A book about Michigan, and how its geography s...",HistoryAlivePublishing,1911
2,Southern Belle: A Romance,Riley Roman,"A romantic story set in Alabama, full of drama...",Belles & Co,1936
