In [1]:
import pandas as pd
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
import os


In [2]:
# Load the recipe dataset from the CSV file next to the notebook.
df = pd.read_csv(filepath_or_buffer="train.csv")

In [3]:
# Only the first N_INIT_RECIPES rows seed the vector store, so slice the frame
# before building any text instead of formatting every row and discarding most of them.
N_INIT_RECIPES = 50
seed_rows = df.head(N_INIT_RECIPES)

# One text per recipe, laid out as "name: ... ; ingredients: ... ; preparation: ...".
# Plain `+` concatenation is kept on purpose: a non-string cell (e.g. a missing value)
# raises TypeError here instead of being silently embedded as text.
db_init = [
    "name: " + name + " ; ingredients: " + ingredients + " ; preparation: " + steps
    for name, ingredients, steps in zip(
        seed_rows["name"], seed_rows["ingredients"], seed_rows["steps"]
    )
]

In [4]:

class CBRDatabase:
    """Small wrapper around a persisted Chroma vector store of recipe texts.

    The wrapped store is exposed as ``self.db``. An instance either creates a
    new store in ``db_directory`` from the given texts or opens the store that
    is already persisted there.
    """

    # Metadata key under which create_metadatas() stores each recipe index.
    RECIPE_ID_KEY = "recipe_id"

    def __init__(self, texts = None, metadatas:list = None, db_directory="dataset_chroma_db", initialize_new=False) -> None:
        """Create a new store or open an existing one.

        Args:
            texts: Texts to embed and store; required when ``initialize_new``
                is true and must be omitted otherwise.
            metadatas: Optional list of metadata dicts passed along with
                ``texts``; must be omitted when loading.
            db_directory: Directory used as the Chroma persist directory.
            initialize_new: True to build a new store from ``texts``, False to
                open the store in ``db_directory``.

        Raises:
            ValueError: ``initialize_new`` is true but ``texts`` is None, or
                ``initialize_new`` is false but ``texts``/``metadatas`` were given.
            FileExistsError: ``initialize_new`` is true and ``db_directory``
                already exists.
        """
        # Validate the arguments first so misuse is reported before the
        # embedding backend is instantiated.
        if initialize_new:
            if texts is None:
                raise ValueError("texts must be provided when initialize_new=True")
            if os.path.isdir(db_directory):
                raise FileExistsError("Provided directory exists")
        elif texts or metadatas:
            raise ValueError("You are trying to load exising database, not crate new.")

        embedding_function = GPT4AllEmbeddings()

        if initialize_new:
            self.db = Chroma.from_texts(texts = texts, embedding = embedding_function, metadatas=metadatas, persist_directory=db_directory)
        else:
            self.db = Chroma(persist_directory=db_directory, embedding_function=embedding_function)

    def add_examples(self, texts, metadatas:list = None):
        """Add ``texts`` (with optional ``metadatas``) to the wrapped store."""
        self.db.add_texts(texts, metadatas)

    def similarity_search(self, **kwargs):
        """Forward all keyword arguments to the wrapped store's ``similarity_search``.

        Returns:
            Whatever the wrapped store returns for the given arguments.
        """
        return self.db.similarity_search(**kwargs)

    @staticmethod
    def create_metadatas(recipe_indices:list, batch_metadata:dict = None):
        """Build one metadata dict per recipe index.

        Args:
            recipe_indices: Values stored under ``RECIPE_ID_KEY``, one per dict.
            batch_metadata: Optional entries merged into every dict; on a key
                collision these entries win because they are applied last.

        Returns:
            A list of new dicts in the same order as ``recipe_indices``.
        """
        metadatas = [{CBRDatabase.RECIPE_ID_KEY: recipe_id} for recipe_id in recipe_indices]
        if batch_metadata:
            for entry in metadatas:
                entry.update(batch_metadata)
        return metadatas


In [6]:
# Example: create the vector database on the first run, load it on later runs.

DB_DIRECTORY = "dataset_chroma_db"
metadata = CBRDatabase.create_metadatas(list(range(len(db_init))))

# CBRDatabase refuses to create a store in a directory that already exists, so the
# mode is chosen from the filesystem instead of toggling commented-out lines by hand.
if os.path.isdir(DB_DIRECTORY):
    db = CBRDatabase(db_directory=DB_DIRECTORY, initialize_new=False)
else:
    db = CBRDatabase(db_init, metadata, db_directory=DB_DIRECTORY, initialize_new=True)

# Number of stored documents; count() asks the collection for its size rather than
# fetching every document just to take len() of the list.
db.db._collection.count()


bert_load_from_file: gguf version     = 2
bert_load_from_file: gguf alignment   = 32
bert_load_from_file: gguf data offset = 695552
bert_load_from_file: model name           = BERT
bert_load_from_file: model architecture   = bert
bert_load_from_file: model file type      = 1
bert_load_from_file: bert tokenizer vocab = 30522


150

In [7]:
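# example of adding new recipes (kept commented out: each run appends the same texts again, so re-running it duplicates them in the store)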

# metadatas = CBRDatabase.create_metadatas(list(range(len(db_init))), {"test":True})
# db.add_examples(db_init, metadatas)
# len(db.db._collection.get()["documents"])

200

In [8]:
# Example: similarity search restricted by a metadata filter.

NAME = "Sandwich"
INGREDIENTS = "cheese, ham"

# The query follows the "name: ... ; ingredients: ..." layout of the stored texts.
query = f"name: {NAME} ; ingredients: {INGREDIENTS}"

# Restrict the search to entries whose metadata carries test=True.
metadata_filter = {'test':True}
docs = db.similarity_search(query=query, k=4, filter=metadata_filter)
docs

[Document(page_content='name: Green deviled eggs n ham ; ingredients: eggs, mayonnaise, honey mustard, sweet pickle relish, green food coloring, honey-baked ham, lemon pepper, paprika ; preparation: Carefully cut eggs into halves lengthwise. Just as carefully, scoop out yolks & into a bowl. Mash yolks well, then mix in mayo, mustard & relish. Add green food color one drop at a time, mixing well after each drop, until desired intensity of green is achieved. Add chopped ham & lemon pepper & mix well. Carefully spoon & slightly mound yolk mixture into each hollowed egg white half. If using paprika, sprinkle lightly over the green deviled eggs & serve.', metadata={'recipe_id': 16, 'test': True}),
 Document(page_content='name: Green deviled eggs n ham ; ingredients: eggs, mayonnaise, honey mustard, sweet pickle relish, green food coloring, honey-baked ham, lemon pepper, paprika ; preparation: Carefully cut eggs into halves lengthwise. Just as carefully, scoop out yolks & into a bowl. Mash y

In [9]:
# Example: retrieve the text content and metadata of the first returned document.

top_doc = docs[0]
(top_doc.page_content, top_doc.metadata[CBRDatabase.RECIPE_ID_KEY], top_doc.metadata)

('name: Green deviled eggs n ham ; ingredients: eggs, mayonnaise, honey mustard, sweet pickle relish, green food coloring, honey-baked ham, lemon pepper, paprika ; preparation: Carefully cut eggs into halves lengthwise. Just as carefully, scoop out yolks & into a bowl. Mash yolks well, then mix in mayo, mustard & relish. Add green food color one drop at a time, mixing well after each drop, until desired intensity of green is achieved. Add chopped ham & lemon pepper & mix well. Carefully spoon & slightly mound yolk mixture into each hollowed egg white half. If using paprika, sprinkle lightly over the green deviled eggs & serve.',
 16,
 {'recipe_id': 16, 'test': True})