In [18]:
import os
from typing import Any, TypedDict

import weaviate
from langchain_ollama import OllamaEmbeddings
from langchain_weaviate.vectorstores import WeaviateVectorStore

In [19]:
# --- Notebook configuration --------------------------------------------------
# Relative path to the data directory (not referenced by the cells shown here).
DATA_DIR = "../data"

# Tenant name that is forwarded to the vector-store similarity search.
TENANT = "genetics"

# Weaviate collection backing the vector store.
# Alternative collection (solar system): 'LangChain_0b0b3243518a4c268131915f0ebe04cb'
INDEX_NAME = "LangChain_a685b460f26b46478fbe90d4016ee64d"

# Value passed as `text_key` when the vector store is constructed.
TEXT_KEY = "text"

# Ollama model name; used for both the embeddings and the chat model.
LLM_MODEL_NAME = "llama3.2:3b"

In [20]:
# Open a connection to the local Weaviate instance; later cells reuse this client.
weaviate_client = weaviate.connect_to_local()

# Embedding function handed to the vector store (same Ollama model name as the chat LLM).
embeddings = OllamaEmbeddings(model=LLM_MODEL_NAME)

# LangChain vector store over the collection named by INDEX_NAME.
vec_db = WeaviateVectorStore(
    embedding=embeddings,
    client=weaviate_client,
    text_key=TEXT_KEY,
    index_name=INDEX_NAME,
)

In [21]:
# Sanity check: display the collections reported by the connected Weaviate
# client. The result maps each collection name to its configuration, so the
# rendered output is long.
weaviate_client.collections.list_all()

{'LangChain_a685b460f26b46478fbe90d4016ee64d': _CollectionConfigSimple(name='LangChain_a685b460f26b46478fbe90d4016ee64d', description=None, generative_config=None, properties=[_Property(name='text', description=None, data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=<Tokenization.WORD: 'word'>, vectorizer_config=None, vectorizer='none'), _Property(name='page_label', description="This property was generated by Weaviate's auto-schema feature on Sun Mar  2 03:50:08 2025", data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=<Tokenization.WORD: 'word'>, vectorizer_config=None, vectorizer='none'), _Property(name='producer', description="This property was generated by Weaviate's auto-schema feature on Sun Mar  2 03:50:08 2025", data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_range_filters=Fal

In [22]:
from pydantic import BaseModel
from typing import List, Dict, Optional

from langchain import hub
from langchain_ollama import ChatOllama


# Load the "rlm/rag-prompt" prompt through langchain's `hub` module. generate()
# invokes it with "question" and "context" values.
# NOTE(review): presumably this fetches from the LangChain Hub over the network — confirm.
prompt = hub.pull("rlm/rag-prompt")


class State(TypedDict):
    """Dict-shaped state passed between the retrieve and generate steps.

    Declared as a ``TypedDict`` because every consumer in this notebook
    subscripts the state (``state["question"]``) and the state itself is
    built as a plain dict. A pydantic model instance would not support
    that access pattern.
    """

    # The user's question.
    question: str
    # Retrieved documents (objects exposing ``page_content``); None until
    # retrieval has run.
    context: Optional[List[Any]]
    # The model's answer; starts out as an empty string.
    answer: str
    # Tenant name forwarded to the vector-store search.
    tenant: str


# Chat model that generate() calls; configured with temperature=0.
llm = ChatOllama(temperature=0, model=LLM_MODEL_NAME)



In [23]:
def retrieve(db, state: State):
    """Look up the documents most similar to the question for one tenant.

    Args:
        db: Object exposing ``similarity_search(query, tenant=...)``.
        state: Subscriptable state; its ``"question"`` and ``"tenant"``
            entries are read.

    Returns:
        A dict with a single ``"context"`` key holding whatever
        ``similarity_search`` returned.
    """
    search = db.similarity_search
    query, tenant_name = state["question"], state['tenant']
    matches = search(query, tenant=tenant_name)
    return {"context": matches}


def generate(state: State):
    """Answer the question in ``state`` using its retrieved context.

    Joins the ``page_content`` of every context document with blank lines,
    fills the module-level ``prompt`` with the question and that text, and
    sends the resulting messages to the module-level ``llm``.

    Args:
        state: Subscriptable state. ``"question"`` is read; ``"context"`` is
            an iterable of objects with a ``page_content`` attribute, or
            ``None``/empty when nothing has been retrieved.

    Returns:
        A dict with a single ``"answer"`` key holding ``response.content``.
    """
    # The state starts with context=None; treat that as "no documents"
    # instead of failing with a TypeError when iterating over None.
    docs = state["context"] or []
    docs_content = "\n\n".join(doc.page_content for doc in docs)
    messages = prompt.invoke(
        {"question": state["question"], "context": docs_content})
    response = llm.invoke(messages)
    return {"answer": response.content}

In [None]:
%%time

state = {'question': "tell me about important enzymes which transcribe DNA", 'context': None, 'answer': '', 'tenant': TENANT}

context = retrieve(vec_db, state)
print("got the context!!")
state['context'] = context['context']

print(generate(state)['answer'])

got the context!!
I don't know which enzymes specifically transcribe DNA from the provided context. However, I can tell you that transcription is a process regulated by regulatory elements such as promoters, enhancers, and silencers. These elements help determine whether or not a gene is transcribed into RNA.
CPU times: user 74.7 ms, sys: 46 ms, total: 121 ms
Wall time: 2min 21s
