In [None]:
import os
# LangSmith / LangChain settings read from the process environment.
# Each value is None when the corresponding variable is not set.
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY")
LANGCHAIN_ENDPOINT = os.environ.get("LANGCHAIN_ENDPOINT")
LANGCHAIN_TRACING_V2 = os.environ.get("LANGCHAIN_TRACING_V2")
LANGCHAIN_PROJECT = os.environ.get("LANGCHAIN_PROJECT")


In [None]:
# Echo the project name read from the environment (None if the variable is unset).
LANGCHAIN_PROJECT

In [3]:
import bs4
import streamlit as st
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain import hub
from langchain.llms.ollama import Ollama
from langchain.embeddings.ollama import OllamaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [4]:
## Data ingestion
def data_ingestion():
    """Load the agent blog post and split it into overlapping text chunks.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_documents``
        (chunk_size=1000, chunk_overlap=200) for the loaded page.
    """
    # Only parse elements carrying the article's title/header/body classes.
    article_filter = bs4.SoupStrainer(
        class_=("post-content", "post-title", "post-header")
    )
    page_loader = WebBaseLoader(
        web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
        bs_kwargs={"parse_only": article_filter},
    )
    loaded_pages = page_loader.load()

    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return chunker.split_documents(documents=loaded_pages)

In [5]:
def phi_llm():
    """Return an ``Ollama`` client for the "phi" model (temperature 0, timeout 300)."""
    return Ollama(model="phi", temperature=0, timeout=300)


def gemma_llm():
    """Return an ``Ollama`` client for the "gemma" model (temperature 0, timeout 300)."""
    return Ollama(model="gemma", temperature=0, timeout=300)


def embed_llm():
    """Return an ``OllamaEmbeddings`` instance for the "nomic-embed-text" model."""
    return OllamaEmbeddings(model="nomic-embed-text")

In [None]:
# create a vector store
def create_vector_store(doc):
    """Embed ``doc`` into a Chroma store kept under the "chroma_index" directory.

    Args:
        doc: Documents passed to ``Chroma.from_documents``.

    Returns:
        The Chroma vector store, after ``persist()`` has been called on it.
    """
    store = Chroma.from_documents(
        documents=doc,
        embedding=embed_llm(),
        persist_directory="chroma_index",
    )
    store.persist()
    return store

In [None]:
def create_retriever():
    """Open the Chroma store in "chroma_index" and return it as a retriever.

    The store is opened with the embedding function from ``embed_llm()``.
    """
    persisted_store = Chroma(
        persist_directory="chroma_index",
        embedding_function=embed_llm(),
    )
    return persisted_store.as_retriever()

In [None]:
def format_docs(docs):
    """Join the ``page_content`` of every document, separated by blank lines.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string; empty when ``docs`` yields nothing.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

In [None]:
def get_llm_response(llm, retriever, query):
    """Answer ``query`` with a retrieval-augmented chain.

    Pulls the ``rlm/rag-prompt`` prompt from the hub, fills its ``context``
    slot with the retrieved documents (joined by ``format_docs``) and its
    ``question`` slot with the query, then runs the prompt through ``llm``
    and parses the result with ``StrOutputParser``.

    Args:
        llm: Runnable language model placed after the prompt.
        retriever: Runnable retriever; it is invoked with the query text.
        query: The user's question as a plain string.

    Returns:
        The parsed output of the chain for ``query``.
    """
    prompt = hub.pull("rlm/rag-prompt")
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    # The leading dict step forwards the chain input unchanged to both of its
    # branches, so the input must be the raw question string. Wrapping it as
    # {"query": ...} would hand the dict itself to the retriever and to the
    # prompt's "question" slot.
    return rag_chain.invoke(query)

In [None]:
# Fetch and chunk the source page, then embed the chunks into the persisted Chroma store.
docs = data_ingestion()
vectordb = create_vector_store(docs)


In [None]:
# Choose the phi model, build a retriever over the "chroma_index" store, and set the question to ask.
llm = phi_llm()
retriever = create_retriever()
user_question = "What is Langchain?"

In [None]:
# Sanity check: show what the retriever alone returns for the question.
retriever.invoke(user_question)

In [None]:
# Run the full RAG chain; the result is stored in `answer` (this cell does not display it).
answer = get_llm_response(llm, retriever, user_question)

In [10]:
# from langchain_community.vectorstores import Weaviate
# import weaviate

# docs = data_ingestion()
# vectordb = Weaviate.from_documents(documents=docs, embedding=embed_llm())

# from weaviate.embedded import EmbeddedOptions

# client = weaviate.Client(embedded_options=embed_llm())

In [11]:
from langchain.schema.messages import HumanMessage, SystemMessage
from langchain_community.chat_models import ChatOllama

In [None]:
# Two-message chat input: a system message limiting the assistant to healthcare
# topics, followed by one human question.
# NOTE: the line break and leading spaces inside the triple-quoted string are part
# of the system prompt text as written.
# NOTE(review): presumably meant for a chat model such as the imported ChatOllama;
# the call that consumes `messages` is not visible here — confirm.
messages = [
  SystemMessage(
   content="""You're an assistant knowledgeable about
   healthcare. Only answer healthcare-related questions."""
  ),
  HumanMessage(content="What is Medicaid managed care?"),
 ]