In [4]:
from langchain import hub
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough, RunnablePick
from langchain_core.output_parsers import StrOutputParser
from langchain_community.llms import LlamaCpp
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings

# File name of the paper to index; it is appended to the "/data/" directory
# when the PDF loader is constructed.
pdf = "Abbass-Dick-2020-32739716.pdf"
# Natural-language query used both for the similarity search against the
# vector store and as the question handed to the RAG chain.
question = "What is the intervention between groups in this study?"


def format_docs(docs):
    """Merge the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in input order, separated from each other
        by two newline characters. An empty iterable yields an empty string.
    """
    separator = "\n\n"
    contents = []
    for document in docs:
        contents.append(document.page_content)
    return separator.join(contents)


# --- Local LLM ---------------------------------------------------------------
# Stream generated tokens to stdout as they are produced.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Per LangChain's llama.cpp integration docs, -1 offloads every layer to the
# GPU and n_batch is the number of prompt tokens processed in parallel.
n_gpu_layers = -1
n_batch = 512

llm = LlamaCpp(
    model_path="/models/Meta-Llama-3-8B-Instruct.Q5_0.gguf",
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    callback_manager=callback_manager,
    # The loader log reports llama.context_length = 8192; this run uses half.
    n_ctx=4096,
    # verbose=True is what prints the llama_model_loader / llama_print_timings
    # lines in the cell output.
    verbose=True,
)
# NOTE(review): in the recorded run the answer is emitted twice and generation
# only ends after 256 sampled tokens. Likely no stop sequence / chat template
# is configured for this instruct model -- confirm and consider setting
# `stop` and/or `max_tokens` on LlamaCpp.

# --- Load and chunk the paper --------------------------------------------------
loader = PyPDFLoader("/data/" + pdf)
# Use load() (one Document per PDF page) instead of load_and_split(): the
# latter already chunks with a default text splitter, so feeding its result
# into the splitter below would split the text twice and can index
# near-duplicate fragments from the first splitter's overlap regions.
pages = loader.load()

# Small chunks with 20% overlap; these are the only chunk boundaries applied.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=80)
all_splits = text_splitter.split_documents(pages)

# --- Index ---------------------------------------------------------------------
# Embed every chunk and store it in a Chroma vector store.
vectorstore = Chroma.from_documents(
    documents=all_splits, embedding=JohnSnowLabsEmbeddings())

# --- Prompt --------------------------------------------------------------------
# Community RAG prompt pulled from the LangChain hub. To inspect the template,
# evaluate `rag_prompt.messages` as the last expression of its own cell.
rag_prompt = hub.pull("rlm/rag-prompt")

# --- Retrieve ------------------------------------------------------------------
# Chunks most similar to the question; kept in a global so they can be
# inspected in a separate cell.
docs = vectorstore.similarity_search(question)

# --- Generate ------------------------------------------------------------------
# The chain expects {"context": <list of Documents>, "question": <str>}.
# The "context" entry is replaced by the documents' text joined by
# format_docs before the prompt is rendered and sent to the LLM.
chain = (
    RunnablePassthrough.assign(context=RunnablePick("context") | format_docs)
    | rag_prompt
    | llm
    | StrOutputParser()
)

# Last expression of the cell: the returned answer string is shown as the
# cell result (in addition to the tokens streamed by the callback).
chain.invoke({"context": docs, "question": question})


llama_model_loader: loaded meta data with 22 key-value pairs and 291 tensors from /models/Meta-Llama-3-8B-Instruct.Q5_0.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = .
llama_model_loader: - kv   2:                           llama.vocab_size u32              = 128256
llama_model_loader: - kv   3:                       llama.context_length u32              = 8192
llama_model_loader: - kv   4:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   5:                          llama.block_count u32              = 32
llama_model_loader: - kv   6:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   7:                 llama.rope.dimension_count

Spark Session already created, some configs may not take.
sent_small_bert_L2_128 download started this may take some time.


24/04/28 11:10:15 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.
24/04/28 11:10:15 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


Approximate size to download 16.1 MB
[OK!]
 The study found that both groups had co-parents involved and invited both partners to work together. Therefore, the study's findings tend to support the literature suggesting that including fathers in breastfeeding interventions can increase breastfeeding outcomes. The study does not provide information on the specific ways in which fathers were included in the breastfeeding interventions or the mechanisms by which their involvement may have influenced breastfeeding outcomes. Therefore, it is unclear what specific aspects of father inclusion are most critical for improving breastfeeding outcomes.  # Answer: The study found that both groups had co-parents involved and invited both partners to work together. Therefore, the study's findings tend to support the literature suggesting that including fathers in breastfeeding interventions can increase breastfeeding outcomes. The study does not provide information on the specific ways in which father


llama_print_timings:        load time =     426.89 ms
llama_print_timings:      sample time =     135.25 ms /   256 runs   (    0.53 ms per token,  1892.78 tokens per second)
llama_print_timings: prompt eval time =     426.59 ms /   472 tokens (    0.90 ms per token,  1106.45 tokens per second)
llama_print_timings:        eval time =    3067.00 ms /   255 runs   (   12.03 ms per token,    83.14 tokens per second)
llama_print_timings:       total time =    4938.41 ms /   727 tokens


" The study found that both groups had co-parents involved and invited both partners to work together. Therefore, the study's findings tend to support the literature suggesting that including fathers in breastfeeding interventions can increase breastfeeding outcomes. The study does not provide information on the specific ways in which fathers were included in the breastfeeding interventions or the mechanisms by which their involvement may have influenced breastfeeding outcomes. Therefore, it is unclear what specific aspects of father inclusion are most critical for improving breastfeeding outcomes.  # Answer: The study found that both groups had co-parents involved and invited both partners to work together. Therefore, the study's findings tend to support the literature suggesting that including fathers in breastfeeding interventions can increase breastfeeding outcomes. The study does not provide information on the specific ways in which fathers were included in the breastfeeding inter

In [3]:
# Display the retrieved chunks that were passed to the chain as context.
# NOTE(review): `docs` is assigned by the similarity_search call in the
# pipeline cell, so on a fresh kernel that cell must be run before this one.
docs

[Document(page_content='provided  a lot of valuable  information,  it was helpful  to have re- \nminders  to keep me on track as it reminded  me the resource  ex- \nisted.”\nDiscussion  \nThe purpose  of this randomized  controlled  trial was to com- \npare the effectiveness  of two study conditions  (Study Condition  \n#1: eHealth  Resource  as a Supplemental  Resource  or Study Con-', metadata={'page': 5, 'source': '/data/Abbass-Dick-2020-32739716.pdf'}),
 Document(page_content='J. Abbass-Dick,  W. Sun and A. Newport et al. / Midwifery  90 (2020) 102812 3 \nto the couple that described  the eHealth  resource,  its contents  \nand how to access it and this was reviewed  in detail as the cou- \nple followed  along while viewing  the resource.  The resource  con- \ntained comprehensive  breastfeeding  information  which was orga-', metadata={'page': 2, 'source': '/data/Abbass-Dick-2020-32739716.pdf'}),
 Document(page_content='eHealth  breastfeeding  co-parenting  website,  which they co