In [None]:
# link one https://www.gracedupage.org/about-us 

In [8]:
from haystack.nodes import LinkContentFetcher
from haystack.schema import Document
from typing import List 
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import AnswerParser, PromptNode, PromptTemplate
from haystack import Pipeline
from haystack.nodes import  DensePassageRetriever, EmbeddingRetriever
import os
from dotenv import load_dotenv

In [2]:
def initialize_rag_pipeline(retriever, openai_key):
    """
    Initialize a pipeline for RAG-based question answering.

    Args:
        retriever (DensePassageRetriever): Dense passage retriever.
        openai_key (str): API key for OpenAI.

    Returns:
        query_pipeline (Pipeline): Pipeline for RAG-based question answering.
    """
    prompt_template = PromptTemplate(prompt = """"Answer the following query based on the provided context. \
                                                Documents: {documents}\n
                                                Query: {query}\n
                                                Answer: 
                                            """,
                                            output_parser=AnswerParser())
    prompt_node = PromptNode(model_name_or_path = "gpt-4", #https://rmm2064ls3doz1on.us-east-1.aws.endpoints.huggingface.cloud",
                            api_key = openai_key,
                            default_prompt_template = prompt_template, 
                            max_length = 4054,
                            model_kwargs={"stream":True},
                            debug=True)

    query_pipeline = Pipeline()
    query_pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
    query_pipeline.add_node(component=prompt_node, name="PromptNode", inputs=["Retriever"])

    return query_pipeline

In [3]:
load_dotenv("../.env")
openai_key = os.getenv("OPENAI_API_KEY")


In [4]:
# Initialize document store
document_store = FAISSDocumentStore(faiss_index_factory_str="Flat", return_embedding=True)
retriever = DensePassageRetriever(
                document_store=document_store,
                query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
                passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
                use_gpu=True,
                embed_title=True,
                batch_size=16,
    )

  return self.fget.__get__(instance, owner)()


In [5]:
def scrape_web_links(link_list):
    """
    Scrape the web links provided in the list and return the documents.

    Args:
        link_list (List[str]): List of links to scrape.

    Returns:
        all_documents (List[List[Document]]): List of documents for each link.
    """
    link_content_fetcher = LinkContentFetcher()
    documents = []
    all_documents = []

    for link in link_list:
        documents : List[Document] = link_content_fetcher.fetch(url=link)
        document_store.write_documents(documents)
        all_documents.append(documents)

    return document_store, all_documents

# Scrape the web links
link_list = ["https://www.gracedupage.org/about-us",
                'https://www.gracedupage.org/who-is-jesus',
                'https://www.gracedupage.org/purpose',
                'https://www.gracedupage.org/history',
                'https://www.gracedupage.org/pastoral-ministries',
                'https://www.gracedupage.org/leadership',
                'https://www.gracedupage.org/baptism-and-membership']

document_store, all_documents = scrape_web_links(link_list)

# Add documents embeddings to index
document_store.update_embeddings(retriever=retriever)

Writing Documents: 10000it [00:00, 356077.15it/s]       
Writing Documents: 10000it [00:00, 876644.16it/s]       
Writing Documents: 10000it [00:00, 2004446.36it/s]      
Writing Documents: 10000it [00:00, 2011656.59it/s]      
Writing Documents: 10000it [00:00, 2270627.98it/s]      
Writing Documents: 10000it [00:00, 1890688.78it/s]      
Writing Documents: 10000it [00:00, 1823531.15it/s]      
Documents Processed: 10000 docs [00:04, 2336.68 docs/s]      


In [6]:
from haystack.nodes import PreProcessor

preprocessor = PreProcessor(
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=False,
    split_by="word",
    split_length=100,
    split_respect_sentence_boundary=True,
)

In [None]:

docs_nrsb = preprocessor.process(all_documents)

In [None]:
document_store.get_all_documents()

In [9]:
# Initialize pipeline
query_pipeline = initialize_rag_pipeline(retriever=retriever, openai_key= openai_key)


In [None]:
answer = query_pipeline.run(query="Hello")

In [None]:
document_store.

In [10]:
answer3 = query_pipeline.run(query="What God Does For Us", documents=document_store.get_all_documents())

According to the documents, God created man in His image and likeness, free of sin with a rational nature, intelligence, volition, self-determination, and moral responsibility. God's purpose in creating man was for them to glorify Him, enjoy His fellowship, live in His will, multiply and fill the world with faith-filled image bearers, and reflect His benevolent kingship. However, after the sin of Adam, man lost their innocence and became subject to the wrath of God. Yet, God provides salvation through His grace and the redemptive work of Jesus Christ.The document does not provide specific information on what God does for us.The documents do not provide specific information on what God does for us.God equips certain people with unique gifts to serve as "shepherds" or "teachers" within the church community. These individuals are meant to guide and care for God's people, helping them grow towards spiritual maturity, stability, unity, and love. They serve God and one another towards a deep

In [None]:
answer3['answers'][0].answer +"\n"+ f"Context retrieved from: {answer3['documents'][0].meta['url']} with vector id: {answer3['documents'][0].meta['vector_id']}"

In [None]:
answer3['documents'][0].meta

In [None]:
for item in answer3['documents']:
    print(item.meta)