In [16]:
import os
from dotenv import load_dotenv


load_dotenv()

os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.getenv("HUGGINGFACEHUB_API_TOKEN")

In [17]:
from selenium import webdriver
from selenium.webdriver.common.by import By

def get_website_content_to_txt(url):
    driver = webdriver.Chrome()
    
    try:
        driver.get(url)
        
        elements = driver.find_elements(By.XPATH, '//*[not(*) and normalize-space()]')
        
        txt_file = 'website_content.txt'
        with open(txt_file, 'w', encoding='utf-8') as f:
            for element in elements:
                text = element.text.strip()
                if text:  
                    f.write(text + '\n')
        
        print(f"Website content has been saved to '{txt_file}'.")
        
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        driver.quit()

if __name__ == "__main__":
    url = input("Enter the URL of the website: ")
    get_website_content_to_txt(url)


Website content has been saved to 'website_content.txt'.


In [None]:
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.service import Service
# from webdriver_manager.chrome import ChromeDriverManager

# # Initialize ChromeOptions
# chrome_options = webdriver.ChromeOptions()

# chrome_options.add_experimental_option("detach", True)

# # Function to scrape the full content from a given URL
# def scrape_full_page(url):
#     # Initialize WebDriver
#     driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)
    
#     try:
#         # Open the URL
#         driver.get(url)
        
#         # Extract the full HTML content of the page
#         page_text = driver.find_element("tag name","body").text
        
#         # Print the full page content
#         print(page_text)
        
#         # Optionally, save the content to an HTML file
#         with open("scraped_page.html", "w", encoding="utf-8") as file:
#             file.write(page_text)
        
#         print("Page content has been saved to 'scraped_page.text'.")
        
#     except Exception as e:
#         print(f"An error occurred: {e}")
#     finally:
#         # Close the WebDriver (uncomment if you want the browser to close automatically)
#         driver.quit()

# # Input URL
# url = input("Enter the URL to scrape: ")

# # Scrape the full content from the given URL
# data=scrape_full_page(url)

# if data:
#     print("scrapped Data: ")
#     print(data)

In [18]:
from langchain.document_loaders import TextLoader
 
loader = TextLoader("website_content.txt")
document = loader.load()

In [19]:
print(document)

[Document(page_content='Register\nLogin\nSiePortal\nThe integrated platform for your product selection, buying and support workflow - bringing together Industry Mall and Online Support.\nSiePortal\nRegion and language\nDeutsch\nContact\nHelp\nSupport Request\nHome\nSupport\nForum\nConferences\nProduct Conferences\nDecentral Peripherie\nProducts & Services\nSupport\nMySiePortal\nCart\nNavigation\nConferences\nDP- DP coupler to exchage process data with ABB dcs on profibus protocol\njeetendra\nRating\n(0)\nThanks\n0\nActions\nNew post\n6 Entries\n7/14/2014 10:48 AM\n(0)\njeetendra\nExperienced Member\nJoined: 8/30/2010\nLast visit: 6/4/2024\nShare this page:\nFavorites (product support and services)\nMy requests\nCAx downloads\nMy products\nAll about Decentral Peripherie\nPresales info\nCatalog and ordering system online\nTechnical info\nSupport\nTraining\nContact & partners\nFollow us on\nTwitter\nIndustry Mall and SIOS are now SiePortal\nSiePortal combines our Industry Mall with the In

In [20]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=10)
text_chunks = text_splitter.split_documents(document)
len(text_chunks)

3

In [21]:
from langchain_community.embeddings import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-l6-v2",
    multi_process=True,
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},  # Set `True` for cosine similarity
)



In [22]:
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

embeddings = HuggingFaceInferenceAPIEmbeddings(api_key='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',model_name="sentence-transformers/all-MiniLM-l6-v2")

In [23]:
from langchain.vectorstores import FAISS
db = FAISS.from_documents(text_chunks, embedding_model)

In [24]:
from langchain_community.llms import HuggingFaceEndpoint
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"
llm = HuggingFaceEndpoint(
    repo_id=repo_id, temperature= 0.75, model_kwargs= {'max_length': 8192}, max_new_tokens=4096)
                                                       

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to C:\Users\shiva\.cache\huggingface\token
Login successful


In [25]:
from langchain.chains import RetrievalQA

qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", 
                                 retriever=db.as_retriever(search_kwargs={"k": 2}))

In [26]:
query = input("what is your query?")
ans = qa.invoke(query)

In [27]:
print(ans)

{'query': 'exchange process data with abb dcs', 'result': " To exchange process data with an ABB DCS (Distributed Control System) using the Decentral Peripherie product from SIE, you would typically follow these steps:\n\n1. Ensure that both the Decentral Peripherie device and the ABB DCS support the Profibus communication protocol.\n2. Connect the Decentral Peripherie device to the Profibus network and configure it according to the manufacturer's instructions.\n3. Configure the ABB DCS to accept data from the Decentral Peripherie device using the Profibus protocol.\n4. Define the data mapping between the Decentral Peripherie device and the ABB DCS, including the data type, address, and format.\n5. Test the communication between the Decentral Peripherie device and the ABB DCS to ensure that data is being exchanged correctly.\n\nIf you encounter any issues during the setup or configuration process, you can contact SIE's support team for assistance. They may be able to provide additional