In [None]:

import streamlit as st
from langchain_core.messages import AIMessage,HumanMessage
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever,create_retrieval_chain
from langchain.chains.combine_documents import  create_stuff_documents_chain
load_dotenv()

def get_urls():
    """Return the list of web pages that make up the chatbot's knowledge base.

    Returns:
        list[str]: Absolute URLs, one entry per page to scrape.
    """
    # The separating comma belongs outside the string literal; inside the
    # quotes it becomes part of the URL itself.
    return [
        "https://en.wikipedia.org/wiki/2024_United_States_presidential_election",
    ]


def get_vectorstore_from_url(url):
    """Scrape one or more pages, split them into chunks and index them in Chroma.

    Args:
        url: A single URL string or an iterable of URL strings. `run()` passes
            the whole list returned by `get_urls()`.

    Returns:
        The Chroma vector store built from the chunks of every requested page.
    """
    # Honour the argument instead of a hard-coded address, and accept both a
    # single string and a collection of strings.
    urls = [url] if isinstance(url, str) else list(url)

    # One splitter is enough: its configuration is the same for every page.
    text_splitter = RecursiveCharacterTextSplitter()

    document_chunks = []
    for page_url in urls:
        pages = WebBaseLoader(page_url).load()
        document_chunks.extend(text_splitter.split_documents(pages))

    return Chroma.from_documents(document_chunks, OpenAIEmbeddings())

def context_retriever_chain(vector_store):
    """Build a retriever that takes the conversation history into account.

    Args:
        vector_store: Vector store exposing ``as_retriever()``.

    Returns:
        The runnable produced by ``create_history_aware_retriever``.
    """
    llm = ChatOpenAI()
    retriever = vector_store.as_retriever()
    prompt = ChatPromptTemplate.from_messages(
        [
            MessagesPlaceholder(variable_name="chat_history"),
            ("user", "{input}"),
            # This prompt is used to produce the text handed to the retriever.
            # Without an explicit instruction the model would simply answer the
            # question, and that answer would be used as the search query.
            (
                "user",
                "Given the above conversation, generate a search query to look up "
                "in order to get information relevant to the conversation",
            ),
        ]
    )
    return create_history_aware_retriever(llm, retriever, prompt)


def get_conversion_rag_chain(retriever_chain):
    """Assemble the question-answering chain on top of a retriever chain.

    Args:
        retriever_chain: Retriever runnable, as built by
            `context_retriever_chain`.

    Returns:
        The runnable returned by ``create_retrieval_chain``; `get_response`
        reads its result under the ``"answer"`` key.
    """
    chat_model = ChatOpenAI()

    # The answering prompt sees the retrieved context, the prior turns and
    # the current question, in that order.
    answer_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "answer the following questions based on the below context:\n\n{context}"),
            MessagesPlaceholder(variable_name="chat_history"),
            ("user", "{input}"),
        ]
    )

    # Retrieval first, then the documents chain that fills `{context}`.
    return create_retrieval_chain(
        retriever_chain,
        create_stuff_documents_chain(chat_model, answer_prompt),
    )

def get_response(user_input, conversation_rag_chain, chat_history=None):
    """Ask the RAG chain a question and return only the answer.

    Args:
        user_input: The user's question.
        conversation_rag_chain: Object exposing ``invoke(dict)`` whose result
            can be indexed with ``"answer"``.
        chat_history: Optional sequence of earlier messages. When omitted an
            empty history is sent, which matches the previous behaviour.

    Returns:
        The value stored under ``"answer"`` in the chain's response.
    """
    # Copy the history so the chain never works on the caller's own list.
    history = [] if chat_history is None else list(chat_history)
    response = conversation_rag_chain.invoke(
        {"chat_history": history, "input": user_input}
    )
    return response["answer"]


def initialize_session_state():
    """Build the RAG pipeline and cache its parts in Streamlit's session state.

    Stores the vector store, both chains, the source URLs, an ``"init"`` flag
    and a chat history seeded with a greeting from the bot.
    """
    # The unpacking order must mirror the tuple returned by `run()`:
    # (conversation chain, vector store, retriever chain, urls). The previous
    # order stored each object under the wrong session key.
    conversion_rag_chain, vector_store, retriever_chain, urls = run()

    st.session_state["init"] = True
    st.session_state["vector_store"] = vector_store
    st.session_state["retriever_chain"] = retriever_chain
    st.session_state["conversion_rag_chain"] = conversion_rag_chain
    st.session_state["urls"] = urls
    st.session_state['chat_history'] = [
        AIMessage(content="Hello! I am a bot. Ask me anything!")
    ]

def run():
    """Create every pipeline component in dependency order.

    Returns:
        tuple: ``(conversation_rag_chain, vector_store, retriever_chain, urls)``
        in exactly that order.
    """
    source_urls = get_urls()
    store = get_vectorstore_from_url(source_urls)
    history_retriever = context_retriever_chain(vector_store=store)
    rag_chain = get_conversion_rag_chain(retriever_chain=history_retriever)

    return (
        rag_chain,
        store,
        history_retriever,
        source_urls,
    )


In [None]:
# Build every pipeline component once so the cells below can reuse them.
(
    conversation_rag_chain,
    vector_store,
    retriever_chain,
    urls,
) = run()

In [None]:
# get_response unwraps the chain output, so `response` holds the answer itself.
response = get_response(
    "What is the 2024 United States presidential election?",
    conversation_rag_chain,
)

In [None]:
# `get_response` already returns the value stored under "answer", so display
# it directly; indexing the answer string with 'answer' raises a TypeError.
response

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores.chroma import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from dotenv import load_dotenv
load_dotenv()
_urls=["https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfmaude/search.cfm",
"https://en.wikipedia.org/wiki/2024_United_States_presidential_election",
"https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpmn/pmn.cfm",
"https://pubmed.ncbi.nlm.nih.gov/",
"https://ec.europa.eu/tools/eu"]

def get_vectorstore_from_url(urls):
    """Scrape the given pages, chunk them and store their embeddings in Chroma.

    Args:
        urls: A single URL string or an iterable of URL strings.

    Returns:
        The Chroma vector store created from the chunks of all pages, using
        ``"website_db"`` as its persist directory.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(urls, str):
        urls = [urls]

    # The splitter is configured identically for every page, so build it once
    # instead of on each loop iteration.
    text_splitter = RecursiveCharacterTextSplitter()

    all_documents = []
    for url in urls:
        # get the text in document form, then split it into chunks
        document = WebBaseLoader(url).load()
        all_documents.extend(text_splitter.split_documents(document))

    # create a vectorstore from the chunks
    vector_store = Chroma.from_documents(
        documents=all_documents,
        embedding=OpenAIEmbeddings(),
        persist_directory="website_db",
    )
    return vector_store




In [None]:
# Scrape, chunk and index every page listed in `_urls`.
vector_store = get_vectorstore_from_url(
    _urls,
)

In [None]:
# Open a Chroma store on the same "website_db" directory used when indexing.
db2 = Chroma(
    persist_directory="website_db",
    embedding_function=OpenAIEmbeddings(),
)

In [None]:
# Ask the store for the k=3 entries most similar to the question.
res = db2.similarity_search(
    "What is the 2024 United States presidential election?",
    k=3,
)

In [None]:
# Chat model created with the library's default settings.
model = ChatOpenAI()


In [None]:
from urllib.parse import parse_qs, urlparse

from youtube_transcript_api import YouTubeTranscriptApi

# Video whose transcript is fetched below.
url = 'https://www.youtube.com/watch?v=GanTUWLUUWQ'

# Read the id from the `v` query parameter. Splitting on '=' breaks as soon as
# the URL carries further parameters (e.g. "&t=10s"), and the previous name
# `id` shadowed the Python builtin.
video_id = parse_qs(urlparse(url).query)["v"][0]

# srt receives the list of dictionaries returned by get_transcript();
# each entry is read through its 'text' key below.
srt = YouTubeTranscriptApi.get_transcript(video_id)

# Join the caption fragments with single spaces and print the result.
text = " ".join(entry["text"] for entry in srt)
print(text)

In [None]:
from langchain_community.document_loaders.blob_loaders.youtube_audio import (
    YoutubeAudioLoader,
)
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import OpenAIWhisperParser

In [None]:
# Pair the YouTube audio loader with the Whisper parser.
# NOTE(review): the second argument '.' is presumably the directory the audio
# is saved to - confirm against the YoutubeAudioLoader documentation.
loader = GenericLoader(
    YoutubeAudioLoader(
        ['https://www.youtube.com/watch?v=GanTUWLUUWQ'],
        '.',
    ),
    OpenAIWhisperParser(),
)

In [None]:
# Run the loader built in the previous cell; `docs` receives whatever it returns.
docs = loader.load()

In [None]:
import streamlit as st
from langchain_core.messages import AIMessage, HumanMessage
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores.chroma import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
import os


load_dotenv()
# Pages to scrape in this run; the Wikipedia article is currently disabled.
urls = [
    # "https://en.wikipedia.org/wiki/2024_United_States_presidential_election",
    "https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfmaude/search.cfm",
    "https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpmn/pmn.cfm",
    "https://pubmed.ncbi.nlm.nih.gov/",
    "https://ec.europa.eu/tools/eu",
]

# Load each page and keep the first document the loader returns for it.
documents = []
for url in urls:
    loader = WebBaseLoader(url)
    document = loader.load()
    documents.append(document[0])

In [40]:
for doc in documents:
    # Print where the text came from before its contents, as in the recorded
    # output of this cell. The earlier `source.replace(...)` call was dropped:
    # str.replace returns a new string and its result was being discarded.
    source = doc.metadata['source']
    print(source)
    print(doc.page_content)
    print("\n")

https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfmaude/search.cfm





 



MAUDE - Manufacturer and User Facility Device Experience










































Quick Links: Skip to main page content
Skip to Search
Skip to Topics Menu
Skip to Common Links






































                U.S. Food & Drug Administration
              

 







Follow FDA
En Español





Search FDA





 







Home
Food
Drugs
Medical Devices
Radiation-Emitting Products
Vaccines, Blood & Biologics
Animal & Veterinary
Cosmetics
Tobacco Products


 



 
 

MAUDE - Manufacturer and User Facility Device Experience





















FDA Home
Medical Devices
Databases





 -

















			The MAUDE database houses medical device reports submitted to the FDA by mandatory reporters 1 (manufacturers, importers and device user facilities) and voluntary reporters such as health care professionals, patients and consumers.
			


Learn More


Disclaimer







Search

In [None]:
for doc in documents:
    # The source is a URL; its '/' characters would be read as directory
    # separators, so every character outside a small safe set is mapped to
    # '_' to obtain a flat file name.
    safe_name = "".join(
        ch if ch.isalnum() or ch in "-._" else "_"
        for ch in doc.metadata['source']
    )
    # Write the page text to "<sanitized source>.txt"; the explicit encoding
    # keeps non-ASCII page text from depending on the platform default.
    with open(f"{safe_name}.txt", "w", encoding="utf-8") as f:
        f.write(doc.page_content)
    
