In [None]:

import streamlit as st
from langchain_core.messages import AIMessage,HumanMessage
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever,create_retrieval_chain
from langchain.chains.combine_documents import  create_stuff_documents_chain
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI API key used by
# OpenAIEmbeddings/ChatOpenAI below — confirm.
load_dotenv()

def get_urls():
    """Return the list of web page URLs that seed the knowledge base."""
    # The separating comma must sit outside the string literal; inside the
    # quotes it becomes part of the URL and the request targets the wrong page.
    return [
        "https://en.wikipedia.org/wiki/2024_United_States_presidential_election",
    ]


def get_vectorstore_from_url(url):
    """Build a Chroma vector store from one or more web pages.

    Args:
        url: A single URL string, or a sequence of URL strings (this is what
            ``run()`` passes, via ``get_urls()``).

    Returns:
        The Chroma vector store created from the chunked page contents,
        embedded with ``OpenAIEmbeddings``.
    """
    # Honour the caller's argument instead of overwriting it with a constant.
    # A bare string is wrapped so the loader always receives a list of URLs.
    urls = [url] if isinstance(url, str) else list(url)
    loader = WebBaseLoader(urls)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter()
    document_chunks = text_splitter.split_documents(documents)
    vector_store = Chroma.from_documents(document_chunks, OpenAIEmbeddings())
    return vector_store

def context_retriever_chain(vector_store):
    """Create a history-aware retriever on top of ``vector_store``.

    Args:
        vector_store: A vector store exposing ``as_retriever()``.

    Returns:
        The chain produced by ``create_history_aware_retriever``; it expects
        ``chat_history`` and ``input`` keys when invoked.
    """
    llm = ChatOpenAI()
    retriever = vector_store.as_retriever()
    prompt = ChatPromptTemplate.from_messages(
        [
            MessagesPlaceholder(variable_name="chat_history"),
            ("user", "{input}"),
            # Without an explicit instruction the model answers the question
            # instead of producing a search query, and that answer would be
            # what gets sent to the retriever on follow-up turns.
            (
                "user",
                "Given the above conversation, generate a search query to look up "
                "in order to get information relevant to the conversation",
            ),
        ]
    )
    retriever_chain = create_history_aware_retriever(llm, retriever, prompt)
    return retriever_chain


def get_conversion_rag_chain(retriever_chain):
    """Assemble the full retrieval-augmented answering chain.

    Args:
        retriever_chain: The retriever (or retriever chain) that supplies the
            context documents.

    Returns:
        The chain built by ``create_retrieval_chain``, combining
        ``retriever_chain`` with a stuff-documents answering chain.
    """
    chat_model = ChatOpenAI()

    # Prompt layout: system instruction with retrieved context, the prior
    # conversation, then the user's current question.
    answer_messages = [
        ('system', "answer the following questions based on the below context:\n\n{context}"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
    ]
    answer_prompt = ChatPromptTemplate.from_messages(answer_messages)

    document_chain = create_stuff_documents_chain(chat_model, answer_prompt)
    return create_retrieval_chain(retriever_chain, document_chain)

def get_response(user_input, conversation_rag_chain, chat_history=None):
    """Ask the RAG chain a question and return its answer text.

    Args:
        user_input: The question to send as the chain's ``input``.
        conversation_rag_chain: Any object with an ``invoke(dict)`` method
            whose result is a mapping containing an ``"answer"`` key.
        chat_history: Optional list of prior messages forwarded to the chain.
            Defaults to an empty history, matching the previous behaviour.

    Returns:
        The value stored under ``"answer"`` in the chain's response.
    """
    response = conversation_rag_chain.invoke({
        "chat_history": [] if chat_history is None else chat_history,
        "input": user_input,
    })
    return response["answer"]


def initialize_session_state():
    """Build the RAG pipeline once and cache its parts in Streamlit session state.

    Stores the vector store, retriever chain, RAG chain and source URLs under
    their respective keys, marks the session as initialised, and seeds the
    chat history with a greeting from the bot.
    """
    # run() returns (conversation_rag_chain, vector_store, retriever_chain, urls);
    # unpack in that exact order so each session key holds the right object.
    conversion_rag_chain, vector_store, retriever_chain, urls = run()
    st.session_state["init"] = True
    st.session_state["vector_store"] = vector_store
    st.session_state["retriever_chain"] = retriever_chain
    st.session_state["conversion_rag_chain"] = conversion_rag_chain
    st.session_state["urls"] = urls
    st.session_state['chat_history'] = [AIMessage("Hello! I am a bot. Ask me anything!")]

def run():
    """Build every stage of the pipeline and return the pieces.

    Returns:
        A 4-tuple ``(conversation_rag_chain, vector_store, retriever_chain, urls)``.
    """
    source_urls = get_urls()
    store = get_vectorstore_from_url(source_urls)
    history_retriever = context_retriever_chain(vector_store=store)
    rag_chain = get_conversion_rag_chain(retriever_chain=history_retriever)
    return rag_chain, store, history_retriever, source_urls


In [None]:
# Build the pipeline; the names are unpacked in the same order run() returns them.
conversation_rag_chain,vector_store,retriever_chain,urls=run()

In [None]:
# Smoke-test the chain with a single question; get_response() returns the
# value under the chain result's 'answer' key.
response=get_response("What is the 2024 United States presidential election?",conversation_rag_chain)

In [None]:
# get_response() already returns the chain's 'answer' value, so display it
# directly; indexing it again with ['answer'] would raise a TypeError on a str.
response

In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores.chroma import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
load_dotenv()
# Pages to index into the persisted vector store.
# NOTE(review): the last entry ("https://ec.europa.eu/tools/eu") looks
# truncated — confirm the intended URL.
_urls=["https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfmaude/search.cfm",
"https://en.wikipedia.org/wiki/2024_United_States_presidential_election",
"https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpmn/pmn.cfm",
"https://pubmed.ncbi.nlm.nih.gov/",
"https://ec.europa.eu/tools/eu"]

def get_vectorstore_from_url(urls, persist_directory="website_db"):
    """Load web pages, chunk them, and persist them in a Chroma vector store.

    Args:
        urls: An iterable of URL strings. A single URL string is also accepted
            and treated as a one-element list (iterating a bare string would
            otherwise try to load each character as a URL).
        persist_directory: Directory passed to Chroma for persistence.
            Defaults to ``"website_db"``, the value previously hard-coded.

    Returns:
        The Chroma vector store built from the chunks of all pages, embedded
        with ``OpenAIEmbeddings``.
    """
    if isinstance(urls, str):
        urls = [urls]

    # The splitter carries no per-URL state, so build it once outside the loop.
    text_splitter = RecursiveCharacterTextSplitter()

    all_documents = []
    for url in urls:
        # Fetch the page as documents, then split it into chunks.
        page_documents = WebBaseLoader(url).load()
        all_documents.extend(text_splitter.split_documents(page_documents))

    # Embed every chunk and write the collection to disk.
    vector_store = Chroma.from_documents(
        documents=all_documents,
        embedding=OpenAIEmbeddings(),
        persist_directory=persist_directory,
    )

    return vector_store




In [2]:
# Index every page in _urls; the resulting store is persisted under "website_db".
vector_store=get_vectorstore_from_url(_urls)

In [None]:
# Re-open the collection from the "website_db" directory that
# get_vectorstore_from_url() persists to, using OpenAIEmbeddings again.
db2=Chroma(persist_directory="website_db",embedding_function=OpenAIEmbeddings())

In [None]:
# Request the k=3 closest matches for the question from the reloaded store.
res=db2.similarity_search("What is the 2024 United States presidential election?",k=3)

In [None]:
# Chat model instance with default settings.
# NOTE(review): not used by any cell visible here — confirm it is still needed.
model=ChatOpenAI()


In [3]:
from urllib.parse import parse_qs, urlparse

from youtube_transcript_api import YouTubeTranscriptApi

# Video whose transcript is fetched.
url = 'https://www.youtube.com/watch?v=GanTUWLUUWQ'

# Read the "v" query parameter rather than splitting on '=', so URLs that carry
# extra parameters (e.g. "&t=30s") still yield just the video id. The name
# `video_id` also avoids shadowing the builtin `id`.
video_id = parse_qs(urlparse(url).query)['v'][0]

# get_transcript() returns a list of dictionaries, each with a 'text' entry.
srt = YouTubeTranscriptApi.get_transcript(video_id)

# Concatenate the segments, each followed by a single space, and print the result.
text = ''.join(segment['text'] + ' ' for segment in srt)
print(text)

this project is incredible let me show you what is essentially a completely open-source version of perplexity and if you're not familiar with perplexity this is it it's basically an answer engine and a lot of people are using it instead of Google Search now you enter your query and it uses artificial intelligence to put together a page to answer your question directly let me show you so I'm going to ask how do I make ramen and so we got pictures of ramen over on the right side we have the answer right here step by step it also has the sources that it pulled this information from so it basically is Google search but instead of having to click through a bunch of links it gives you the answer directly and it's super useful but what if I wanted to do this completely open source and even run it using open source models well that's what this is I'm on Local Host 3000 and this is called answer engine so let me ask the same question how do I make wrong all right we got pictures we have videos 

In [4]:
from langchain_community.document_loaders.blob_loaders.youtube_audio import (
    YoutubeAudioLoader,
)
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import OpenAIWhisperParser

In [10]:
# Combine a YouTube audio downloader (saving into the current directory, '.')
# with the OpenAI Whisper parser.
loader = GenericLoader(YoutubeAudioLoader(['https://www.youtube.com/watch?v=GanTUWLUUWQ'], '.'), OpenAIWhisperParser())

In [11]:
# Run the download + parse pipeline.
# NOTE: the recorded run below failed in post-processing with
# "ffprobe and ffmpeg not found"; ffmpeg must be installed and discoverable
# (or its location provided) for this cell to succeed.
docs = loader.load()

[youtube] Extracting URL: https://www.youtube.com/watch?v=GanTUWLUUWQ
[youtube] GanTUWLUUWQ: Downloading webpage
[youtube] GanTUWLUUWQ: Downloading ios player API JSON
[youtube] GanTUWLUUWQ: Downloading android player API JSON
[youtube] GanTUWLUUWQ: Downloading m3u8 information
[info] GanTUWLUUWQ: Downloading 1 format(s): 140
[download] Destination: ./Answer Engine Tutorial： The Open-Source Perplexity Search Replacement.m4a
[download] 100% of    8.63MiB in 00:00:01 at 6.78MiB/s   


ERROR: Postprocessing: ffprobe and ffmpeg not found. Please install or provide the path using --ffmpeg-location


DownloadError: ERROR: Postprocessing: ffprobe and ffmpeg not found. Please install or provide the path using --ffmpeg-location