# Importing Libraries

In [9]:
import os
import pickle
import time

import langchain
import streamlit as st
from dotenv import load_dotenv
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS



In [12]:
# Pull variables from a local .env file (if one exists) into the process
# environment before reading the key. Without this the lookup only succeeds
# when the kernel was launched from a shell that already exports
# OPENAI_API_KEY; otherwise api_key is None and the LLM cell fails validation.
# NOTE(review): assumes the key is kept in a .env file, as the Streamlit
# section of this notebook does — confirm.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

In [13]:
# Never echo the secret itself: cell outputs are stored in the notebook file
# and are easily committed or shared. Report only whether a key was found.
print("OPENAI_API_KEY loaded" if api_key else "OPENAI_API_KEY is not set")

None


In [15]:
# Completion model shared by the rest of the notebook.
llm = OpenAI(
    openai_api_key=api_key,  # key read from the environment in the cell above
    temperature=0.9,
    max_tokens=500,
)

ValidationError: 1 validation error for OpenAI
  Value error, Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass `openai_api_key` as a named parameter. [type=value_error, input_value={'openai_api_key': None, ...ne, 'http_client': None}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.9/v/value_error

# Loading the Documents

In [12]:
# The two Moneycontrol articles used as the sample corpus.
article_urls = [
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html",
]
loaders = UnstructuredURLLoader(urls=article_urls)
data = loaders.load()
# Show how many documents were loaded.
len(data)

2

# Splitting The Text

In [14]:
# Break the loaded articles into overlapping chunks (size 500, overlap 100)
# so each piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=500)
docs = text_splitter.split_documents(data)
# Show how many chunks the splitter produced.
len(docs)

33

In [18]:
# Peek at one chunk (index 3) to sanity-check what the splitter produced.
docs[3]

Document(metadata={'source': 'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html'}, page_content='Reuters\n\nSeptember 12, 2023 / 06:56 IST\n\nWall Street rises as Tesla soars on AI optimism\n\nThe Nasdaq closed sharply higher on Monday as Tesla surged on optimism around artificial intelligence and investors awaited inflation data due later this week.\n\nTeslaÂ (TSLA.O)Â rallied 10% after Morgan StanleyÂ upgradedÂ the electric car maker to "overweight" from "equal-weight," saying its Dojo supercomputer could boost the company\'s market value by nearly $600 billion.')

# Creating OpenAI Embeddings

In [21]:
# Hand the embeddings client the same key the LLM cell uses rather than
# relying on a separate, implicit environment lookup, so both OpenAI clients
# in this notebook are always configured from one source.
embeddings = OpenAIEmbeddings(openai_api_key=api_key)


# Creating the Database (FAISS Vector Index)

In [None]:
# Embed every chunk and build the FAISS index over the resulting vectors.
vectorindex_openai = FAISS.from_documents(
    documents=docs,
    embedding=embeddings,
)

# Storing the Vector Index Locally

In [None]:
# Serialise the whole vector store to vector_index.pkl so later cells can
# reload it instead of re-embedding the documents.
with open("vector_index.pkl", mode="wb") as index_file:
    pickle.dump(vectorindex_openai, index_file)

# Loading the Pickle File

In [None]:
# Read back the vector store written by the previous cell.
# Caution: unpickling runs code embedded in the file, so only load pickles
# this notebook created itself.
with open("vector_index.pkl", mode="rb") as index_file:
    vectorIndex = pickle.load(index_file)

# Creating RetrievalQAWithSourcesChain

In [None]:
# Combine the LLM with a retriever over the reloaded index; the chain type
# name indicates answers are returned together with their sources.
chain = RetrievalQAWithSourcesChain.from_llm(
    retriever=vectorIndex.as_retriever(),
    llm=llm,
)
# Display the assembled chain.
chain

In [None]:
query = "What is the price of Tiago iCNG?"
# Turn on LangChain's global debug flag so the chain's intermediate steps are
# logged while it runs.
langchain.debug = True
# The inputs dict carries only the question; `return_only_outputs` is a
# keyword argument of the call itself. Writing it inside the dict literal,
# as the original did, is a SyntaxError.
chain({"question": query}, return_only_outputs=True)

In [None]:
# In PyCharm: the project consists of requirements.txt and main.py.
# The cells below hold the contents of main.py.

In [None]:
# main.py
#
# Imports and one-time setup for the Streamlit app. The original cell began
# with a bare `main.py` expression (a NameError), built the LLM before any
# import had run, and never imported OpenAI or RecursiveCharacterTextSplitter,
# both of which the app code uses.
import os
import pickle
import time

import langchain
import streamlit as st
from dotenv import load_dotenv
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Load .env before constructing the client: OpenAI() is given no explicit key
# here, so OPENAI_API_KEY must already be in the environment at this point.
load_dotenv()
llm = OpenAI(temperature=0.9, max_tokens=500)



In [None]:
# Load variables from a .env file into the environment (python-dotenv).
# NOTE(review): presumably this is where OPENAI_API_KEY is kept — confirm.
load_dotenv()

In [None]:
st.title("News Research Tool 📈")
st.sidebar.title("News Article Urls")

# One sidebar text box per article, labelled "URL 1" .. "URL 3".
# The original label f"{URL} {i+1}" interpolated an undefined name `URL`
# and raised NameError on the first iteration.
urls = [st.sidebar.text_input(f"URL {i + 1}") for i in range(3)]

process_url_clicked = st.sidebar.button("Process URL")


# Where the built vector store is persisted between Streamlit reruns.
file_path = "faiss_store_openai.pkl"
# Single-slot placeholder reused for status text and, later, the question box.
main_placeholder = st.empty()
if process_url_clicked:
    # Boxes the user left blank come back as "" — keep only real URLs.
    article_urls = [u.strip() for u in urls if u and u.strip()]
    if not article_urls:
        # Reported in the sidebar because main_placeholder is overwritten by
        # the question box further down on this same run.
        st.sidebar.warning("Enter at least one URL before processing.")
    else:
        # load data (keyword is `urls`, not `url`)
        main_placeholder.text("Data Loading........Started......✅✅✅")
        loader = UnstructuredURLLoader(urls=article_urls)
        data = loader.load()
        # split data — separators are newline escapes ("\n"), not the literal
        # two-character text "/n" the original used
        main_placeholder.text("TextSplitter.......Started.........✅✅✅")
        text_splitter = RecursiveCharacterTextSplitter(
            separators=["\n\n", "\n", ",", "."],
            chunk_size=500,
        )
        docs = text_splitter.split_documents(data)
        # create embeddings; update the placeholder in place rather than
        # rebinding main_placeholder to a new element as the original did
        main_placeholder.text("Embedding Vector Started Building......✅✅✅")
        embeddings = OpenAIEmbeddings()
        vectorstore_openai = FAISS.from_documents(docs, embeddings)
        time.sleep(2)

        # save this vectorindex file into pickle file
        # (the original called the undefined `ope` and passed the literal
        # string "file_path" instead of the variable)
        # NOTE(review): some LangChain versions reportedly cannot pickle the
        # FAISS store; if this raises, switch save and load together to the
        # store's own save_local/load_local — verify against installed version.
        with open(file_path, "wb") as f:
            pickle.dump(vectorstore_openai, f)
        
        
        
query = main_placeholder.text_input("Question: ")
if query:
    # `os.path.exists` — the original `os.file_path.exists` is an AttributeError.
    if os.path.exists(file_path):
        # Caution: unpickling executes code embedded in the file; this is only
        # acceptable because the file is written by this app itself.
        with open(file_path, "rb") as f:
            vectorstore = pickle.load(f)
        chain = RetrievalQAWithSourcesChain.from_llm(
            llm=llm, retriever=vectorstore.as_retriever()
        )
        # The chain's input key is lowercase "question" and the keyword is
        # `return_only_outputs` (plural); the original misspelled both.
        result = chain({"question": query}, return_only_outputs=True)
        st.header("Answer")
        # result has the shape {"answer": "", "sources": ""}
        st.write(result["answer"])

        # display sources if available
        sources = result.get("sources", "")
        if sources:
            st.subheader("Sources:")
            for source in sources.split("\n"):
                st.write(source)
    else:
        # Previously a question asked before any URLs were processed was
        # silently ignored; tell the user what to do instead.
        st.warning(
            'No saved index found. Enter article URLs in the sidebar and click "Process URL" first.'
        )
            
            
        
        
        