In [24]:
import os
import streamlit as st
import pickle
import time
import langchain
from langchain_openai import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [42]:
# Never store a real key in the notebook: reuse the key already present in the
# environment and only prompt (without echoing) when it is missing or empty.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass  # local import: only needed on this fallback path

    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [26]:
# Completion model that is later handed to the QA chain.
llm = OpenAI(max_tokens=500, temperature=0.9)

# Articles that make up the knowledge base for this notebook.
article_urls = [
    "https://www.moneycontrol.com/technology/firstcry-ceo-supam-maheshwaris-monthly-remuneration-drops-49-to-rs-8-6-cr-article-12711957.html",
    "https://www.moneycontrol.com/news/technology/ads-cloud-subscriptions-how-google-parent-plans-to-monetise-its-ai-offerings-12708980.html",
]
loader = UnstructuredURLLoader(urls=article_urls)

# Load the pages behind the URLs; `data` feeds the text splitter in the next step.
data = loader.load()

In [27]:
# Split the loaded pages into chunks of at most 1000 characters, with 200
# characters of overlap between neighbouring chunks.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
docs = text_splitter.split_documents(data)

# Show how many chunks were produced.
len(docs)

42

In [28]:
# Inspect a single chunk to sanity-check the split. The index is hard-coded, so
# this assumes the splitter produced at least 37 chunks.
sample_chunk = docs[36]
sample_chunk

Document(page_content="Subscribers will also get access to Gemini's capabilities in the Alphabet's productivity apps such as Gmail, Docs, Slides, Sheets and Meet.\n\nGoogle's cloud business and YouTube is expected to reach a combined annual run rate of over $100 billion by the end of 2024, Pichai said during the call.\n\nAlphabet's financial performance, along with its first-ever cash dividend and a $70 billion stock buyback pushed the company's shares by nearly 12 percent in pre-market trading on April 25. This lifted Alphabet's market cap past $2 trillion.\n\nInvite your friends and family to sign up for MC Tech 3, our daily newsletter that breaks down the biggest tech and startup stories of the day\n\nDAILY-EVENING\n\nSUBSCRIBE\n\nEnd your day with a breakdown of the biggest tech and startup stories in your inbox\n\nDAILY-EVENING\n\nSUBSCRIBE\n\nEnd your day with a breakdown of the biggest tech and startup stories in your inbox\n\nVikas SN               covers Big Tech, streaming, s

In [29]:
# Embedding client, authenticated with the key held in the OPENAI_API_KEY
# environment variable.
embeddings = OpenAIEmbeddings(
    openai_api_key=os.environ["OPENAI_API_KEY"],
)

In [30]:
# Build a FAISS vector index from the chunks using the embedding client.
vectorindex_openai = FAISS.from_documents(
    documents=docs,
    embedding=embeddings,
)

In [31]:
# Persist the vector index under the local "faiss_store" path so it can act as
# a local database in later sessions.
vectorindex_openai.save_local(folder_path="faiss_store")

In [32]:
# Reload the index that was written to "faiss_store".
# NOTE(review): allow_dangerous_deserialization=True opts in to loading that the
# library itself labels dangerous. That is acceptable only because this store
# was written by this notebook; do not point it at files from an untrusted source.
vector_index_new = FAISS.load_local(
    folder_path="faiss_store",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

In [33]:
# Build the question-answering-with-sources chain on top of the index that was
# reloaded from disk (`vector_index_new`). Using the reloaded index rather than
# the in-memory one means the save/load round-trip is actually exercised and
# the reloaded object is no longer dead code.
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=vector_index_new.as_retriever(),
)

In [34]:
# Bare expression: shows the chain object's representation as the cell output.
chain



In [40]:
# Question to ask against the indexed articles.
query = "what are the concerns on AI?"

# Calling the chain object directly (`chain(...)`) is deprecated in LangChain;
# `invoke` is the supported entry point and takes the same input mapping.
# return_only_outputs=True keeps the input question out of the returned dict.
chain.invoke({"question": query}, return_only_outputs=True)

{'answer': ' Concerns surrounding AI include how companies will monetize their AI initiatives, competition in the market, and the potential consequences of using generative AI. Google CEO Sundar Pichai also mentioned being measured and prioritizing website traffic with their AI advancements, and introducing a subscription plan for advanced AI-powered chatbot access. \n',
 'sources': 'https://www.moneycontrol.com/news/technology/ads-cloud-subscriptions-how-google-parent-plans-to-monetise-its-ai-offerings-12708980.html'}