In [8]:
import os
import pickle
import time

import streamlit as st

import langchain
from langchain_openai import OpenAI
from langchain_classic.chains import RetrievalQAWithSourcesChain
from langchain_classic.chains.qa_with_sources.loading import load_qa_with_sources_chain 
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

from secret_key import openapi_key

In [9]:
# Publish the key (imported from the local `secret_key` module) through the
# process environment; the OpenAI LLM and embeddings objects in this notebook
# are constructed without an explicit key argument.
os.environ['OPENAI_API_KEY'] = openapi_key

In [15]:
# Completion model used later to answer questions over the retrieved chunks.
# NOTE(review): temperature=0.9 is fairly high for source-grounded QA;
# confirm this is intentional.
llm = OpenAI(temperature=0.9, max_tokens=500)

# Web pages to ingest into the knowledge base.
loaders = UnstructuredURLLoader(urls=[
    "https://business.bofa.com/en-us/content/market-strategies-insights/weekly-market-recap-report.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html",
])

# Fetch and parse the pages; the trailing expression displays how many
# documents were loaded.
data = loaders.load()
len(data)

2

In [16]:
# Chunking configuration: pieces of at most 1000 units with a 200-unit overlap
# between neighbours (units are characters under the splitter's default length
# function), so context is not lost at chunk boundaries.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

In [17]:
# Split the loaded pages into chunks with the configured splitter; the
# trailing expression displays the resulting chunk count.
docs = text_splitter.split_documents(data)
len(docs)

18

In [22]:
# Embedding client with library defaults; the same object is used both to
# build the FAISS index and to reload it from disk.
embeddings = OpenAIEmbeddings()

In [23]:
# Build the FAISS vector store from the chunks using the embedding client.
# NOTE(review): presumably this issues embedding API requests for every chunk,
# so re-running the cell is billable — confirm before running repeatedly.
vectorIndex_openai = FAISS.from_documents(docs,embeddings)

In [25]:
# Persist the vector store to a local directory (relative to the notebook's
# working directory) so it can be reloaded without rebuilding.
index_path = "faiss_vector_index"
vectorIndex_openai.save_local(index_path)
print(f"Vector store saved to {index_path}/")

Vector store saved to faiss_vector_index/


In [27]:
# Reload the index from disk, replacing the in-memory store with the saved copy.
# `index_path` (set in the save cell) is reused instead of repeating the
# directory name, so the save and load locations cannot drift apart.
#
# SECURITY: loading a LangChain FAISS store unpickles its docstore, which can
# execute arbitrary code. `allow_dangerous_deserialization=True` is acceptable
# here only because the index was written by this notebook; never enable it
# for an index obtained from an untrusted source.
vectorIndex_openai = FAISS.load_local(
    index_path,
    embeddings,
    allow_dangerous_deserialization=True,
)
print("Vector store loaded")

Vector store loaded


In [31]:
# Combine the LLM with a retriever over the FAISS store into a
# question-answering chain that also reports sources.
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=vectorIndex_openai.as_retriever(),
)
# Bare expression: show the assembled chain's repr as the cell output.
chain

