In [None]:
import os
import sys

# Make the directory one level above the notebook's working directory importable
# (presumably where the project's `models` and `utils` packages live).
current_working_directory = os.getcwd()

# Normalise to an absolute path so the sys.path entry has no trailing "..".
parent_directory = os.path.abspath(os.path.join(current_working_directory, ".."))
print(parent_directory)

# Guard the append so that re-running this cell does not add duplicate entries.
if parent_directory not in sys.path:
    sys.path.append(parent_directory)

In [None]:
from decouple import AutoConfig

# `search_path` is the directory where AutoConfig starts looking for a settings
# file (`settings.ini` or `.env`), not the path of the file itself. Pointing it
# at the parent directory finds the same `../.env` without relying on decouple's
# fall-back-to-parent behaviour.
config = AutoConfig(search_path='./..')

In [None]:
# Copy settings read through `config` into the process environment under the
# AZURE_* names (presumably the names the model wrappers loaded below expect —
# TODO confirm). Pairs are (environment variable, settings key), applied in order.
for env_name, setting_key in (
    ("AZURE_OPENAI_API_KEY", "OPENAI_API_KEY"),
    ("AZURE_OPENAI_ENDPOINT", "AZURE_ENDPOINT"),
):
    os.environ[env_name] = config(setting_key)

## Loading Generative Model

In [None]:
from models.llm import LLM

# Build the project's LLM wrapper for the "gpt-4" configuration ...
model = LLM("gpt-4")
# ... and obtain the model object that the QA chains further down receive as `llm`.
llm = model.load_model()

## Loading Embedding Model

In [None]:
from models.em import EM

# Build the project's embedding-model wrapper for the "ada-2" configuration.
# NOTE(review): this rebinds `model`, which an earlier cell used for the LLM
# wrapper; only `llm` and `embeddings` are referenced afterwards.
model = EM("ada-2")
# `embeddings` is later handed to the vector-store helpers.
embeddings = model.load_model()

## Data Ingestion

In [None]:
from langchain_community.document_loaders import WebBaseLoader
import bs4

# Restrict HTML parsing to `li` elements carrying the "clearfix" class.
news_item_filter = bs4.SoupStrainer("li", class_="clearfix")

# Load the Moneycontrol stock-news page; splitting and indexing happen in later cells.
loader = WebBaseLoader(
    web_paths=("https://www.moneycontrol.com/news/business/stocks/",),
    bs_kwargs={"parse_only": news_item_filter},
)
docs = loader.load()

In [None]:
# Inspect what the web loader returned.
docs

### Embedding and Storing Documents in Vector Store

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configuration: chunks of at most 400 units as measured by `len`
# (i.e. characters), no overlap between neighbouring chunks, and the two
# whitespace-padded newline separators tried in the listed order.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=400,
    chunk_overlap=0,
    length_function=len,
    separators=[" \n\n ", " \n "],
)

In [None]:
# Re-display the loaded documents; the splitter defined above has not been
# applied to them yet, so this is the same content the loader returned.
docs

In [None]:
from utils.vectordb import VectorDB

# Hand the embedding model, the splitter and the loaded documents to the
# project's vector-store helper. Presumably this splits, embeds and persists
# the documents — TODO confirm against utils/vectordb.
vectorstore = VectorDB()
db = vectorstore.create_and_dump(
    embeddings,
    text_splitter=text_splitter,
    inp_text=docs,
)

In [None]:
# Sanity-check retrieval: run a similarity search for a sample question against
# the store created above and display whatever it returns.
question = "Which stocks increased despite promoters selling?"
top_k = db.similarity_search(question)
top_k

## Answer Generation

In [None]:
# Re-imported so this section can be read on its own; VectorDB was already
# imported in the ingestion section.
from utils.vectordb import VectorDB

# Fresh helper instance; the retriever is built from the same embedding model.
# NOTE(review): presumably load_retriever reads the store written by
# create_and_dump — confirm against utils/vectordb.
vectorstore = VectorDB()
retriever = vectorstore.load_retriever(embeddings)

In [None]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Prompt text assembled from explicit pieces so the trailing space before each
# newline (part of the prompt as written) is visible rather than hidden at the
# end of a source line. `{context}` and `{question}` are the template variables.
template = (
    "Use the following pieces of context to answer the question at the end. \n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer. \n"
    "Use three sentences maximum and keep the answer as concise as possible. \n"
    'Always say "thanks for asking!" at the end of the answer. \n'
    "{context}\n"
    "Question: {question}\n"
    "Helpful Answer:"
)
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Retrieval QA chain over the retriever loaded above; the retrieved documents
# are requested alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=dict(prompt=QA_CHAIN_PROMPT),
)


In [None]:
# Ask the retrieval QA chain a question (it takes its input under the "query"
# key) and show only the answer text.
question = "Which company is discussed in the context?"
result = qa_chain.invoke(dict(query=question))
result["result"]

In [None]:
# Show the complete response from the last chain call, not just the answer text;
# the chain was built with return_source_documents=True, so the retrieved
# documents should appear here as well.
result

In [None]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.prompts import PromptTemplate

# Prompt for the with-sources chain; `{summaries}` is the variable this chain
# type fills with the retrieved context.
#
# RetrievalQAWithSourcesChain fills its `sources` output by splitting the model's
# reply on a "SOURCES:" marker. The previous prompt never asked for that marker,
# so `sources` would come back empty unless the model happened to emit it. The
# extra instruction below requests it explicitly, after the sign-off.
#
# NOTE: this rebinds `template` and `QA_CHAIN_PROMPT` from the RetrievalQA cell;
# the chain built there keeps its own prompt object.
template = (
    "Use the following pieces of context to answer the question at the end. \n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer. \n"
    "Use three sentences maximum and keep the answer as concise as possible. \n"
    'Always say "thanks for asking!" at the end of the answer. \n'
    'Then finish with one final line that starts with "SOURCES:" followed by a '
    "comma-separated list of the Source values of the context you used. \n"
    "{summaries}\n"
    "Question: {question}\n"
    "Helpful Answer:"
)
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Same LLM and retriever as the plain QA chain; the retrieved documents are
# returned alongside the answer and the parsed sources.
qa_src_chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

In [None]:
# Ask the with-sources chain (it takes its input under the "question" key) and
# show the whole response.
question = "Which stocks looks bullish?"
result = qa_src_chain.invoke(dict(question=question))
result