In [1]:
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
import os
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI

In [2]:
# Load key=value pairs from a local .env file into the process environment
# so that the os.getenv lookup in the next cell can find the API key.
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
# Hand the Gemini API key from the environment to the google.generativeai client.
# A missing GOOGLE_API_KEY is passed through as None rather than raising here.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

In [4]:
# Name of the Ollama embedding model used to build the FAISS index.
EMBED_MODEL_NAME = "mxbai-embed-large"
embed_model = OllamaEmbeddings(model=EMBED_MODEL_NAME)

In [5]:
# Directory scanned for PDF files; "." is resolved against the kernel's working directory.
PDF_DIR = "."
loader = PyPDFDirectoryLoader(PDF_DIR)

In [6]:
# Parse the PDFs found by the loader into LangChain Document objects.
documents = loader.load()

In [7]:
# Echo the loaded documents to eyeball the extracted text.
# NOTE(review): this dumps every document in full; `documents[:1]` or
# `len(documents)` would keep the saved output short.
documents

[Document(page_content='Attention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗†\nUniversity of Toronto\naidan@cs.toronto.eduŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nbased solely on attention mechanisms, dispensing with recurrence and convolutions\nentirely. Experiments on two machine translation tasks show these models to\nbe superior in quality while being more parallelizable and requiring

In [8]:
# Build the FAISS index from overlapping chunks instead of whole documents.
# Embedding an entire PDF page as one vector blurs unrelated passages together
# and can exceed the embedding model's input window (presumably 512 tokens for
# mxbai-embed-large -- TODO confirm), so retrieval would only reflect the start
# of each page.
if not documents:
    # Fail with a readable message instead of an opaque error from FAISS.
    raise ValueError("No PDF pages were loaded; nothing to index.")

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
document_chunks = text_splitter.split_documents(documents)
db = FAISS.from_documents(document_chunks, embed_model)

In [11]:
# Wrap the FAISS store in LangChain's retriever interface.
# NOTE(review): the cells visible below query `db` directly and never use `retriever`.
retriever = db.as_retriever()

In [17]:
def get_conversational_chain(model="gemini-pro", temperature=0.3):
    """Build a "stuff" question-answering chain backed by a Gemini chat model.

    Args:
        model: Model name passed to ChatGoogleGenerativeAI. Defaults to the
            value that was previously hard-coded.
        temperature: Sampling temperature passed to the chat model.

    Returns:
        The chain created by load_qa_chain. Callers feed it a dict with the
        keys ``input_documents`` and ``question``.
    """
    # The stray "?" that used to follow {context} was removed: it appended a
    # spurious question mark to the retrieved text inside the prompt.
    prompt_template = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, \n\n
    Context:\n {context}\n
    Question: \n{question}\n

    Answer:
    """

    llm = ChatGoogleGenerativeAI(model=model, temperature=temperature)

    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    return load_qa_chain(llm, chain_type="stuff", prompt=prompt)

In [18]:
def user_input(user_question, k=4):
    """Answer a question using the documents indexed in ``db``.

    Args:
        user_question: Natural-language question to answer.
        k: Number of nearest documents fetched from the vector store and
            passed to the chain as context. 4 matches the library default
            that the previous implicit call relied on.

    Returns:
        The chain's output dict; the answer text is under ``output_text``.
    """
    docs = db.similarity_search(user_question, k=k)
    chain = get_conversational_chain()

    # Calling the chain object directly (chain(...)) is deprecated in
    # LangChain; invoke() is the supported entry point and accepts the same
    # return_only_outputs flag.
    return chain.invoke(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )

In [19]:
# Smoke-test the pipeline with a question the indexed paper should be able to answer.
user_input("what is attention")

{'output_text': 'Attention is a mechanism that allows neural networks to focus on specific parts of the input data. This is done by assigning different weights to different parts of the input, with the weights indicating the importance of each part. The weighted input is then used to compute the output of the network.\n\nAttention is commonly used in natural language processing (NLP) tasks, such as machine translation and text summarization. In these tasks, attention allows the network to focus on the most relevant parts of the input text, such as the subject of a sentence or the main idea of a paragraph.\n\nAttention can also be used in computer vision tasks, such as object detection and image classification. In these tasks, attention allows the network to focus on the most important parts of the image, such as the object of interest or the most discriminative features.\n\nAttention is a powerful mechanism that can improve the performance of neural networks on a wide range of tasks. B