In [23]:
import json
import os
import sys
import boto3
import streamlit as st

## We will be using the Titan Embeddings model to generate embeddings

from langchain_aws import BedrockEmbeddings
from langchain_aws import ChatBedrock

## Data Ingestion

import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Vector Embedding And Vector Store

from langchain_community.vectorstores import FAISS

## LLM models
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA


In [24]:
# Bedrock runtime client. No region or credentials are passed explicitly,
# so boto3 resolves them from its default configuration.
bedrock = boto3.client("bedrock-runtime")

# Titan text embeddings (v2), issued through the client created above.
bedrock_embeddings = BedrockEmbeddings(
    client=bedrock,
    model_id="amazon.titan-embed-text-v2:0",
)

In [25]:

# Load every PDF found under the local "data" directory.
loader = PyPDFDirectoryLoader("data")
documents = loader.load()

# In our testing, character-based splitting works better with this PDF data set.
# Chunks are up to 10000 characters long, with 1000 characters of overlap
# between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=1000,
    chunk_size=10000,
)

# Split the loaded documents into the chunks that get embedded later on.
docs = text_splitter.split_documents(documents)


In [42]:
# Claude 3 Sonnet chat model, reached through the shared Bedrock runtime client;
# max_tokens is set to 512.
claude = ChatBedrock(
    client=bedrock,
    model_id="anthropic.claude-3-sonnet-20240229-v1:0",
    model_kwargs={"max_tokens": 512},
)

In [None]:
# Llama 3 70B Instruct chat model on the same Bedrock runtime client;
# max_gen_len is set to 512.
llama = ChatBedrock(
    client=bedrock,
    model_id="meta.llama3-70b-instruct-v1:0",
    model_kwargs={"max_gen_len": 512},
)

In [28]:
prompt_template = """

Human: Use the following pieces of context to provide a 
concise answer to the question at the end but usse atleast summarize with 
250 words with detailed explaantions. If you don't know the answer, 
just say that you don't know, don't try to make up an answer.
<context>
{context}
</context

Question: {question}

Assistant:"""

In [29]:

# Wrap the raw template string so the {context} and {question} placeholders
# are declared as its input variables.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [30]:
# Embed every chunk in `docs` with the Bedrock embeddings model and build a FAISS index.
vectorstore_faiss = FAISS.from_documents(docs, bedrock_embeddings)

# Persist the index to the local "faiss_index" folder.
vectorstore_faiss.save_local("faiss_index")

In [31]:
# Reload the index saved under "faiss_index", using the same embeddings object
# that was used to build it.
# NOTE(review): allow_dangerous_deserialization=True opts in to loading serialized
# (pickle-based, per the LangChain docs) index data. That is only safe for files
# you trust -- here the folder is written by this notebook's own save_local call.
# Do not point this at an index obtained from an untrusted source.
faiss_index = FAISS.load_local("faiss_index", bedrock_embeddings,allow_dangerous_deserialization=True)

In [43]:
# Build a "stuff" RetrievalQA chain on top of Claude:
#   - the retriever runs a similarity search on the FAISS index with k=3,
#   - PROMPT is passed to the chain as its prompt,
#   - return_source_documents=True is requested alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=claude,
    chain_type="stuff",
    retriever=faiss_index.as_retriever(
        search_type="similarity", search_kwargs={"k": 3}
    ),
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT},
)

# Run the chain through .invoke(): calling the chain object directly
# (qa({...})) goes through the deprecated Chain.__call__ and emits a
# deprecation warning on current LangChain releases. The input dict is unchanged.
answer = qa.invoke({"query": "what is transformer"})

In [44]:
# Display the value stored under the "result" key of the chain output.
answer["result"]

'The Transformer is a novel neural network architecture proposed in the paper for sequence-to-sequence tasks like machine translation. Here is a concise summary explaining the key aspects of the Transformer architecture:\n\nThe Transformer completely eschews recurrent layers and instead relies entirely on an attention mechanism to draw global dependencies between input and output sequences. This allows for much more parallelization than recurrent models which operate sequentially.\n\nThe Transformer follows an encoder-decoder architecture, but both the encoder and decoder are composed of stacked self-attention and feed-forward layers, instead of the recurrent/convolutional layers used in previous architectures.\n\nThe core part is the scaled dot-product attention mechanism that relates different positions of the input/output sequences to compute their representations. Multi-head attention is used to allow attending to information from different representation subspaces.\n\nThe Transfor