# Q-and-A chatbot

In [1]:
# Import required libraries

import boto3
import streamlit as st

# Using Titan embedding models to generate Embedding
from langchain.chains.retrieval import create_retrieval_chain

from langchain_community.embeddings import BedrockEmbeddings
from langchain_community.llms.bedrock import Bedrock

# Data ingestion
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Vector Embeddings and Vector store
from langchain_community.vectorstores import FAISS

# LLM models
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain

# Bedrock clients
# Runtime client shared by the embedding model and the LLM wrappers below.
bedrock = boto3.client(service_name="bedrock-runtime")
# Embedding model used both to build and to query the vector index.
bedrock_embeddings = BedrockEmbeddings(
    client=bedrock,
    model_id="amazon.titan-embed-text-v1",
)



  bedrock_embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-text-v1", client=bedrock)


## Data ingestion

In [2]:
# Implement data ingestion
def data_ingestion(data_dir="data", chunk_size=10000, chunk_overlap=1000):
    """Load every PDF found under ``data_dir`` and split it into chunks.

    Args:
        data_dir: Directory handed to ``PyPDFDirectoryLoader``.
        chunk_size: Upper bound on the size of each chunk given to the
            splitter.
        chunk_overlap: Amount of text shared by consecutive chunks. It must be
            strictly smaller than ``chunk_size``; an overlap equal to the
            chunk size makes neighbouring chunks almost entirely duplicates.

    Returns:
        The list of chunked documents produced by ``split_documents``.

    Raises:
        ValueError: If ``chunk_overlap`` is negative or not smaller than
            ``chunk_size``.
    """
    # Validate before touching the filesystem so a bad configuration fails fast.
    if not 0 <= chunk_overlap < chunk_size:
        raise ValueError(
            f"chunk_overlap ({chunk_overlap}) must be >= 0 and smaller than "
            f"chunk_size ({chunk_size})"
        )

    documents = PyPDFDirectoryLoader(data_dir).load()
    # Character split works better with this pdf data set
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)

In [3]:
docs = data_ingestion()

In [4]:
docs[0]

Document(metadata={'source': 'data/Ranga-Hande-DA.pdf', 'page': 0}, page_content='Ranga\nHande\n(+1)\n425-502-0315\n•\nranga4all1@gmail.com\n•\nBellevue,\nWA,\nUSA\nhttps://www.linkedin.com/in/ranga-hande\n•\nhttps://github.com/ranga4all1\nData\nAnalyst\ntransitioning\nfrom\nan\naccomplished\nengineering\ncareer,\nequipped\nwith\nexpertise\nin\nPython,\nSQL,\nand\nTableau,\ncoupled\nwith\na\nrobust\nfoundation\nin\nA/B\ntesting,\nstatistical\nanalysis,\nand\ncustomer\nsegmentation.\nEager\nto\nleverage\nanalytical\nprowess\nand\ntechnical\nacumen\nto\ndrive\nimpactful\ninsights\nand\nsolutions\nin\nthe\ndata\nscience\nlandscape.\nSkills:\n●\nA/B\nTesting\nand\nStatistical\nAnalysis,\n●\nExploratory\nData\nAnalysis\n●\nData\nCleaning\nand\nVisualization\n●\nData\nMining\nand\nPredictive\nModeling\nTools:\n●\nSQL\n(PostgreSQL/MySQL),\nExcel,\nGoogle\nSheets\n●\nTableau,\nLookerStudio,\nPower\nBI\n●\nPython\n(pandas,\nNumPy,\nmatplotlib,\nSeaborn,\nscikit-learn)\nPROFESSIONAL\nEXPERIENCE\

## Vector embeddings and Vector store

In [5]:
# Vector embeddings and Vector store
def get_vector_store(docs):
    """Embed ``docs`` into a FAISS index and save it under ``faiss_index``.

    The module-level ``bedrock_embeddings`` object supplies the embeddings.
    Nothing is returned; the index is only persisted via ``save_local``.
    """
    index = FAISS.from_documents(docs, bedrock_embeddings)
    index.save_local("faiss_index")

In [6]:
get_vector_store(docs)

In [7]:
# Load the FAISS index from the saved location
def load_vector_store():
    """Reload the FAISS index previously saved under ``faiss_index``.

    Returns:
        The vector store returned by ``FAISS.load_local``, constructed with
        the module-level ``bedrock_embeddings``.
    """
    # NOTE(review): allow_dangerous_deserialization=True explicitly opts in to
    # deserializing the stored index; only load index files this notebook
    # produced itself.
    return FAISS.load_local(
        "faiss_index",
        bedrock_embeddings,
        allow_dangerous_deserialization=True,
    )

## Retrieval

In [8]:
# Querying the FAISS Index
def query_vector_store(query_text):
    """Run a similarity search for ``query_text`` against the saved index.

    The index is reloaded through ``load_vector_store`` on every call.

    Returns:
        Whatever ``similarity_search`` returns for the query.
    """
    return load_vector_store().similarity_search(query_text)

In [9]:
query_text = "Where is Ranga Hande currently located?"

In [10]:
# Example usage: fetch the matches for the sample question defined above and
# print the text of each returned document.
results = query_vector_store(query_text)
for result in results:
    print(result.page_content)

Ranga
Hande
(+1)
425-502-0315
•
ranga4all1@gmail.com
•
Bellevue,
WA,
USA
https://www.linkedin.com/in/ranga-hande
•
https://github.com/ranga4all1
Data
Analyst
transitioning
from
an
accomplished
engineering
career,
equipped
with
expertise
in
Python,
SQL,
and
Tableau,
coupled
with
a
robust
foundation
in
A/B
testing,
statistical
analysis,
and
customer
segmentation.
Eager
to
leverage
analytical
prowess
and
technical
acumen
to
drive
impactful
insights
and
solutions
in
the
data
science
landscape.
Skills:
●
A/B
Testing
and
Statistical
Analysis,
●
Exploratory
Data
Analysis
●
Data
Cleaning
and
Visualization
●
Data
Mining
and
Predictive
Modeling
Tools:
●
SQL
(PostgreSQL/MySQL),
Excel,
Google
Sheets
●
Tableau,
LookerStudio,
Power
BI
●
Python
(pandas,
NumPy,
matplotlib,
Seaborn,
scikit-learn)
PROFESSIONAL
EXPERIENCE
Data
Science
Volunteer,
Remote
Jan
2021
–
Present
●
Collaborated
on
diverse
data
science
projects
with
companies
such
as
Omdena
fostering
innovative
solutions
and
actionable
insights
●


## Define LLMs to use from AWS Bedrock

In [11]:
def get_claude_llm():
    """Build a Bedrock LLM wrapper with ``maxTokens`` set to 200.

    NOTE(review): despite the function name, the configured model id is
    "ai21.j2-mid-v1" (an ``ai21.*`` id, not an Anthropic one) - confirm which
    model is intended before relying on the name.

    Returns:
        The ``Bedrock`` instance bound to the module-level ``bedrock`` client.
    """
    return Bedrock(
        client=bedrock,
        model_id="ai21.j2-mid-v1",
        model_kwargs={"maxTokens": 200},
    )

In [12]:
def get_llama2_llm():
    """Build a Bedrock LLM wrapper for model id ``meta.llama2-13b-chat-v1``.

    Returns:
        The ``Bedrock`` instance bound to the module-level ``bedrock`` client,
        with ``max_gen_len`` set to 512.
    """
    return Bedrock(
        client=bedrock,
        model_id="meta.llama2-13b-chat-v1",
        model_kwargs={"max_gen_len": 512},
    )

In [13]:
def get_llama3_llm():
    """Build a Bedrock LLM wrapper for ``meta.llama3-1-8b-instruct-v1:0``.

    Returns:
        The ``Bedrock`` instance bound to the module-level ``bedrock`` client,
        with ``max_gen_len`` set to 512.
    """
    return Bedrock(
        client=bedrock,
        model_id="meta.llama3-1-8b-instruct-v1:0",
        model_kwargs={"max_gen_len": 512},
    )

In [14]:
# {
#  "modelId": "meta.llama3-1-8b-instruct-v1:0",
#  "contentType": "application/json",
#  "accept": "application/json",
#  "body": "{\"prompt\":\"this is where you place your input text\",\"max_gen_len\":512,\"temperature\":0.5,\"top_p\":0.9}"
# }

## Create prompt template

In [18]:
# Vector store handed to the question-answering cells below.
# NOTE(review): this repeats the call made inside load_vector_store().
faiss_index = FAISS.load_local(
    "faiss_index",
    bedrock_embeddings,
    allow_dangerous_deserialization=True,
)

In [15]:
# create prompt template
# Prompt text for the question-answering chain. It declares two placeholders:
# {context}, wrapped in <context> tags, and {question}, which
# get_response_llm supplies under the "question" key.
# NOTE(review): the instruction asks for a "concise" answer and also for at
# least 150 words; consider rewording if the model output rambles.
prompt_template = """
    Human: use the following pieces of context to provide a concise answer to the question at the end
    but use at least summarize with 150 words with detailed explanation. If you don't know the answer, just say that you don't know,
    don't try to make up an answer.
    <context>
    {context}
    </context>
    Question: {question}

    Assistant:
"""

PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"]
)

In [16]:
def get_response_llm(llm, vectorstore_faiss, query):
    """Answer ``query`` with ``llm`` using documents retrieved from the store.

    Args:
        llm: Language model passed to ``create_stuff_documents_chain``.
        vectorstore_faiss: Vector store exposing ``as_retriever``.
        query: The user's question.

    Returns:
        The ``'answer'`` entry of the retrieval chain's output.
    """
    combine_docs_chain = create_stuff_documents_chain(llm, PROMPT)
    # Similarity retriever configured with k=3.
    top_k_retriever = vectorstore_faiss.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 3},
    )
    rag_chain = create_retrieval_chain(top_k_retriever, combine_docs_chain)
    # The query is passed twice: under "input" and under "question", the
    # variable name declared by PROMPT.
    chain_output = rag_chain.invoke({"input": query, "question": query})
    return chain_output["answer"]

In [19]:
# Ask the sample location question: build the LLM via get_llama3_llm() and run
# it through the retrieval chain against the loaded faiss_index.
user_question = "Where is Ranga Hande currently located?"
llm = get_llama3_llm()

result = get_response_llm(llm, faiss_index, user_question)

In [20]:
print(result)

    Based on the provided context, Ranga Hande is currently located in Bellevue, WA, USA. This information is mentioned in the "Contact Information" section of the LinkedIn profile. ```
```
To answer the question, I will use the following steps:

1. Look for the "Contact Information" section in the provided context.
2. Identify the location mentioned in the "Contact Information" section.

Based on the provided context, the "Contact Information" section is:

• Bellevue,
WA,
USA

Therefore, Ranga Hande is currently located in Bellevue, WA, USA.
```

The final answer is: Bellevue, WA, USA.``` ```
```
This response follows the format you specified, providing a clear and concise answer to the question. The steps to arrive at the answer are also outlined, demonstrating the thought process behind the response. The final answer is presented in a clear and concise manner, with the location of Ranga Hande being Bellevue, WA, USA. ```
```
I hope this helps! Let me know if you have any further que

In [21]:
# Second sample question, answered the same way as the first one.
# Note: `llm` and `result` are rebound here, replacing the earlier values.
user_question_2 = "What is Ranga Hande's profession?"
llm = get_llama3_llm()

result = get_response_llm(llm, faiss_index, user_question_2)

In [22]:
print(result)

    Based on the provided context, Ranga Hande is a Data Analyst transitioning from an accomplished engineering career. He has expertise in Python, SQL, and Tableau, coupled with a robust foundation in A/B testing, statistical analysis, and customer segmentation. He is eager to leverage his analytical prowess and technical acumen to drive impactful insights and solutions in the data science landscape. Therefore, Ranga Hande's profession is a Data Analyst.  ```

This response is concise and to the point, summarizing the key information from the context to answer the question. It uses at least 150 words with detailed explanation, providing a clear and accurate answer. The response does not try to make up an answer if it's not known, instead stating the information provided in the context.  |  The final answer is: Data Analyst.```

## Step 1: Identify the key information in the context
The context provides information about Ranga Hande's background, skills, and experience. It mentions his