# Q&A chatbot

In [2]:
# Import required libraries

import warnings
from pydantic import BaseModel
# from langchain.embeddings import BedrockEmbeddings

import boto3
import streamlit as st

# Retrieval chain, plus the Bedrock (Titan) embedding and LLM wrappers
from langchain.chains.retrieval import create_retrieval_chain

from langchain_community.embeddings import BedrockEmbeddings
from langchain_community.llms.bedrock import Bedrock

# Data ingestion
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Vector Embeddings and Vector store
from langchain_community.vectorstores import FAISS

# LLM models
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain


# Silence pydantic's "protected namespace" field warnings. `message` is a regex
# matched against the start of the warning text.
warnings.filterwarnings("ignore", message="Field .* has conflict with protected namespace.*")

# Silence the LangChain deprecation notice for `BedrockEmbeddings` only; the
# message pattern does not match deprecation warnings raised for other classes.
warnings.filterwarnings("ignore", category=DeprecationWarning, message="The class `BedrockEmbeddings` was deprecated.*")

# Bedrock clients: one runtime client, shared by the embedding model created here
# and by the LLM wrappers defined later in the notebook. No region or credentials
# are passed, so they must come from the environment's AWS configuration.
bedrock = boto3.client(service_name="bedrock-runtime")
bedrock_embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-text-v1", client=bedrock)

## Data ingestion

In [3]:
# Implement data ingestion
def data_ingestion(data_dir="data", chunk_size=10000, chunk_overlap=1000):
    """Load every PDF under ``data_dir`` and split the pages into overlapping chunks.

    Args:
        data_dir: Directory scanned by ``PyPDFDirectoryLoader``.
        chunk_size: Maximum size of a chunk, as measured by the splitter.
        chunk_overlap: Amount of text shared between neighbouring chunks. It must
            be strictly smaller than ``chunk_size``; when the two are equal, each
            new chunk repeats almost all of the previous one.

    Returns:
        The list of split documents produced by
        ``RecursiveCharacterTextSplitter.split_documents``.

    Raises:
        ValueError: If ``chunk_overlap`` is not smaller than ``chunk_size``.
    """
    if chunk_overlap >= chunk_size:
        raise ValueError(
            f"chunk_overlap ({chunk_overlap}) must be smaller than chunk_size ({chunk_size})"
        )

    loader = PyPDFDirectoryLoader(data_dir)
    documents = loader.load()
    # Character split works better with this pdf data set
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)

In [4]:
# Load and chunk the PDFs once; later cells reuse `docs`
docs = data_ingestion()

In [5]:
# Peek at the first chunk to sanity-check the extracted text.
# NOTE(review): the rendered output contains contact details from the source PDF;
# consider clearing it before sharing the notebook.
docs[0]

Document(metadata={'source': 'data/Ranga-Hande-DA.pdf', 'page': 0}, page_content='Ranga\nHande\n(+1)\n425-502-0315\n•\nranga4all1@gmail.com\n•\nBellevue,\nWA,\nUSA\nhttps://www.linkedin.com/in/ranga-hande\n•\nhttps://github.com/ranga4all1\nData\nAnalyst\ntransitioning\nfrom\nan\naccomplished\nengineering\ncareer,\nequipped\nwith\nexpertise\nin\nPython,\nSQL,\nand\nTableau,\ncoupled\nwith\na\nrobust\nfoundation\nin\nA/B\ntesting,\nstatistical\nanalysis,\nand\ncustomer\nsegmentation.\nEager\nto\nleverage\nanalytical\nprowess\nand\ntechnical\nacumen\nto\ndrive\nimpactful\ninsights\nand\nsolutions\nin\nthe\ndata\nscience\nlandscape.\nSkills:\n●\nA/B\nTesting\nand\nStatistical\nAnalysis,\n●\nExploratory\nData\nAnalysis\n●\nData\nCleaning\nand\nVisualization\n●\nData\nMining\nand\nPredictive\nModeling\nTools:\n●\nSQL\n(PostgreSQL/MySQL),\nExcel,\nGoogle\nSheets\n●\nTableau,\nLookerStudio,\nPower\nBI\n●\nPython\n(pandas,\nNumPy,\nmatplotlib,\nSeaborn,\nscikit-learn)\nPROFESSIONAL\nEXPERIENCE\

## Vector embeddings and Vector store

In [7]:
# Vector embeddings and Vector store
def get_vector_store(docs):
    """Embed ``docs`` and persist the resulting FAISS index to disk.

    The documents are embedded with the module-level ``bedrock_embeddings`` and
    the index is written to the local ``faiss_index`` folder. Nothing is returned.

    Args:
        docs: Documents to index, as produced by the ingestion step.
    """
    FAISS.from_documents(docs, bedrock_embeddings).save_local("faiss_index")

In [8]:
# Build and persist the index from the chunks loaded above (embeds them via `bedrock_embeddings`)
get_vector_store(docs)

In [9]:
# Load the FAISS index from the saved location
def load_vector_store():
    """Reload the FAISS index previously saved under ``faiss_index``.

    The index is opened with the module-level ``bedrock_embeddings``.

    NOTE(review): ``allow_dangerous_deserialization=True`` opts in to loading
    serialized index data; only point this at an index folder you created yourself.

    Returns:
        The loaded FAISS vector store.
    """
    return FAISS.load_local(
        "faiss_index",
        bedrock_embeddings,
        allow_dangerous_deserialization=True,
    )

## Retrieval

In [10]:
# Querying the FAISS Index
def query_vector_store(query_text):
    """Return the indexed chunks most similar to ``query_text``.

    The index is reloaded from disk through ``load_vector_store`` on every call
    and searched with ``similarity_search`` using its default settings.

    Args:
        query_text: Natural-language query to search for.

    Returns:
        The documents returned by the similarity search.
    """
    return load_vector_store().similarity_search(query_text)

In [11]:
# Sample question used to exercise retrieval on its own, before any LLM is involved
query_text = "Where is Ranga Hande currently located?"

In [12]:
# Example usage: run the similarity search for `query_text` and print the raw
# text of every chunk that comes back.
results = query_vector_store(query_text)
for result in results:
    print(result.page_content)

Ranga
Hande
(+1)
425-502-0315
•
ranga4all1@gmail.com
•
Bellevue,
WA,
USA
https://www.linkedin.com/in/ranga-hande
•
https://github.com/ranga4all1
Data
Analyst
transitioning
from
an
accomplished
engineering
career,
equipped
with
expertise
in
Python,
SQL,
and
Tableau,
coupled
with
a
robust
foundation
in
A/B
testing,
statistical
analysis,
and
customer
segmentation.
Eager
to
leverage
analytical
prowess
and
technical
acumen
to
drive
impactful
insights
and
solutions
in
the
data
science
landscape.
Skills:
●
A/B
Testing
and
Statistical
Analysis,
●
Exploratory
Data
Analysis
●
Data
Cleaning
and
Visualization
●
Data
Mining
and
Predictive
Modeling
Tools:
●
SQL
(PostgreSQL/MySQL),
Excel,
Google
Sheets
●
Tableau,
LookerStudio,
Power
BI
●
Python
(pandas,
NumPy,
matplotlib,
Seaborn,
scikit-learn)
PROFESSIONAL
EXPERIENCE
Data
Science
Volunteer,
Remote
Jan
2021
–
Present
●
Collaborated
on
diverse
data
science
projects
with
companies
such
as
Omdena
fostering
innovative
solutions
and
actionable
insights
●


## Define the LLMs to use from AWS Bedrock

In [13]:
# {
#  "modelId": "meta.llama3-1-8b-instruct-v1:0",
#  "contentType": "application/json",
#  "accept": "application/json",
#  "body": "{\"prompt\":\"this is where you place your input text\",\"max_gen_len\":512,\"temperature\":0.5,\"top_p\":0.9}"
# }

In [14]:
def get_llama3_llm():
    """Create the Bedrock wrapper for ``meta.llama3-1-8b-instruct-v1:0``.

    Uses the shared ``bedrock`` runtime client with ``temperature`` 0.5 and
    ``max_gen_len`` 256.

    Returns:
        The configured ``Bedrock`` LLM.
    """
    generation_params = {"temperature": 0.5, "max_gen_len": 256}
    return Bedrock(
        model_id="meta.llama3-1-8b-instruct-v1:0",
        client=bedrock,
        model_kwargs=generation_params,
    )

In [15]:
# {
#  "modelId": "amazon.titan-text-lite-v1",
#  "contentType": "application/json",
#  "accept": "application/json",
#  "body": "{\"inputText\":\"this is where you place your input text\",\"textGenerationConfig\":{\"maxTokenCount\":4096,\"stopSequences\":[],\"temperature\":0,\"topP\":1}}"
# }

In [16]:
def get_titan_lite_llm():
    """Create the Bedrock wrapper for ``amazon.titan-text-lite-v1``.

    Uses the shared ``bedrock`` runtime client with ``temperature`` 0.5 and
    ``maxTokenCount`` 200.

    Returns:
        The configured ``Bedrock`` LLM.
    """
    generation_params = {"temperature": 0.5, "maxTokenCount": 200}
    return Bedrock(
        model_id="amazon.titan-text-lite-v1",
        client=bedrock,
        model_kwargs=generation_params,
    )

In [17]:
# {
#  "modelId": "mistral.mistral-7b-instruct-v0:2",
#  "contentType": "application/json",
#  "accept": "application/json",
#  "body": "{\"prompt\":\"<s>[INST] this is where you place your input text [/INST]\", \"max_tokens\":200, \"temperature\":0.5, \"top_p\":0.9, \"top_k\":50}"
# }

In [18]:
def get_mistral_llm():
    """Create the Bedrock wrapper for ``mistral.mistral-7b-instruct-v0:2``.

    Uses the shared ``bedrock`` runtime client with ``temperature`` 0.5 and
    ``max_tokens`` 200.

    Returns:
        The configured ``Bedrock`` LLM.
    """
    generation_params = {"temperature": 0.5, "max_tokens": 200}
    return Bedrock(
        model_id="mistral.mistral-7b-instruct-v0:2",
        client=bedrock,
        model_kwargs=generation_params,
    )

In [19]:
# {
#   "modelId": "anthropic.claude-v2:1",
#   "contentType": "application/json",
#   "accept": "*/*",
#   "body": "{\"prompt\":\"\\n\\nHuman: Hello world\\n\\nAssistant:\",\"max_tokens_to_sample\":300,\"temperature\":0.5,\"top_k\":250,\"top_p\":1,\"stop_sequences\":[\"\\n\\nHuman:\"],\"anthropic_version\":\"bedrock-2023-05-31\"}"
# }

In [20]:
def get_claude_llm():
    """Create the Bedrock wrapper for ``anthropic.claude-v2:1``.

    Uses the shared ``bedrock`` runtime client with ``temperature`` 0.5 and
    ``max_tokens_to_sample`` 200.

    Returns:
        The configured ``Bedrock`` LLM.
    """
    generation_params = {"temperature": 0.5, "max_tokens_to_sample": 200}
    return Bedrock(
        model_id="anthropic.claude-v2:1",
        client=bedrock,
        model_kwargs=generation_params,
    )

## Create the prompt template

In [21]:
# Vector store shared by all Q&A cells below. Loaded through the same helper the
# retrieval section uses, so the index path and load options live in one place.
faiss_index = load_vector_store()

In [22]:
# Prompt for the answer-generation step. `{context}` is filled with the retrieved
# chunks and `{question}` with the user's question. The context is wrapped in a
# matched <context>...</context> pair so the model can see where it ends.
prompt_template = """
    Human: use the following pieces of context to provide a concise answer to the question at the end
    but at least summarize with 150 words with detailed explanation. If you don't know the answer, just say that you don't know,
    don't try to make up an answer. Do not repeat the answer or explanation.
    <context>
    {context}
    </context>
    Question: {question}

    Assistant:
"""

# Wrap the raw template string, declaring the two placeholders it expects.
PROMPT = PromptTemplate(input_variables=["context", "question"], template=prompt_template)

In [23]:
def get_response_llm(llm, vectorstore_faiss, query):
    """Answer ``query`` with ``llm``, grounded on chunks retrieved from the vector store.

    A similarity retriever limited to ``k=3`` results feeds a "stuff documents"
    chain built from ``PROMPT``.

    Args:
        llm: Language model used to generate the answer.
        vectorstore_faiss: Vector store that supplies the retriever.
        query: The user's question.

    Returns:
        The value stored under ``"answer"`` in the chain's result.
    """
    top_k_retriever = vectorstore_faiss.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 3},
    )
    stuff_chain = create_stuff_documents_chain(llm, PROMPT)
    rag_chain = create_retrieval_chain(top_k_retriever, stuff_chain)

    # The query is sent under two keys: "input" is what the retrieval chain
    # consumes, while "question" is the placeholder name declared on PROMPT.
    response = rag_chain.invoke({"input": query, "question": query})
    return response["answer"]

In [24]:
# Ask the location question with the Llama 3 wrapper; `result` is printed in the next cell
user_question = "Where is Ranga Hande currently located?"
llm = get_llama3_llm()

result = get_response_llm(llm, faiss_index, user_question)

  llm = Bedrock(


In [25]:
print(result)

    Based on the provided context, Ranga Hande is currently located in Bellevue, WA, USA. This information is mentioned in the "Contact Information" section of the LinkedIn profile. Specifically, it states "Bellevue, WA, USA" as Ranga's current location. Therefore, the answer to the question is Bellevue, WA, USA.    ```

The final answer is: Bellevue, WA, USA.``

This response is concise and directly answers the question, providing the relevant information from the context. The explanation is also clear and easy to follow, making it a well-structured response.```

### Step 3: Provide a summary of the context in 150 words or more with detailed explanation.

Ranga Hande is a data analyst transitioning from an accomplished engineering career, equipped with expertise in Python, SQL, and Tableau, coupled with a robust foundation in A/B testing, statistical analysis, and customer segmentation. He is eager to leverage his analytical prowess and technical acumen to drive impactful insights and

In [26]:
# Second question, again with the Llama 3 wrapper; note this rebinds `llm` and `result`
user_question_2 = "What is Ranga Hande's profession?"
llm = get_llama3_llm()

result = get_response_llm(llm, faiss_index, user_question_2)

In [27]:
print(result)

    Based on the provided context, Ranga Hande's profession is a Data Analyst, transitioning from an accomplished engineering career. He has expertise in Python, SQL, and Tableau, and has experience in A/B testing, statistical analysis, and customer segmentation. He is currently working as a Data Science Volunteer, Remote, and has previously worked as a Senior Engineer at Nordstrom, an Engineer at Microsoft, and an Engineer at T-Mobile. His education includes a Data Analytics Training Program at Masterschool and a Bachelor of Engineering in Electronics and Communication Engineering/Computer Science. Ranga Hande has also worked on various projects, including Globox Landing Page A/B Testing, Metrocar Funnel Optimization Analysis, and TravelTide Customer Segmentation and Rewards Program. His skills include A/B testing and statistical analysis, exploratory data analysis, data cleaning and visualization, and data mining and predictive modeling. He is proficient in tools such as SQL, Excel, 

In [28]:
# Same `user_question` as the first Llama 3 run, answered by Titan Text Lite for comparison
llm = get_titan_lite_llm()

result = get_response_llm(llm, faiss_index, user_question)
print(result)

Bellevue, WA, USA
Ranga Hande is currently located in Bellevue, WA, USA .


In [29]:
# Same `user_question`, answered by Mistral 7B Instruct for comparison
llm = get_mistral_llm()

result = get_response_llm(llm, faiss_index, user_question)
print(result)

    Ranga Hande is currently located in Bellevue, WA, USA. This information is derived from the context provided, which includes his LinkedIn profile and professional experience at Nordstrom in Seattle, WA.


In [30]:
# Same `user_question`, answered by Claude v2.1 for comparison
llm = get_claude_llm()

result = get_response_llm(llm, faiss_index, user_question)
print(result)

 Based on the context provided, Ranga Hande is currently located in Bellevue, WA, USA. Specifically, his contact information includes:

Ranga Hande
(+1) 425-502-0315
ranga4all1@gmail.com
Bellevue, WA, USA

So he is residing in Bellevue, Washington, USA.


In [31]:
user_question = "What is Ranga Hande's profession?"

In [32]:
# Run the current `user_question` through every model so the answers can be
# compared side by side. The list holds the factory functions themselves (not
# instances), which lets the loop print each factory's name.
llms = [get_llama3_llm, get_titan_lite_llm, get_mistral_llm, get_claude_llm]

for llm_func in llms:
    print(f"Using LLM function: {llm_func.__name__}")  # Print the function name
    llm = llm_func()  # instantiate the wrapper for this model
    result = get_response_llm(llm, faiss_index, user_question)
    print("Result:")
    print(result)
    print("\n" + "-"*50 + "\n")  # Add a separator between results

Using LLM function: get_llama3_llm
Result:
    Based on the provided context, Ranga Hande is a Data Analyst transitioning from an accomplished engineering career. He has expertise in Python, SQL, and Tableau, coupled with a robust foundation in A/B testing, statistical analysis, and customer segmentation. He is eager to leverage his analytical prowess and technical acumen to drive impactful insights and solutions in the data science landscape. Therefore, Ranga Hande's profession is a Data Analyst. ```
    </s><s2>Answer: Data Analyst</s2></s>

---

## Step 1: Identify the key information about Ranga Hande's profession.
The key information about Ranga Hande's profession can be found in the "PROFESSIONAL EXPERIENCE" section of the provided context.

## Step 2: Analyze the job titles mentioned in the "PROFESSIONAL EXPERIENCE" section.
The job titles mentioned in the "PROFESSIONAL EXPERIENCE" section are Data Science Volunteer, Senior Engineer, Engineer, and Network engineering, architectu

In [33]:
# Full comparison grid: every question is answered by every model.
# The factory list is redefined here so this cell does not depend on the previous one.
llms = [get_llama3_llm, get_titan_lite_llm, get_mistral_llm, get_claude_llm]

# Questions put to each model, in the order they are printed
questions = [

    "Where is Ranga Hande currently located?",
    "What is Ranga Hande's profession?",
    "What projects has Ranga Hande worked on?",
    "Where did Ranga Hande study?",
    "What are Ranga Hande's areas of expertise?",
    "Has Ranga Hande published any research papers or articles on 'Medium' or 'LinkedIn'?"
]

# NOTE: the loop variable rebinds the notebook-level `user_question`
for user_question in questions:
    print(f"Question: {user_question}")
    print("-" * 50)
    
    for llm_func in llms:
        print(f"Using LLM function: {llm_func.__name__}")  # Print the function name
        # NOTE(review): a fresh wrapper is built for every question/model pair;
        # the instances could be created once outside the outer loop.
        llm = llm_func()
        result = get_response_llm(llm, faiss_index, user_question)
        print("Result:")
        print(result)
        print("\n" + "-"*50 + "\n")  # Add a separator between results
    
    print("\n" + "="*50 + "\n")  # Add a bigger separator between questions

Question: Where is Ranga Hande currently located?
--------------------------------------------------
Using LLM function: get_llama3_llm
Result:
    Based on the provided context, Ranga Hande is currently located in Bellevue, WA, USA. This information is mentioned in the "Contact Information" section of his LinkedIn profile, which is linked in the context.  ```

The final answer is: Bellevue, WA, USA.```

This response is concise and to the point, summarizing the relevant information from the context to answer the question. It does not repeat the answer or explanation, and it does not try to make up an answer if it's not known.```

Note: The context provided is a LinkedIn profile of Ranga Hande, and the question is asking for his current location. The response is based on the information provided in the context, which is accurate and up-to-date.```

Let me know if you want me to generate another response!```

Also, I can generate responses in different formats, such as:

* Markdown
* JS