# Generative AI Assistant

import generic libraries

In [4]:
import configparser

import warnings

warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)
# Pinecone instance
from pinecone import Pinecone, PodSpec


# Document Loader
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings

#Langchain app
from langchain.chat_models import AzureChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.chains import RetrievalQA
from langchain.agents import Tool


initialize credentials

In [5]:
# Load API credentials and model settings from a local INI file.
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns only the names it
# actually parsed, so check the result here instead of failing later with a
# confusing NoSectionError on the first config.get() call.
loaded_config_files = config.read('config.ini')
if not loaded_config_files:
    raise FileNotFoundError("Could not read 'config.ini' from the working directory")
loaded_config_files

['config.ini']

In [6]:
# Read the service credentials from the parsed config file.
# NOTE(review): the OpenAI key is read from the 'deployment' option, whereas the
# Pinecone key below comes from an 'api_key' option — confirm that
# [Openai] deployment really holds the API key.
OPENAI_API_KEY = config.get('Openai', 'deployment')
PINECONE_API_KEY = config.get('Pinecone', 'api_key')
PINECONE_API_ENV = config.get('Pinecone', 'environment')

## Set up index in Pinecone

In [7]:
# Name of the index this notebook reads from and writes to.
index_name = "bank-index"

# Pinecone client plus the pod spec that is used if the index has to be created.
pc = Pinecone(api_key=PINECONE_API_KEY)
spec = PodSpec(environment=PINECONE_API_ENV)

# Create the index only when its name is not among the existing indexes.
existing_index_names = pc.list_indexes().names()
if index_name not in existing_index_names:
    pc.create_index(index_name, dimension=1536, metric='cosine', spec=spec)

# Open a handle to the index and display its current statistics.
index = pc.Index(index_name)
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.00051,
 'namespaces': {'': {'vector_count': 51}},
 'total_vector_count': 51}

## Document Loader --> Load data (PDFs, web scraping, audio files)

### PDF Loader

Example of how to load a simple document

In [72]:
loader = PyPDFLoader("FAQ _ Nordea.pdf")
data = loader.load()

In [73]:
print (f'There are {len(data)} pages in the pdf')

There are 5 pages in the pdf


In [40]:
# Split the loaded PDF pages into chunks (chunk_size=500, chunk_overlap=50).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
# split_documents() is the splitter method that accepts the list of Document
# objects returned by loader.load(); the splitter has no `split_documentssplit`.
documents = text_splitter.split_documents(data)
print(f"Now there are {len(documents)} documents")

Now there are 17 documents


In [8]:
embeddings_client = OpenAIEmbeddings(api_key=OPENAI_API_KEY)

In [42]:
# Embed the text of every PDF chunk. embed_documents() takes the whole list in
# a single call instead of issuing one embed_query() request per chunk, and
# returns one vector per input text in the same order.
embeddings = embeddings_client.embed_documents(
    [doc.page_content for doc in documents]
)

In [43]:
len(embeddings)

17

### Web scraper

Scraping products from a dummy bank webpage

In [71]:
import requests
from bs4 import BeautifulSoup

url = "https://www.capitalone.com/bank/money-management/banking-basics/banking-products-and-services/"

# Bound the request time and fail loudly on HTTP errors so that an error page
# is never parsed as if it were the article.
response = requests.get(url, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.content, "html.parser")

# The article text is taken from the div carrying this class string.
target_div = soup.find("div", class_="grv-shr-lib-col--sm-4 article-body-content")
if target_div is None:
    # soup.find() returns None when nothing matches (e.g. the page layout changed).
    raise ValueError(f"Article body container not found at {url}")

all_text = target_div.get_text()

print(len(all_text))
 

10117


In [79]:
# Re-create the splitter with smaller chunks (chunk_size=300, chunk_overlap=50)
# for the scraped web text.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
# NOTE: this rebinds `documents` — it now holds the result of split_text() on the
# scraped string instead of the chunks produced from the PDF pages.
documents = text_splitter.split_text(all_text)
print(f"Now there are {len(documents)} documents")

Now there are 51 documents


In [82]:
# Embed every scraped text chunk. embed_documents() takes the whole list in a
# single call instead of issuing one embed_query() request per chunk, and
# returns one vector per input text in the same order.
embeddings = embeddings_client.embed_documents(list(documents))

generate random ids

In [83]:
import uuid

def generate_unique_ids(count=None, length=7):
    """Generate `count` distinct random hexadecimal identifiers.

    Args:
        count: Number of ids to produce. When omitted, the current length of
            the notebook-level `embeddings` list is used. It is looked up at
            call time: a `len(embeddings)` default value would be evaluated
            only once, when the function is defined, and go stale as soon as
            `embeddings` is rebuilt.
        length: Number of leading hex characters kept from each UUID4
            (a UUID4 has 32 hex characters).

    Returns:
        A list of `count` unique strings, in generation order. A `count` of
        zero or less yields an empty list.

    Raises:
        ValueError: If `count` is larger than the number of distinct ids that
            can exist for the given `length` (the loop could never finish).
    """
    if count is None:
        count = len(embeddings)

    # Number of characters the slice below actually keeps for this `length`.
    kept_chars = len(("0" * 32)[:length])
    if count > 16 ** kept_chars:
        raise ValueError(
            f"Cannot generate {count} unique ids with only {kept_chars} hex characters"
        )

    seen = set()  # O(1) duplicate check; the list preserves generation order
    unique_ids = []
    while len(unique_ids) < count:
        new_id = uuid.uuid4().hex[:length]
        if new_id not in seen:
            seen.add(new_id)
            unique_ids.append(new_id)
    return unique_ids

ids = generate_unique_ids()
print(ids)

['72276c6', 'aec5711', 'fe3315a', '8b16c17', '93bda80', '87363ad', 'b598ccb', '8979627', 'a44842d', '792a6ef', '9c8d09f', '9846e66', '3f9aa1f', '0cd27f0', '9ece720', '90b1ee4', 'e45e304', 'a8e685c', 'e0e8eb5', 'e0c2739', '7b79944', '7d92edf', '6f311db', 'b6288ed', 'ea37102', '2fd665e', '9fb6c26', 'c900522', 'fe66dc4', 'dd1ed67', '838ed98', 'e31bf93', '7e2d082', 'd26edb9', '0da398c', 'ebb4a25', '6e908f5', '1281ee6', 'f0ad0f3', 'ed3bc70', 'd1d524b', '690d863', '1a77af8', '6041c4f', '0ee9620', 'fec379b', '423404e', '9295fca', 'a695c2f', '9cef76c', '95a8028']


In [85]:
# One metadata dict per chunk, storing the chunk itself under the 'text' key.
metadata = [{'text': chunk} for chunk in documents]


Ingest the ids, embeddings and metadata into the index

In [97]:
# zip() stops at the shortest input, so a length mismatch between the three
# lists would silently drop vectors; fail explicitly instead.
if not (len(ids) == len(embeddings) == len(metadata)):
    raise ValueError(
        f"Mismatched lengths: {len(ids)} ids, {len(embeddings)} embeddings, "
        f"{len(metadata)} metadata entries"
    )

# Each record is an (id, embedding, metadata) tuple.
data = list(zip(ids, embeddings, metadata))
index.upsert(vectors=data)

{'upserted_count': 51}

### Audio files --> Speech2Text

loading dummy audio .mp3 example

In [199]:
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
import torch
import librosa
import soundfile

# The model and its processor are loaded from the same pretrained checkpoint.
checkpoint = "facebook/wav2vec2-base-960h"
model = Wav2Vec2ForCTC.from_pretrained(checkpoint)
processor = Wav2Vec2Processor.from_pretrained(checkpoint)

# Load the audio file, requesting a sampling rate of 16000.
audio_file = "mypodcast.mp3"
audio_array, sampling_rate = librosa.load(audio_file, sr=16000)

# Turn the waveform into the PyTorch tensor the model is called with.
processed_audio = processor(audio_array, sampling_rate=sampling_rate, return_tensors="pt")
input_values = processed_audio.input_values

# Inference only: run the forward pass without gradient tracking.
with torch.no_grad():
    logits = model(input_values).logits

# Take the highest-scoring id along the last axis and decode the first item.
predicted_ids = logits.argmax(dim=-1)
transcription = processor.batch_decode(predicted_ids)[0]
print(transcription)


Some weights of the model checkpoint at facebook/wav2vec2-base-960h were not used when initializing Wav2Vec2ForCTC: ['wav2vec2.encoder.pos_conv_embed.conv.weight_g', 'wav2vec2.encoder.pos_conv_embed.conv.weight_v']
- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'wav2vec2.masked_spec_embed']
You sho

O WELCOME TO MY PODCAST DO LITTLE EXPERIMENTS HERE IAM GOING TO LET YOU THIS ENTO A FONG CONVERSATION WITH ME AND MY MOTHER SO I'M GOING TO DIL HER NOW AND YOU WILL HEAR WHAT I HAVE TO SAY BUT NOT HER DILING NOW RING REANG LITTLE MOTHER IAM DOING GOOD I AM RECORDING TO MARRIA FOR A PROJECT NO M O NO OSE TO MORROW Y YETS FOR A PODCAST I AM LIVE ON A PODCAST ERO NOW NO I AM  TALKING INTO MY MY CROPHONE AND ITS SCATTERING MY VOICE YOUN A THEY CAME HER YOU IMPRETTY MUCH ET IM A PRE EXCITED AS I GOT MY LAB AND LECTURE SCETUAL NEXT MONTH AND WENT BACK TO AFTERNOON CLASS AND I'LL SO TAT OF NICE PAIR OF HAD FALNS I GUESS THEY THEY JUST THEY GAVE HIM TO US WITH THE BOOKS I GUESS SO IM JUST TOO SOUND FOUNDATIONS AND SOUNDS YET I NO  HOP SO YE A THIS CONTO TO MY EXPERIMENT


In [201]:
transcription

"O WELCOME TO MY PODCAST DO LITTLE EXPERIMENTS HERE IAM GOING TO LET YOU THIS ENTO A FONG CONVERSATION WITH ME AND MY MOTHER SO I'M GOING TO DIL HER NOW AND YOU WILL HEAR WHAT I HAVE TO SAY BUT NOT HER DILING NOW RING REANG LITTLE MOTHER IAM DOING GOOD I AM RECORDING TO MARRIA FOR A PROJECT NO M O NO OSE TO MORROW Y YETS FOR A PODCAST I AM LIVE ON A PODCAST ERO NOW NO I AM  TALKING INTO MY MY CROPHONE AND ITS SCATTERING MY VOICE YOUN A THEY CAME HER YOU IMPRETTY MUCH ET IM A PRE EXCITED AS I GOT MY LAB AND LECTURE SCETUAL NEXT MONTH AND WENT BACK TO AFTERNOON CLASS AND I'LL SO TAT OF NICE PAIR OF HAD FALNS I GUESS THEY THEY JUST THEY GAVE HIM TO US WITH THE BOOKS I GUESS SO IM JUST TOO SOUND FOUNDATIONS AND SOUNDS YET I NO  HOP SO YE A THIS CONTO TO MY EXPERIMENT"

# Generative AI Assistant

## Semantic search in pinecone

In [9]:
# Embed the question and ask the index for its 5 closest vectors (top_k=5),
# returning their stored metadata as well.
query = "what can I use a debit card for?"
xq = embeddings_client.embed_query(query)  # embedding of the query text
xc = index.query(vector=xq, top_k=5, include_metadata=True)  # query response
xc

{'matches': [{'id': '9fb6c26',
              'metadata': {'text': 'or check, which will take money directly '
                                   'from your account, to pay for everything '
                                   'from groceries to gas to bills. You can '
                                   'also get cash from an ATM or branch using '
                                   'your debit card and PIN, a unique password '
                                   'you choose to protect your account.'},
              'score': 0.883979261,
              'values': []},
             {'id': 'd26edb9',
              'metadata': {'text': 'With a debit card, you can pay for '
                                   'everyday expenses with just a swipe (and '
                                   'usually your PIN). The money will come '
                                   'straight from your checking account so '
                                   'there’s no need to carry cash if you '
                    

In [100]:
for match in xc.matches:
    print(match.id, match.score, match.metadata) 

9fb6c26 0.883182943 {'text': 'or check, which will take money directly from your account, to pay for everything from groceries to gas to bills. You can also get cash from an ATM or branch using your debit card and PIN, a unique password you choose to protect your account.'}
d26edb9 0.871337235 {'text': 'With a debit card, you can pay for everyday expenses with just a swipe (and usually your PIN). The money will come straight from your checking account so there’s no need to carry cash if you prefer not to. Plus, if your debit card is lost or stolen, you may not be responsible for unauthorized'}
ebb4a25 0.846149862 {'text': 'Credit card'}
d1d524b 0.832676 {'text': 'to use your bank’s ATM to access your cash, you might have to pay a fee to use others. Many credit cards allow cash withdrawals as well, but this “cash advance” can come with a hefty fee, so be sure to find out beforehand.2'}
7e2d082 0.822569847 {'text': 'The longer you save, the greater the return. You can always decide to wi

## Langchain app

In [10]:
# Import the LangChain vector store under an alias: importing it as plain
# `Pinecone` would rebind the `Pinecone` client class imported from the
# `pinecone` package, so re-running the index-setup cell afterwards would
# call the wrong class.
from langchain.vectorstores import Pinecone as PineconeVectorStore

# Metadata key under which each chunk's text was stored at ingestion time.
text_field = "text"
index = pc.Index(index_name)

vectorstore = PineconeVectorStore(
    index,
    embeddings_client.embed_query,
    text_field
)

In [11]:
# Chat model; every connection setting is read from the [Settings] section of
# the config file.
llm = AzureChatOpenAI(
    deployment_name= config.get('Settings', 'deployment_name'),
    openai_api_version=  config.get('Settings', 'api_version'),
    openai_api_key= config.get('Settings', 'api_key'),
    openai_api_base = config.get('Settings', 'api_endpoint')
)

# Conversation memory exposed under the 'chat_history' key.
# NOTE(review): k=5 is presumably the number of recent exchanges kept — confirm
# against the LangChain documentation.
conversational_memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    k=5,
    return_messages=True
)

# Retrieval QA chain: uses the vector store as retriever and the chat model
# above to produce the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever = vectorstore.as_retriever(),
    verbose=True
)

In [227]:
def call_an_assistant(input=""):
    """Return the fixed hand-off message shown when a human assistant is requested.

    Args:
        input: Accepted for the tool-callback signature; not used.

    Returns:
        The constant hand-off message.
    """
    handoff_message = "It seems you need further support. Please wait until we assign you an assistant"
    return handoff_message

In [228]:
def irrelevant_questions(input=""):
    """Return the fixed reply used for questions unrelated to the bank.

    Args:
        input: Accepted for the tool-callback signature; not used.

    Returns:
        The constant redirect message (user-facing typo "let me now" corrected
        to "let me know").
    """
    return "Sorry, I am an assistant for Bank101. Please let me know if I can support you with questions relevant to this bank"

In [229]:

# (name, callback, description) for every tool handed to the agent, in the
# order they are registered. The strings are passed to Tool unchanged.
tool_specs = [
    (
        'Knowledge Base',
        qa.run,
        'use this tool to empower banking 101 sales. You are profiecint in recommend the best products for users and addressing the right questions',
    ),
    (
        'Call an assistant',
        call_an_assistant,
        'use this if the user claims for assistance',
    ),
    (
        'Handle irrelevant questions',
        irrelevant_questions,
        'use this if the user asks something is not related to bank',
    ),
]

tools = [
    Tool(name=tool_name, func=tool_func, description=tool_description)
    for tool_name, tool_func, tool_description in tool_specs
]


In [230]:
print(qa.run("what can I use a debit card for?"))



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
You can use a debit card to pay for a variety of things, such as groceries, gas, bills, and other everyday expenses. You can also use your debit card to withdraw cash from an ATM or a bank branch, as long as you have your PIN. The funds for these transactions will be taken directly from your checking account.


In [231]:
from langchain.agents import initialize_agent

# Build the agent from the tool list, the chat model and the conversation
# memory created in the earlier cells.
agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    # NOTE(review): presumably lets the LLM generate a final answer once
    # max_iterations is reached — confirm against the LangChain documentation.
    early_stopping_method='generate',
    memory=conversational_memory
)

In [232]:
print(agent.agent.llm_chain.prompt.messages[0].prompt.template)

Assistant is a large language model trained by OpenAI.

Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.

Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.

Overall, Assistant is a powerful system that can help with a wide range of task

In [234]:
# Replace the template of the agent's first prompt message with bank-specific
# instructions. The tool names are quoted exactly as they are registered in
# `tools` (consistent quoting, matching capitalisation) so the model can refer
# to them verbatim.
agent.agent.llm_chain.prompt.messages[0].prompt.template="""

You are an AI bank sales assistant.

Your task is to empower sales bank by recommending products.

You have access to three tools: "Knowledge Base", "Call an assistant" and "Handle irrelevant questions".

If one of these tools cannot be executed, answer "I don't know"

"""

### Tests

In [235]:
#Expected output: Answer that this is a genai bank sales assistant not able to answer that
#Succeed
query = "Who the new king of Denmark?"
agent(query)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Handle irrelevant questions",
    "action_input": "Who the new king of Denmark?"
}
```[0m
Observation: [38;5;200m[1;3mSorry, I am an assistant for Bank101. Please let me now if I can support you with questions relevant to this bank[0m
Thought:[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "I'm here to assist you with banking-related questions. If you have any inquiries about our products or services, please feel free to ask!"
}
```[0m

[1m> Finished chain.[0m


{'input': 'Who the new king of Denmark?',
 'chat_history': [],
 'output': "I'm here to assist you with banking-related questions. If you have any inquiries about our products or services, please feel free to ask!"}

In [236]:
#Expected output: RAG and recommend different accounts
#Succeed
query = "Which bank accounts are there?"
agent(query)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Knowledge Base",
    "action_input": "Which bank accounts are there?"
}
```[0m

[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m

Observation: [36;1m[1;3mBank accounts can come in various forms, each designed for different needs and purposes. Here are some common types of bank accounts that are generally available at most consumer or retail banks:

1. Checking Account: This is a transactional account primarily used for daily expenses, bill payments, and routine financial transactions. It often comes with a debit card and the ability to write checks.

2. Savings Account: A savings account is intended for storing money and accumulating interest over time. It typically offers a higher interest rate than checking accounts but might have restrictions on the number of transactions you can make.

3. Money Market Account: This account is a hybrid between a checking and savings account

{'input': 'Which bank accounts are there?',
 'chat_history': [HumanMessage(content='Who the new king of Denmark?'),
  AIMessage(content="I'm here to assist you with banking-related questions. If you have any inquiries about our products or services, please feel free to ask!")],
 'output': "Bank accounts come in various forms, such as Checking Accounts for daily expenses, Savings Accounts for storing money, Money Market Accounts which are a hybrid of checking and savings, Certificates of Deposit (CDs) for fixed-term savings, Individual Retirement Accounts (IRAs) for retirement savings, Joint Accounts for shared access by multiple individuals, Business or Corporate Accounts for managing business finances, Student Accounts with features tailored to individuals in education, and Trust Accounts managed for the benefit of another person. It's important to compare their features like fees, interest rates, and services offered before making a decision."}

In [237]:
#Expected output: RAG and recommend "checking account"
#Succeed
query = "Which one is better for my day-to-day expenses?"
agent(query)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Knowledge Base",
    "action_input": "day-to-day expenses account recommendation"
}
```[0m

[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m

Observation: [36;1m[1;3mFor managing day-to-day expenses, a checking account is generally recommended. Checking accounts are designed for frequent transactions, such as depositing paychecks, withdrawing cash, making purchases with a debit card, and paying bills. They typically offer unlimited transactions and may come with a debit card and checkbook, which can be very convenient for daily use.

Some key benefits of using a checking account for everyday expenses include:

1. Easy Access: Checking accounts often come with ATMs and online banking options, providing round-the-clock access to your funds.
2. Security: It's safer to carry a debit card than to carry a large amount of cash.
3. Expense Tracking: You can monitor your income and exp

{'input': 'Which one is better for my day-to-day expenses?',
 'chat_history': [HumanMessage(content='Who the new king of Denmark?'),
  AIMessage(content="I'm here to assist you with banking-related questions. If you have any inquiries about our products or services, please feel free to ask!"),
  HumanMessage(content='Which bank accounts are there?'),
  AIMessage(content="Bank accounts come in various forms, such as Checking Accounts for daily expenses, Savings Accounts for storing money, Money Market Accounts which are a hybrid of checking and savings, Certificates of Deposit (CDs) for fixed-term savings, Individual Retirement Accounts (IRAs) for retirement savings, Joint Accounts for shared access by multiple individuals, Business or Corporate Accounts for managing business finances, Student Accounts with features tailored to individuals in education, and Trust Accounts managed for the benefit of another person. It's important to compare their features like fees, interest rates, and s

In [238]:
#Expected output: call an agent
#Succeed
query = "Oh taht sounds good! Can I talk with an agent?"
agent(query)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Call an assistant",
    "action_input": "User requesting to talk with an agent"
}
```[0m
Observation: [33;1m[1;3mIt seems you need further support. Please wait until we assign you an assistant[0m
Thought:[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "An assistant will be with you shortly to help with your request."
}
```[0m

[1m> Finished chain.[0m


{'input': 'Oh taht sounds good! Can I talk with an agent?',
 'chat_history': [HumanMessage(content='Who the new king of Denmark?'),
  AIMessage(content="I'm here to assist you with banking-related questions. If you have any inquiries about our products or services, please feel free to ask!"),
  HumanMessage(content='Which bank accounts are there?'),
  AIMessage(content="Bank accounts come in various forms, such as Checking Accounts for daily expenses, Savings Accounts for storing money, Money Market Accounts which are a hybrid of checking and savings, Certificates of Deposit (CDs) for fixed-term savings, Individual Retirement Accounts (IRAs) for retirement savings, Joint Accounts for shared access by multiple individuals, Business or Corporate Accounts for managing business finances, Student Accounts with features tailored to individuals in education, and Trust Accounts managed for the benefit of another person. It's important to compare their features like fees, interest rates, and se