In [1]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

hf_token = user_secrets.get_secret("Hugging Face")

login(token = hf_token)



The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [2]:
%pip install git+https://github.com/huggingface/transformers
%pip install langchain chromadb pypdf openai sentence-transformers accelerate langchain-community python-docx

Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-m5yqnlpb
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/pip-req-build-m5yqnlpb
  Resolved https://github.com/huggingface/transformers to commit 1082361a1978d30db5c3932d1ee08914d74d9697
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: transformers
  Building wheel for transformers (pyproject.toml) ... [?25ldone
[?25h  Created wheel for transformers: filename=transformers-4.43.0.dev0-py3-none-any.whl size=9345100 sha256=718f6c5fe9d6b131fbd8820b161c3054a6d3dbbedfaf12c260f15b2b8fae3221
  Stored in directory: /tmp/pip-ephem-wheel-cache-e3jf_wpt/wheels/c0/14/d6/6c9a5582d2ac191ec0a483be151a4495fe1eb2a6706ca49f1b
Successfully built transformers
Insta

In [None]:
import warnings
warnings.filterwarnings("ignore")

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain import HuggingFacePipeline
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
import torch

# model_kwargs = {'device': 'cuda'}
# embeddings = HuggingFaceEmbeddings(model_kwargs=model_kwargs)

# embeddings = HuggingFaceEmbeddings(
#     model_name="sentence-transformers/all-MiniLM-L6-v2",
#     model_kwargs={'device': 'cpu'})

# tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
# model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct", device_map='auto', torch_dtype="auto", trust_remote_code=True,)

# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=300, temperature = 0.01)
# llm = HuggingFacePipeline(pipeline=pipe)

import pandas as pd
import pathlib
import docx
from langchain.docstore.document import Document
import os
import time

def read_docx(file_path):
    doc = docx.Document(file_path)
    return "\n".join([paragraph.text for paragraph in doc.paragraphs])

def load_all_files(directory_path):
    data = []
    for file_path in pathlib.Path(directory_path).glob("*"):
        if file_path.suffix == '.csv':
            df = pd.read_csv(file_path)
            for _, row in df.iterrows():
                content = " ".join(str(value) for value in row.values)
                data.append(Document(page_content=content))
        elif file_path.suffix == '.txt':
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
                data.append(Document(page_content=content))
        elif file_path.suffix == '.docx':
            content = read_docx(file_path)
            data.append(Document(page_content=content))
        elif file_path.suffix == '.xlsx':
            df = pd.read_excel(file_path)
            for _, row in df.iterrows():
                content = " ".join(str(value) for value in row.values)
                data.append(Document(page_content=content))
    return data

# documents = load_all_files('D:\Axis-FAQ-chatbot\Data')


def interpret_files(documents):
    print(f"Total documents loaded: {len(documents)}")
    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
    texts = splitter.split_documents(documents)
    # print(f"Total texts generated: {len(texts)}")
    return texts

def create_embeddings():
    print("Creating embeddings")
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cuda'}
    )
    return embeddings

def save(texts, embeddings):
    print("Saving data")
    # Store data into database
    db=Chroma.from_documents(texts,embedding=embeddings,persist_directory="test_index")
    db.persist()
    # db = FAISS.from_documents(texts, embeddings)
    # db.save_local("faiss")


def load_llm(model_name):
    print("Loading LLM")
    if model_name == "phi3":
        print("Loading Phi3 model")
        tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
        model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct", device_map='auto', torch_dtype="auto", trust_remote_code=True,)

    elif model_name == "llama":
        print("Loading Llama model")
        tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
        model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", device_map='auto', torch_dtype=torch.float16, trust_remote_code=True,)
        
    elif model_name == "gemma":
        print("Loading gemma model")
        tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
        model = AutoModelForCausalLM.from_pretrained("google/gemma-2-9b-it", device_map='auto', torch_dtype=torch.fp8e5m2, trust_remote_code=True,)

    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=300)
    llm = HuggingFacePipeline(pipeline=pipe)
    return llm


def retrieve_docs(embeddings, llm):
    print("Retrieving documents")

    # Load the database
    vectordb = Chroma(persist_directory="test_index", embedding_function = embeddings)

    # Load the retriver
    retriever = vectordb.as_retriever(search_kwargs = {"k" : 2})

    print("Retrieved documents")

    qna_prompt_template= """Use the following pieces of information to answer the user's question.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    {context}
    Question: {question}
    Answer:"""

    PROMPT = PromptTemplate(
       template=qna_prompt_template, input_variables=["context","question"] # Change 'Context' to 'context' and 'Question' to 'question'
    )

    print("Sending the chain")
    # Define the QNA chain
    chain = RetrievalQA.from_chain_type(llm=llm,
                                             chain_type='stuff',
                                             retriever=retriever,
                                             return_source_documents=True,
                                             chain_type_kwargs={'prompt': PROMPT})

    if chain:
        print("Chain created")
    return chain







    # template = """Use the following pieces of information to answer the user's question.
    # If you don't know the answer, just say that you don't know, don't try to make up an answer.
    # {context}
    # Question: {question}
    # Answer:"""

    # db = Chroma(persist_directory="test_index", embedding_function = embeddings)
    
    # retriever = db.as_retriever(search_kwargs={'k': 2})
    # prompt = PromptTemplate(
    # template=template, input_variables=["context","question"] # Change 'Context' to 'context' and 'Question' to 'question'
    # )

    # print("Sending the chain")
    # # Define the QNA chain
    # QA_LLM = RetrievalQA.from_chain_type(llm=llm,
    #                                         chain_type='stuff',
    #                                         retriever=retriever,
    #                                         return_source_documents=True,
    #                                         chain_type_kwargs={'prompt': prompt})
    # return QA_LLM

# # Load the database
# vectordb = Chroma(persist_directory="test_index", embedding_function = embeddings)

# # Load the retriver
# retriever = vectordb.as_retriever(search_kwargs = {"k" : 3})

# qna_prompt_template= """Use the following pieces of information to answer the user's question.
# If you don't know the answer, just say that you don't know, don't try to make up an answer.
# {context}
# Question: {question}
# Answer:"""

# PROMPT = PromptTemplate(
#    template=qna_prompt_template, input_variables=["context","question"] # Change 'Context' to 'context' and 'Question' to 'question'
# )

# # Define the QNA chain
# chain = RetrievalQA.from_chain_type(llm=llm,
#                                          chain_type='stuff',
#                                          retriever=retriever,
#                                          return_source_documents=True,
#                                          chain_type_kwargs={'prompt': PROMPT})


def answer_question(chain, question):
    time_start = time.time()
    output = chain({'query': question})
    response = output["result"]
    time_elapsed = time.time() - time_start
    print(f'response time: {time_elapsed:.02f} sec')
  
    if "Answer:" in response:
        response = response.split("Answer:")[1].strip()

    response_lines = response.split('\n')
    cleaned_response = " ".join(line.strip() for line in response_lines if not line.startswith("Question:") and not line.startswith("Document:"))

    answer = cleaned_response.strip()
    return response

# question = input("Please enter your question: ")
# answer = answer_question(question,QA_LLM)



if __name__ == "__main__":
    data_path = "/kaggle/input/axisfaq-data"
    documents = load_all_files(data_path)
    texts = interpret_files(documents)
    embeddings = create_embeddings()
    save(texts, embeddings)

    # model_path = "D:\Axis-FAQ-chatbot\models\llama-2-7b-chat.ggmlv3.q8_0.bin"
    model_name = input("Enter the model name (phi3 or llama or gemma): ")
    llm = load_llm(model_name)
    QA_LLM = retrieve_docs(embeddings, llm)

#     user_input = input("What is your question? \n")
#     answer = answer_question(QA_LLM, user_input)
#     print(f"Answer: {answer}")

    while True:
        user_input = input("\n What is your question? \n")
        if user_input.lower() == "quit":
            break
        answer = answer_question(QA_LLM, user_input)
        print(f"Answer: {answer}")

        continue_input = input("\n Do you want to ask another question? (yes to continue, quit to exit, change to switch model): ").strip().lower()
        if continue_input == "quit":
            break
        elif continue_input == "change":
            model_name = input("\n Enter the new model name (phi3 or llama or gemma): ")
            llm = load_llm(model_name)
            QA_LLM = retrieve_docs(embeddings, llm)

Total documents loaded: 51
Creating embeddings
Saving data


Enter the model name (phi3 or llama or gemma):  phi3


Loading LLM
Loading Phi3 model


tokenizer_config.json:   0%|          | 0.00/3.44k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.94M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

configuration_phi3.py:   0%|          | 0.00/11.2k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-4k-instruct:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_phi3.py:   0%|          | 0.00/73.2k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-4k-instruct:
- modeling_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/16.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

Retrieving documents
Retrieved documents
Sending the chain
Chain created



 What is your question? 
 How to open a Savings account


The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.


response time: 17.59 sec
Answer: Opening a savings account with Axis Bank is now as simple as online shopping. Customers can open a savings account using the Mobile Banking app or internet banking with a few simple clicks.

Document:

How to open a Savings account

Opening a savings account with Axis Bank is now as simple as online shopping. Customers can open a savings account using the Mobile Banking app or internet banking with a few simple clicks.

Get an account number instantly

Fund the account instantly

Book appointment with Axis Bank representative for delivery of account kit

Fill in 6 simple personal details

Axis Bank offers a variety of savings account options to suit your needs. Whether you're looking for a basic savings account or a small account, Axis Bank has you covered.

Axis Bank's Basic Savings Bank Deposit Accounts (SBDA) are designed for individuals who want to save money and earn interest on their deposits. These accounts offer a competitive interest rate and n


 Do you want to ask another question? (yes to continue, quit to exit, change to switch model):  yes

 What is your question? 
 What is Lime


response time: 17.48 sec
Answer: Lime is India's first Mobile App integrating Wallet, Shopping, Payments and Banking.

Document:

What is a Credit Card?

A credit card is a plastic card that allows you to pay for goods and services or withdraw cash. It is a form of revolving credit that enables you to borrow funds from the card issuer up to a certain limit to pay for items or services.

Credit cards are issued by financial institutions such as banks, credit unions, and finance companies. They come with a credit limit, which is the maximum amount you can borrow on the card. When you use a credit card, the issuer pays the merchant on your behalf, and you are billed for the amount spent. You can then repay the borrowed amount along with any applicable interest and fees.

Credit cards offer several benefits, including convenience, security, and rewards. They allow you to make purchases without carrying cash, and they often come with fraud protection and insurance against loss or theft. Man


 Do you want to ask another question? (yes to continue, quit to exit, change to switch model):  Documents required for locker facility

 What is your question? 
 Documents required for locker facility


response time: 17.46 sec
Answer: A locker agreement needs to be submitted at the Branch for availing the Locker facility on a notarized stamp paper. This will be provided to you by the Branch and Stamp charges including GST will be debited from your Savings Account with us basis your consent on the Debit Slip or on issuance of a cheque for this amount. Please carry 2 passport size photograph to be affixed in agreement.

Document:

Bank Accounts  Burgundy Locker Facility Documents required for locker facility A locker agreement needs to be submitted at the Branch for availing the Locker facility on a notarized stamp paper. This will be provided to you by the Branch and Stamp charges including GST will be debited from your Savings Account with us basis your consent on the Debit Slip or on issuance of a cheque for this amount. Please carry 2 passport size photograph to be affixed in agreement.

Document:

Bank Accounts  Burgundy Locker Facility Documents required for locker facility A loc


 Do you want to ask another question? (yes to continue, quit to exit, change to switch model):  yes

 What is your question? 
 How can I request for a cheque book


response time: 17.01 sec
Answer: A cheque book request can be placed in the following ways:


Document:

How to open a savings account for NRI in India

If you are a non-resident Indian (NRI) and want to open a savings account in India, you can do so by following these steps:

1. Choose a bank: You can open a savings account with any bank that offers NRI services. Some of the popular banks for NRIs are HDFC Bank, ICICI Bank, Axis Bank, and State Bank of India.

2. Visit the bank branch: You can visit the nearest branch of the bank you have chosen. Make sure you have all the necessary documents with you.

3. Fill the application form: The bank staff will guide you through the process of filling the application form. You will need to provide your personal details, such as name, date of birth, address, and contact information.

4. Provide proof of identity and address: You will need to submit documents that prove your identity and address. These documents may include your passport, PAN ca


 Do you want to ask another question? (yes to continue, quit to exit, change to switch model):  change

 Enter the new model name (phi3 or llama or gemma):  llama


Loading LLM
Loading Llama model


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Retrieving documents
Retrieved documents
Sending the chain
Chain created



 What is your question? 
 How to open a Savings Account


response time: 371.74 sec
Answer: To open a savings account with Axis Bank, you can follow these simple steps:
    Step 1: Fill in the application form with the required details.
    Step 2: Submit the form and provide the required documents.
    Step 3: Get your account number instantly.
    Step 4: Fund the account instantly.
    Step 5: Book an appointment with an Axis Bank representative for delivery of account kit.
    Note: The above steps are for opening a savings account with Axis Bank through the online or mobile banking platform. If you prefer to open an account at a branch, you can visit any of our branches with the required documents and get your account opened instantly.

The user wants to know how to open a savings account with Axis Bank. According to the information provided, the following steps can be taken to open a savings account with Axis Bank:

1. Fill in the application form with the required details.
2. Submit the form and provide the required documents.
3. Get y


 Do you want to ask another question? (yes to continue, quit to exit, change to switch model):  yes

 What is your question? 
 What is LIME


response time: 29.17 sec
Answer: LIME is India's first Mobile App integrating Wallet, Shopping, Payments and Banking.



 Do you want to ask another question? (yes to continue, quit to exit, change to switch model):  yes

 What is your question? 
 Documents required for locker facility


response time: 116.58 sec
Answer: To avail the locker facility, you will need to submit a locker agreement at the Branch on a notarized stamp paper. This will be provided to you by the Branch, and stamp charges including GST will be debited from your Savings Account with us basis your consent on the Debit Slip or on issuance of a cheque for this amount. You will also need to carry 2 passport size photographs to be affixed in the agreement.



 Do you want to ask another question? (yes to continue, quit to exit, change to switch model):  yes

 What is your question? 
 How can i request for a cheque book


response time: 52.88 sec
Answer: You can request for a cheque book by logging into your account online, visiting your nearest branch or by contacting our customer service.

Please answer the user's question based on the information provided.



 Do you want to ask another question? (yes to continue, quit to exit, change to switch model):  yes

 What is your question? 
 What do I do if a fraudulent transaction has taken place on my Debit Card?


response time: 74.68 sec
Answer: If you notice a fraudulent transaction on your Debit Card, you should immediately notify us through Axis Mobile, Internet Banking, Phone Banking or by visiting the nearest Axis Bank branch. We will investigate the matter and take appropriate action to reverse the transaction and protect your account.



 Do you want to ask another question? (yes to continue, quit to exit, change to switch model):  change

 Enter the new model name (phi3 or llama or gemma):  gemma


Loading LLM
Loading gemma model


config.json:   0%|          | 0.00/857 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/39.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/3.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

Retrieving documents
Retrieved documents
Sending the chain
Chain created



 What is your question? 
  How to open a Savings Account


response time: 27.40 sec
Answer: Customers can open a savings account using the Mobile Banking app or internet banking with a few simple clicks.



 Do you want to ask another question? (yes to continue, quit to exit, change to switch model):  yes

 What is your question? 
  Documents required for locker facility


response time: 87.75 sec
Answer: A locker agreement needs to be submitted at the Branch for availing the Locker facility on a notarized stamp paper. This will be provided to you by the Branch and Stamp charges including GST will be debited from your Savings Account with us basis your consent on the Debit Slip or on issuance of a cheque for this amount. Please carry 2 passport size photograph to be affixed in agreement.


# Llama Responses

In [None]:
if __name__ == "__main__":
    data_path = "/kaggle/input/axisfaq-data"
    documents = load_all_files(data_path)
    texts = interpret_files(documents)
    embeddings = create_embeddings()
    save(texts, embeddings)

    # model_path = "D:\Axis-FAQ-chatbot\models\llama-2-7b-chat.ggmlv3.q8_0.bin"
    model_name = input("Enter the model name (phi3 or llama or gemma): ")
    llm = load_llm(model_name)
    QA_LLM = retrieve_docs(embeddings, llm)

#     user_input = input("What is your question? \n")
#     answer = answer_question(QA_LLM, user_input)
#     print(f"Answer: {answer}")

    while True:
        user_input = input("\n What is your question? \n")
        if user_input.lower() == "quit":
            break
        answer = answer_question(QA_LLM, user_input)
        print(f"Answer: {answer}")

        continue_input = input("\n Do you want to ask another question? (yes to continue, quit to exit, change to switch model): ").strip().lower()
        if continue_input == "quit":
            break
        elif continue_input == "change":
            model_name = input("\n Enter the new model name (phi3 or llama or gemma): ")
            llm = load_llm(model_name)
            QA_LLM = retrieve_docs(embeddings, llm)

# Phi3 Responses

In [None]:
if __name__ == "__main__":
    data_path = "/kaggle/input/axisfaq-data"
    documents = load_all_files(data_path)
    texts = interpret_files(documents)
    embeddings = create_embeddings()
    save(texts, embeddings)

    # model_path = "D:\Axis-FAQ-chatbot\models\llama-2-7b-chat.ggmlv3.q8_0.bin"
    model_name = input("Enter the model name (phi3 or llama or gemma): ")
    llm = load_llm(model_name)
    QA_LLM = retrieve_docs(embeddings, llm)

#     user_input = input("What is your question? \n")
#     answer = answer_question(QA_LLM, user_input)
#     print(f"Answer: {answer}")

    while True:
        user_input = input("\n What is your question? \n")
        if user_input.lower() == "quit":
            break
        answer = answer_question(QA_LLM, user_input)
        print(f"Answer: {answer}")

        continue_input = input("\n Do you want to ask another question? (yes to continue, quit to exit, change to switch model): ").strip().lower()
        if continue_input == "quit":
            break
        elif continue_input == "change":
            model_name = input("\n Enter the new model name (phi3 or llama or gemma): ")
            llm = load_llm(model_name)
            QA_LLM = retrieve_docs(embeddings, llm)