In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd


In [None]:
# Page whose FAQ list is scraped in the cells below.
url = "https://jupiter.money/contact/"
# Sent with the GET request. Presumably a browser-like User-Agent is needed so
# the site does not reject the default client string — TODO confirm.
headers = {"User-Agent": "Mozilla/5.0"}

In [None]:

# Fetch the page. The timeout stops a stalled connection from hanging the
# kernel, and raise_for_status() prevents an HTTP error page from being parsed
# as if it were the FAQ page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

In [None]:
# The FAQ entries live in a <ul data-controller="faq"> element.
faq_section = soup.find('ul', {'data-controller': 'faq'})
if faq_section is None:
    # find() returns None when nothing matches; fail with an actionable message
    # instead of an AttributeError on the next line.
    raise ValueError(
        "FAQ list (<ul data-controller='faq'>) was not found in the page; "
        "the site markup may have changed."
    )
faq_items = faq_section.find_all('li')


In [None]:
# Collect one {'question', 'answer'} record per <li> that has both a <span>
# (question) and a <p> (answer) with non-empty text.
faq_data = []
for item in faq_items:
    question_tag = item.find('span')
    answer_tag = item.find('p')
    if question_tag is None or answer_tag is None:
        continue

    # get_text(strip=True) only trims the ends of each text node; the page's
    # source indentation (newlines + runs of spaces) survives inside the text.
    # split()/join collapses every whitespace run to a single space.
    question = ' '.join(question_tag.get_text(strip=True).split())
    answer = ' '.join(answer_tag.get_text(strip=True).split())
    if question and answer:
        faq_data.append({'question': question, 'answer': answer})


In [None]:
# Save as CSV
# Explicit columns keep the frame's schema stable even when nothing was scraped,
# so later cells that read df['question'] / df['answer'] do not hit a KeyError.
df = pd.DataFrame(faq_data, columns=['question', 'answer'])
df.to_csv('faq_raw.csv', index=False)

In [None]:
import re

In [None]:
def clean_text(text):
    """Collapse every run of whitespace in ``text`` to one space and trim the ends."""
    return re.sub(r'\s+', ' ', text).strip()

In [None]:
# Normalised form of each question: lower-cased, with every character that is
# neither a word character nor whitespace removed.
df['question_cleaned'] = (
    df['question']
    .str.lower()
    .str.replace(r'[^\w\s]', '', regex=True)
)


In [None]:
# Keep the first row for each distinct normalised question.
df = df.drop_duplicates(subset=['question_cleaned'], keep='first')


In [None]:
def categorize(question):
    """Assign a coarse topic label to an FAQ question.

    Categories are tried in order (KYC, Rewards, Payments, Limits) and the
    first one with a matching keyword wins; 'General' is the fallback.

    Keywords must match at the start of a word. Plain substring matching is
    not used because 'upi' occurs inside "jupiter", which would label every
    question mentioning Jupiter as 'Payments'. Inflected forms such as
    "limits", "points" or "transferred" still match.

    Args:
        question: The question text (any casing).

    Returns:
        One of 'KYC', 'Rewards', 'Payments', 'Limits', 'General'.
    """
    text = question.lower()
    rules = (
        ('KYC', ('kyc', 'identity')),
        ('Rewards', ('reward', 'point')),
        ('Payments', ('payment', 'upi', 'transfer')),
        ('Limits', ('limit', 'balance')),
    )
    for category, keywords in rules:
        # \b anchors the keyword to a word start; the tail is left open.
        pattern = r'\b(?:' + '|'.join(re.escape(keyword) for keyword in keywords) + ')'
        if re.search(pattern, text):
            return category
    return 'General'

# Label every FAQ row with the topic derived from its question text.
df['category'] = df['question'].map(categorize)


In [None]:
!pip install transformers sentencepiece



In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

In [None]:
# Tokenizer and seq2seq weights are loaded from the same checkpoint.
# NOTE(review): the global name `model` is rebound further down to a
# SentenceTransformer; paraphrase cells re-run after that point would see the
# wrong object.
PARAPHRASE_CHECKPOINT = "prithivida/parrot_paraphraser_on_T5"
tokenizer = AutoTokenizer.from_pretrained(PARAPHRASE_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(PARAPHRASE_CHECKPOINT)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [None]:
def paraphrase_answer(answer, num_return_sequences=1):
    """Sample paraphrases of ``answer`` from the seq2seq paraphrasing model.

    Reads the module-level ``tokenizer`` and ``model``. Generation uses
    sampling (top-k / top-p) and no seed is set here, so repeated calls may
    return different text.

    Args:
        answer: Text to paraphrase.
        num_return_sequences: Number of candidates to generate.

    Returns:
        A list with one decoded string per generated sequence.
    """
    # NOTE(review): the explicit "</s>" is kept from the original prompt;
    # the tokenizer may already append an end-of-sequence token — confirm.
    text = f"paraphrase: {answer} </s>"
    # Calling the tokenizer directly replaces the deprecated encode_plus().
    encoding = tokenizer(text, padding='longest', return_tensors="pt")

    # `early_stopping` is omitted: the library reported it as an invalid
    # generation flag for this sampling setup and logged a warning on every
    # call, so dropping it only removes the noise.
    outputs = model.generate(
        input_ids=encoding["input_ids"],
        attention_mask=encoding["attention_mask"],
        max_length=256,
        do_sample=True,
        top_k=120,
        top_p=0.95,
        num_return_sequences=num_return_sequences,
    )

    return [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

In [None]:
# Example usage: paraphrase a single sentence and show the candidate list.
original = "You can update your KYC details from the app under Profile > KYC."
paraphrases = paraphrase_answer(original)
print(paraphrases)

The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


['You can update your KYC details from the app under Profile > KYC.']


In [None]:
# Store the first paraphrase candidate of every answer alongside the original.
df['rephrased_answer'] = df['answer'].map(
    lambda answer: paraphrase_answer(answer)[0]
)


The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not va

In [None]:
!pip install sentence-transformers faiss-cpu



In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np

In [None]:
# NOTE(review): this rebinds the global `model`, which until now held the
# seq2seq paraphraser that paraphrase_answer() reads. Once this cell has run,
# calling paraphrase_answer() again would invoke .generate() on this embedding
# model instead — consider a distinct name here and in the functions using it.
model = SentenceTransformer('all-MiniLM-L6-v2')  # small, fast, good quality

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Embed every FAQ question; row order follows the DataFrame's row order, which
# is what the positional lookups (df.iloc) in the search helpers rely on.
faq_questions = list(df['question'])
faq_embeddings = model.encode(faq_questions, show_progress_bar=True)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
import faiss

In [None]:
# Build a flat L2 FAISS index whose dimensionality matches the embeddings,
# then load all FAQ vectors into it.
dimension = faq_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
embedding_matrix = np.array(faq_embeddings)
index.add(embedding_matrix)

In [None]:
def find_similar_faqs(query, top_k=3):
    """Return the FAQs whose questions are nearest to ``query``.

    Reads the module-level ``model`` (embedding model), ``index`` (FAISS
    index) and ``df`` (FAQ table).

    Args:
        query: Free-text user question.
        top_k: Maximum number of matches to return.

    Returns:
        A list of ``(question, rephrased_answer)`` tuples, nearest first. It
        holds fewer than ``top_k`` entries when the index has fewer vectors.
    """
    query_embedding = model.encode([query])
    _, indices = index.search(np.array(query_embedding), top_k)

    results = []
    for idx in indices[0]:
        # FAISS pads missing neighbours with -1; df.iloc[-1] would silently
        # return the last FAQ instead of "no match".
        if idx < 0:
            continue
        row = df.iloc[idx]
        results.append((row['question'], row['rephrased_answer']))
    return results

In [None]:
# Show the nearest FAQ matches for a sample question.
query = "Where do I change my KYC details?"
matches = find_similar_faqs(query)
for q, a in matches:
    print(f"Matched FAQ: {q}\nAnswer: {a}\n")


Matched FAQ: How can I open a Savings
            account?
Answer: To open a free savings or salary bank account on Jupiter - powered by Federal Bank - in 3 minutes simply install the Jupiter App.Follow the on-screen instructions to create your account.

Matched FAQ: How can I set a PIN for my
            Debit Card?
Answer: You can set your debit card PIN by tapping on ‘PIN’ in the card tab of the Jupiter app.

Matched FAQ: How can I get a Federal Bank
            passbook?
Answer: You can request a Passbook by visiting your nearest Federal Bank branch. Please note that the Passbook is chargeable and is available only to verified account holders.



In [None]:
def find_best_faq_match(query, threshold=0.4):  # adjust threshold as needed
    """Answer ``query`` with the closest FAQ's answer, or a fallback message.

    Reads the module-level ``model``, ``index`` and ``df``.

    Args:
        query: Free-text user question.
        threshold: Maximum distance, as reported by the index, for a match to
            be accepted. For a flat L2 FAISS index this is a *squared*
            Euclidean distance, not a cosine similarity.
            NOTE(review): in the recorded run "How do I complete my KYC?"
            fell back to the default reply, so 0.4 may be too strict.

    Returns:
        The matched FAQ's ``rephrased_answer``, or the fallback string when
        nothing is close enough.
    """
    query_embedding = model.encode([query])
    distances, indices = index.search(np.array(query_embedding), 1)

    best_distance = distances[0][0]
    best_index = indices[0][0]

    # A negative index means the index had no neighbour to return (e.g. it is
    # empty); never let that reach df.iloc, where -1 means "last row".
    if best_index >= 0 and best_distance < threshold:  # closer means more similar
        return df.iloc[best_index]['rephrased_answer']
    return "I'm not sure about that. Could you rephrase or ask something else?"


In [None]:
print(find_best_faq_match("Can I track my pizza order?"))
# Expected: the fallback message, since the query is unrelated to the FAQs.

print(find_best_faq_match("How do I complete my KYC?"))
# Intended: a KYC-related answer.
# NOTE(review): the recorded run printed the fallback message for this query
# as well, i.e. no FAQ was within the default distance threshold.


I'm not sure about that. Could you rephrase or ask something else?
I'm not sure about that. Could you rephrase or ask something else?


In [None]:
def chat_with_bot():
    """Run a console chat loop that answers questions from the FAQ index.

    Each input line is passed to ``find_best_faq_match`` and the reply is
    printed. The loop ends when the user types 'exit' or 'quit' (any casing,
    surrounding whitespace ignored), or when input is closed or interrupted.
    Blank lines are ignored.
    """
    print("🤖 Hi! Ask me anything about Jupiter's services. Type 'exit' to stop.\n")

    while True:
        try:
            # Strip so that " exit" or "quit " are still recognised.
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel ends the chat cleanly
            # instead of surfacing a traceback.
            print("\n🤖 Bye! Have a great day.")
            break

        if not user_input:
            continue
        if user_input.lower() in ('exit', 'quit'):
            print("🤖 Bye! Have a great day.")
            break

        response = find_best_faq_match(user_input)
        print(f"🤖 {response}\n")


In [None]:
# Start the interactive console chat; it runs until the user types 'exit' or 'quit'.
chat_with_bot()

🤖 Hi! Ask me anything about Jupiter's services. Type 'exit' to stop.

You: What is jupiter money
🤖 Jupiter is the 1-app for everything money that lets you spend and save money, track expenses, pay bills and invest money in direct mutual funds. It enables you to make smart money decisions every day using simple intuitive and personalized money management tools

You: Is Jupiter a bank?
🤖 Jupiter is not a bank and does not hold or claim to hold a banking license. The Savings Account and the VISA Debit Card are provided by Federal Bank and follow all security standards as applicable. All funds in your account are insured up to the approved limit by DICGC.

You: exit
🤖 Bye! Have a great day.


In [None]:
!pip install streamlit


Collecting streamlit
  Downloading streamlit-1.46.0-py3-none-any.whl.metadata (9.0 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.46.0-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m28.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m52.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hI

In [None]:
import streamlit as st

In [None]:
# Page title and intro text for the Streamlit front end.
# NOTE(review): the recorded output shows Streamlit asking for `streamlit run`;
# these calls appear to have no visible effect when executed inside the
# notebook kernel and are presumably meant to live in a standalone script.
st.title("🧠 Jupiter FAQ Bot")
st.write("Ask me anything about Jupiter's services!")

2025-06-22 12:06:05.840 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]


In [None]:
# Text box for the user's question.
# NOTE(review): per the recorded warning, session state does not function
# without `streamlit run`, so this presumably stays empty in the notebook.
user_input = st.text_input("Your Question")

2025-06-22 12:06:20.917 Session state does not function when running a script without `streamlit run`


In [None]:
# Answer only once the user has entered a question.
if user_input:
    # NOTE(review): this rebinds the global `response`, which earlier in the
    # notebook holds the HTTP response of the scraped page.
    response = find_best_faq_match(user_input)
    st.markdown(f"**🤖 Answer:** {response}")

In [None]:
#

In [None]:
# Run the console chat again to try further phrasings of the same question.
chat_with_bot()

🤖 Hi! Ask me anything about Jupiter's services. Type 'exit' to stop.

You: hey what is jupiter used for
🤖 I'm not sure about that. Could you rephrase or ask something else?

You: what is jupiter
🤖 I'm not sure about that. Could you rephrase or ask something else?

You:  What is jupiter money
🤖 Jupiter is the 1-app for everything money that lets you spend and save money, track expenses, pay bills and invest money in direct mutual funds. It enables you to make smart money decisions every day using simple intuitive and personalized money management tools

You: exit
🤖 Bye! Have a great day.
