In [3]:
import requests
from bs4 import BeautifulSoup
import json
import time

In [2]:
BASE_URL = "https://community.jupiter.money"
CATEGORY_URL = f"{BASE_URL}/c/help/27"  # the help section

In [3]:
headers = {"User-Agent": "Mozilla/5.0"}

##### Below script is for the infinte scroll website

In [5]:
def get_all_topic_links(max_questions=300):
    links = []
    page = 0
    while len(links) < max_questions:
        url = f"{CATEGORY_URL}.json?page={page}"
        print(f"🔄 Fetching page {page}...")
        res = requests.get(url, headers=headers)
        if res.status_code != 200:
            break
        data = res.json()
        topics = data["topic_list"]["topics"]
        if not topics:
            break
        for topic in topics:
            slug = topic["slug"]
            id_ = topic["id"]
            title = topic["title"]
            topic_url = f"{BASE_URL}/t/{slug}/{id_}"
            links.append((title, topic_url))
            if len(links) >= max_questions:
                break
        page += 1
        time.sleep(1)  # be polite
    return links

def scrape_topic(title, url):
    try:
        print(f"➡️ Scraping: {title}")
        res = requests.get(url + ".json", headers=headers)
        data = res.json()
        posts = data["post_stream"]["posts"]
        full_question_html = posts[0]["cooked"]
        soup = BeautifulSoup(full_question_html, "html.parser")
        question_text = soup.get_text(strip=True)

        answers = []
        for post in posts[1:]:
            soup = BeautifulSoup(post["cooked"], "html.parser")
            text = soup.get_text(strip=True)
            if text:
                answers.append(text)

        return {
            "question_heading": title,
            "full_question": question_text,
            "answers": answers
        }
    except Exception as e:
        print(f"❌ Failed to scrape {url}: {e}")
        return None

def scrape_all_faqs(limit=300):
    all_data = []
    topics = get_all_topic_links(max_questions=limit)
    print(f"📌 Found {len(topics)} topics.")

    for title, url in topics:
        data = scrape_topic(title, url)
        if data:
            all_data.append(data)
        time.sleep(0.5)  # gentle on server

    with open("jupiter_faq_data.json", "w", encoding="utf-8") as f:
        json.dump(all_data, f, indent=2, ensure_ascii=False)
    print("✅ Scraping complete! Saved to jupiter_faq_data.json")

# 🔁 Run the scraper
scrape_all_faqs(limit=300)


🔄 Fetching page 0...
🔄 Fetching page 1...
🔄 Fetching page 2...
🔄 Fetching page 3...
🔄 Fetching page 4...
🔄 Fetching page 5...
🔄 Fetching page 6...
🔄 Fetching page 7...
🔄 Fetching page 8...
🔄 Fetching page 9...
📌 Found 300 topics.
➡️ Scraping: Process for service requests - Please follow this (Important)
➡️ Scraping: How to remove postpaid number reminders?
➡️ Scraping: Custom metal debit card
➡️ Scraping: How to transfer money from jupiter second account?
➡️ Scraping: Superpots removed
➡️ Scraping: Periodic Kyc update 
➡️ Scraping: Pot withdrawal issue
➡️ Scraping: RTO payment failed but money debited
➡️ Scraping: Need help onboarding
➡️ Scraping: Unable to Make Transactions Using My Visa Debit Card
➡️ Scraping: EMI loan repayment showing in progress
➡️ Scraping: I forgot my Jupiter mpin and I lost my debit card
➡️ Scraping: How to redeem Jupiter CC points without a new jupiter bank account?
➡️ Scraping: Lost acces to Jupiter bank account
➡️ Scraping: Referral code program
➡️ Scraping:

In [2]:
import json

with open("jupiter_faq_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

In [3]:
data[3]

{'question_heading': 'How to transfer money from jupiter second account?',
 'full_question': 'Hi, as we know Jupiter has two accounts in the app with Federal Bank.I don’t have any ongoing pot but somehow I transferred money to my second account using account details.Now how can I transfer that money to my main savings account?',
 'answers': ['Hai@Ray_SarGo to POTS section and click on it.Use the withdraw money/ delete Pots option',
  'You can use UPI',
  'Hello Ray, we have reached out to you via DM. Kindly take a moment to review and share your registered details with us. Thank you.']}

In [10]:
faq = data[3]
q = faq['full_question'].strip()
a = "\n".join(faq['answers']).strip()

In [11]:
print(q)

Hi, as we know Jupiter has two accounts in the app with Federal Bank.I don’t have any ongoing pot but somehow I transferred money to my second account using account details.Now how can I transfer that money to my main savings account?


In [12]:
print(a)

Hai@Ray_SarGo to POTS section and click on it.Use the withdraw money/ delete Pots option
You can use UPI
Hello Ray, we have reached out to you via DM. Kindly take a moment to review and share your registered details with us. Thank you.


In [13]:
print(f"Q: {q}\nA: {a}")

Q: Hi, as we know Jupiter has two accounts in the app with Federal Bank.I don’t have any ongoing pot but somehow I transferred money to my second account using account details.Now how can I transfer that money to my main savings account?
A: Hai@Ray_SarGo to POTS section and click on it.Use the withdraw money/ delete Pots option
You can use UPI
Hello Ray, we have reached out to you via DM. Kindly take a moment to review and share your registered details with us. Thank you.


In [2]:
with open("jupiter_faq_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

documents = []
for faq in data:
    q = faq['full_question'].strip()
    a = "\n".join(faq['answers']).strip()
    documents.append(f"Q: {q}\nA: {a}")/

Save it for vector indexing
with open("faq_chunks.txt", "w", encoding="utf-8") as f:
    for doc in documents:
        f.write(doc + "\n\n")

In [4]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pickle

model = SentenceTransformer("all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
embeddings = model.encode(documents, show_progress_bar=True)

In [None]:
print(embeddings.shape)

In [None]:
embeddings[1]

In [None]:
dimension = embeddings.shape[1] 

In [None]:
# FAISS index
index = faiss.IndexFlatL2(dimension)
index.add(np.array(embeddings))


In [24]:
# Save index + docs
faiss.write_index(index, "faq_index.faiss")
with open("faq_texts.pkl", "wb") as f:
    pickle.dump(documents, f)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import faiss, pickle
from sentence_transformers import SentenceTransformer
import torch

# Load everything
retriever_model = SentenceTransformer("all-MiniLM-L6-v2")
index = faiss.read_index("faq_index.faiss")
with open("faq_texts.pkl", "rb") as f:
    faq_texts = pickle.load(f)

##### Trying this on a new model

In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_id = "HuggingFaceH4/zephyr-7b-beta"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto"
)


Loading checkpoint shards: 100%|████████████████████| 8/8 [00:12<00:00,  1.62s/it]
Some parameters are on the meta device because they were offloaded to the disk and cpu.


In [11]:
def retrieve_context(query, top_k=3):
    query_embedding = retriever_model.encode([query])
    scores, indices = index.search(query_embedding, top_k)
    return [faq_texts[i] for i in indices[0]]


In [12]:
def generate_zephyr_response(user_query):
    context_chunks = retrieve_context(user_query)
    context = "\n\n".join(context_chunks)

    prompt = f"""<|system|>
You are a helpful and friendly assistant for a finance app. Use the context below to help the user. 
If the question can't be answered with the given context, respond politely with: 
"I'm not sure about that. Please contact support."

<|user|>
Context:
{context}

Question:
{user_query}

<|assistant|>"""

    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

    full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return full_output.split("<|assistant|>")[-1].strip()


In [2]:
import torch
print(torch.cuda.is_available())          # Should return True
print(torch.version.cuda)                 # Should return something like '12.1'
print(torch.cuda.get_device_name(0))      # Your GPU name


True
12.1
NVIDIA GeForce RTX 2050


In [13]:
print(generate_zephyr_response("What should I do if my RTO payment failed but amount was debited?"))
print(generate_zephyr_response("How can I permanently delete my Jupiter account?"))
print(generate_zephyr_response("Can I apply for a new card after blocking my old one?"))


Response:
Hi [User],

We suggest waiting for 2-3 business days, as it can take some time for the transaction to complete. In the event that the money has not been received by the beneficiary after this time, we will initiate a direct message (DM) to continue the discussion. Please also ensure that you have provided the correct beneficiary details, as mistakes in beneficiary information can sometimes cause delays or failures in transactions. If you need any further assistance, please don't hesitate to reach out to us.


KeyboardInterrupt: 

In [14]:
pip install gradio

Collecting gradioNote: you may need to restart the kernel to use updated packages.

  Downloading gradio-4.44.1-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.14-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.6.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting importlib-resources<7.0,>=1.3 (from gradio)
  Downloading importlib_resources-6.5.2-py3-none-any.whl.metadata (3.9 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl.metadata (3.1 kB)
Collecting matplotlib~=3.0 (from gradio)
  Downloading matplotlib-3.9.4-cp39-cp39-win_amd64.whl.metadata (11 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.18-cp39-cp39-w

  You can safely remove it manually.
  You can safely remove it manually.


#### UI for the bot

In [15]:
import gradio as gr

# Gradio-compatible function
def chat_with_faq_bot(user_input, chat_history=[]):
    response = generate_zephyr_response(user_input)
    chat_history.append((user_input, response))
    return "", chat_history

# Launch Chatbot UI
gr.ChatInterface(
    fn=chat_with_faq_bot,
    title="📘 Jupiter FAQ Bot",
    description="Ask questions related to Jupiter Money app FAQs. Powered by Zephyr + FAISS.",
    theme="default"
).launch()


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "C:\Users\Rishabh\anaconda3\envs\jupiteraibot\lib\site-packages\gradio\queueing.py", line 536, in process_events
    response = await route_utils.call_process_api(
  File "C:\Users\Rishabh\anaconda3\envs\jupiteraibot\lib\site-packages\gradio\route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
  File "C:\Users\Rishabh\anaconda3\envs\jupiteraibot\lib\site-packages\gradio\blocks.py", line 1945, in process_api
    data = await self.postprocess_data(block_fn, result["prediction"], state)
  File "C:\Users\Rishabh\anaconda3\envs\jupiteraibot\lib\site-packages\gradio\blocks.py", line 1768, in postprocess_data
    prediction_value = block.postprocess(prediction_value)
  File "C:\Users\Rishabh\anaconda3\envs\jupiteraibot\lib\site-packages\gradio\components\chatbot.py", line 494, in postprocess
    return self._postprocess_messages_tuples(cast(TupleFormat, value))
  File "C:\Users\Rishabh\anaconda3\envs\jupitera