In [1]:
!pip install transformers datasets torch



In [2]:
import json
# Parse the card dataset. The context manager closes the file handle as soon as
# parsing finishes, and the encoding is pinned because JSON text is UTF-8.
with open('z.json', encoding='utf-8') as json_file:
  data = json.load(json_file)

In [3]:
import torch
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
device

device(type='cuda')

In [4]:
!pip install faiss-gpu-cu12



In [5]:
import faiss
import numpy as np

In [6]:
# Load a previously built FAISS index from disk.
# NOTE(review): search hits from this index are later used as positions into
# `metadata`, so presumably it was built from the same texts in the same
# order — confirm before trusting retrieval results.
faiss_index = faiss.read_index("index.faiss")

In [7]:
!pip install sentence-transformers



In [8]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model, moved to the device selected earlier in the notebook.
# The same model encodes both the flattened card texts and incoming user queries.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2").to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [9]:
def _detail_texts(card_name, section_type, detail):
  """Yield one searchable sentence per fact stored in a single detail record.

  A record holding both "question" and "answer" becomes one Q/A sentence.
  Otherwise the record is read through its "type"/"value" pair: a list value
  yields one sentence per nested item, any other value yields one sentence.
  """
  if "question" in detail and "answer" in detail:
    yield f"{card_name} - {section_type}: Question: {detail['question']} Answer: {detail['answer']}"
    return

  value = detail["value"]
  if not isinstance(value, list):
    yield f"{card_name} - {section_type}: {detail['type']} : {detail['value']}"
    return

  for nested_detail in value:
    yield f"{card_name} - {section_type}: {detail['type']} - {nested_detail['type']} : {nested_detail['value']}"


# `texts` and `metadata` are parallel lists: position i of one describes
# position i of the other.
texts = []
metadata = []
for entry in data:
  card_name = entry["card_name"]
  section_type = entry["section_type"]
  for detail in entry["details"]:
    for text in _detail_texts(card_name, section_type, detail):
      texts.append(text)
      metadata.append((card_name, section_type, text))

In [10]:
# Encode every flattened text into a NumPy array of embeddings.
# NOTE(review): in the visible cells these embeddings are only used to read the
# vector width below; they are never added to `faiss_index`, which is loaded
# from disk instead — confirm the on-disk index matches these texts.
embeddings = embedding_model.encode(texts, convert_to_numpy=True)
# Width of a single embedding vector (second axis of the array).
dimension = embeddings.shape[1]
dimension

384

In [11]:
def handle_query_faiss(query, card_name_filter=None, top_k=3):
  """Return up to ``top_k`` indexed passages closest to ``query``.

  Args:
    query: Free-text user question; it is embedded with ``embedding_model``.
    card_name_filter: When truthy, only passages whose card name equals this
      value are returned.
    top_k: Maximum number of passages to return.

  Returns:
    A list of dicts with keys ``distance`` (float), ``card_name``,
    ``section_type`` and ``text``, in the order FAISS returned them. The list
    is shorter than ``top_k`` when fewer matching passages exist.
  """
  if top_k <= 0:
    return []

  query_embedding = embedding_model.encode([query], convert_to_numpy=True)

  # The card filter is applied after the search. Asking FAISS for only top_k
  # hits and then discarding other cards frequently leaves nothing, so widen
  # the candidate pool when filtering. The cap bounds the cost on big indexes.
  search_k = top_k
  if card_name_filter:
    max_candidates = 2048
    search_k = max(top_k, min(faiss_index.ntotal, max_candidates))

  distances, indices = faiss_index.search(query_embedding, search_k)

  results = []
  for dist, idx in zip(distances[0], indices[0]):
    # FAISS pads missing neighbours with label -1; indexing metadata[-1]
    # would silently return the last passage instead of "no result".
    if idx < 0:
      continue
    card_name, section_type, text = metadata[int(idx)]
    if card_name_filter and card_name != card_name_filter:
      continue
    results.append({
        "distance": float(dist),  # plain float instead of a NumPy scalar
        "card_name": card_name,
        "section_type": section_type,
        "text": text
    })
    if len(results) >= top_k:
      break
  return results

In [12]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Single checkpoint id shared by the tokenizer and the model so the two cannot
# drift apart.
MODEL_ID = 'stabilityai/stablelm-2-zephyr-1_6b'

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# device_map="auto" already places the weights on the available accelerator
# (or the CPU). Chaining .to(device) on a model dispatched this way is
# redundant and conflicts with accelerate's placement, so the model is left
# where from_pretrained put it; downstream code should read `model.device`.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto"
)

In [13]:
def generate_answer(query, identified_card=None, top_k=3):
  """Answer ``query`` from passages retrieved out of the FAISS index.

  Args:
    query: The user's question.
    identified_card: Optional card name used to restrict retrieval.
    top_k: Maximum number of passages to place in the prompt.

  Returns:
    The model's answer as a stripped string, or a fixed message when no
    passage could be retrieved.
  """
  retrieved_contexts = handle_query_faiss(query, identified_card, top_k)
  context_texts = [context["text"] for context in retrieved_contexts]
  if not context_texts:
    # The prompt forbids outside knowledge, so with no context the model could
    # only guess; say so explicitly instead.
    return "No relevant information was found to answer this question."

  context = ' '.join(context_texts)
  question = f"Answer the following question, by extracting the relevant answer from the context provided. Do not use any other knowledge. Keep the answer concise. \n\n Question: {query} \n\n Context: {context} \n\n Answer:"
  prompt = [{"role": "user", "content": question}]

  # return_dict=True yields input_ids together with the attention mask and
  # makes the return type explicit rather than relying on the library default.
  inputs = tokenizer.apply_chat_template(
      prompt,
      add_generation_prompt=True,
      return_tensors='pt',
      return_dict=True
  ).to(model.device)
  tokens = model.generate(
      **inputs,
      max_new_tokens=1024,
      temperature=0.5,
      do_sample=True
  )

  # generate() returns prompt + completion. Decode only the completion so the
  # answer does not depend on splitting the text on chat-template markers,
  # which breaks if the query or context happens to contain them.
  prompt_length = inputs["input_ids"].shape[-1]
  completion = tokens[0][prompt_length:]
  return tokenizer.decode(completion, skip_special_tokens=True).strip()

In [14]:
import re

# Card products the assistant knows about, in the order they are tried.
card_names = [
    "Active Cash Visa Card",
    "Autograph Journey Visa Card",
    "Autograph Visa Card",
    "Reflect Visa Card",
    "Attune World Elite Mastercard",
    "Bilt World Elite Mastercard",
    "One Key Mastercard",
    "One Key+ Mastercard",
    "Choices Privileges World Elite Mastercard",
    "Choices Privileges Select World Elite Mastercard",
    "Signify Business Cash Card",
]


def identify_card_from_query(query):
    """Return the first known card name mentioned in ``query``, else ``None``.

    Matching ignores case and requires word boundaries on both sides of the
    name; names are tried in the order they appear in ``card_names``.
    """
    mentioned = (
        name
        for name in card_names
        if re.search(rf"\b{re.escape(name)}\b", query, re.IGNORECASE)
    )
    return next(mentioned, None)

def handle_user_query(query, top_k=3):
    """Route a user question to the answer generator for the card it mentions.

    Args:
        query: The user's question; it is expected to contain a card name.
        top_k: Maximum number of passages to retrieve for the answer.

    Returns:
        The generated answer when a card is identified. Otherwise a message
        asking the user to name a card, followed by the list of known cards.
        The message is returned (not only printed) so that a caller rendering
        the return value, such as a UI text output, shows it to the user.
    """
    identified_card = identify_card_from_query(query)

    if not identified_card:
        card_list = "\n".join(card_names)
        message = f"Could not identify the card. Please mention the card name along with your query again.\nHere is a list of available card names:\n{card_list}"
        print(message)
        return message

    print(f"Card identified: {identified_card}\nProceeding with your query...")
    return generate_answer(query, identified_card, top_k)

In [15]:
!pip install gradio



In [None]:
import gradio as gr
# Minimal text-in / text-out web UI: each submitted string is passed to
# handle_user_query and its return value is shown as the output text.
# NOTE(review): newer Gradio releases rename `allow_flagging` to
# `flagging_mode` — confirm against the installed version.
iface = gr.Interface(
    fn=handle_user_query,
    inputs='text',
    outputs='text',
    allow_flagging='never')
# share=True asks Gradio for a shareable link in addition to the local server;
# NOTE(review): confirm that exposing this demo outside the machine is intended.
iface.launch(share=True)