In [None]:
# !pip install torch
# !pip install transformers==4.35.2
# !pip install sentence-transformers
# !pip install faiss-cpu
# !pip install gradio

In [7]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import faiss
import numpy as np
import torch
import time

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Pipeline configuration shared by the cells below.
# Model name passed to SentenceTransformer(...) to embed documents and queries.
EMBEDDING_MODEL = 'all-MiniLM-L6-v2'
# Default checkpoint loaded by setup_generator() as the seq2seq answer generator.
GENERATOR_MODEL = 'google/flan-t5-small'
# Number of nearest documents retrieved from the FAISS index per question.
TOP_K = 3
# max_length handed to the tokenizer when encoding the prompt (in tokens).
MAX_INPUT_LENGTH = 512
# max_new_tokens handed to model.generate() for the answer.
MAX_OUTPUT_TOKENS = 100

In [10]:
def load_and_prepare_documents(train_path='Training-Dataset.csv', test_path='Test-Dataset.csv'):
    """Read the two CSV files and turn every row into one text document.

    The frames are stacked (train rows first, then test rows), missing values
    are replaced by the string "Unknown", and each row is rendered as
    ``"col1: value1 | col2: value2 | ..."`` using the combined column order.

    Args:
        train_path: Path to the first CSV file.
        test_path: Path to the second CSV file.

    Returns:
        list[str]: One document string per row of the combined frame.
    """
    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)
    # ignore_index gives the stacked frame a clean 0..n-1 index instead of
    # repeating the row labels of both inputs.
    combined_df = pd.concat([train_df, test_df], axis=0, ignore_index=True).fillna("Unknown")

    columns = list(combined_df.columns)
    # itertuples keeps each column's own values; iterrows would build a Series
    # per row and upcast mixed int/float columns to one dtype, so an integer
    # id would be rendered as "1.0" in an all-numeric frame.
    return [
        " | ".join(f"{col}: {str(value)}" for col, value in zip(columns, row_values))
        for row_values in combined_df.itertuples(index=False, name=None)
    ]

In [11]:
def create_faiss_index(documents, embedder):
    """Embed the documents and load them into a flat L2 FAISS index.

    Args:
        documents: Non-empty sequence of document strings.
        embedder: Object with an ``encode(documents, convert_to_numpy=True)``
            method returning one vector per document.

    Returns:
        tuple: ``(index, embeddings)`` where ``index`` is a
        ``faiss.IndexFlatL2`` holding every vector and ``embeddings`` is the
        ``(n_documents, dim)`` float32 array that was added to it.

    Raises:
        ValueError: If ``documents`` is empty or the embedder does not return
            a 2-D array.
    """
    if len(documents) == 0:
        raise ValueError("Cannot build a FAISS index from an empty document list.")

    # FAISS expects C-contiguous float32 input; coerce once here (a no-op when
    # the embedder already returns that layout) instead of copying blindly.
    embeddings = np.ascontiguousarray(
        embedder.encode(documents, convert_to_numpy=True), dtype=np.float32
    )
    if embeddings.ndim != 2:
        raise ValueError(
            f"Expected a 2-D embedding matrix, got an array of shape {embeddings.shape}."
        )

    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    return index, embeddings

In [12]:
def retrieve_context(query, embedder, index, documents, top_k=TOP_K):
    """Return the documents whose embeddings are nearest to the query.

    Args:
        query: Question text to embed and search for.
        embedder: Object with an ``encode`` method; called with a one-element
            list containing ``query``.
        index: FAISS index to search; position ``i`` in the index is taken to
            correspond to ``documents[i]``.
        documents: Sequence of document strings indexed by FAISS position.
        top_k: Maximum number of documents to return.

    Returns:
        list[str]: Up to ``top_k`` documents in the order returned by the
        index search. Fewer are returned when the index holds fewer than
        ``top_k`` vectors, and an empty list when ``top_k`` is not positive.
    """
    if top_k <= 0:
        return []

    # FAISS expects C-contiguous float32 queries of shape (n_queries, dim).
    query_vector = np.ascontiguousarray(
        embedder.encode([query], convert_to_numpy=True), dtype=np.float32
    )
    _, indices = index.search(query_vector, top_k)
    # FAISS pads missing neighbours with label -1; without this filter
    # documents[-1] would silently return the last document as a bogus hit.
    return [documents[i] for i in indices[0] if i >= 0]

In [13]:
def setup_generator(model_name=GENERATOR_MODEL):
    """Load the seq2seq generator and place it on the best available device.

    Args:
        model_name: Checkpoint name or path passed to ``from_pretrained``.

    Returns:
        tuple: ``(tokenizer, model, device)`` where ``model`` has already been
        moved to ``device`` (CUDA when available, otherwise CPU).
    """
    seq2seq_tokenizer = AutoTokenizer.from_pretrained(model_name)
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    if torch.cuda.is_available():
        target_device = torch.device("cuda")
    else:
        target_device = torch.device("cpu")

    seq2seq_model = seq2seq_model.to(target_device)
    return seq2seq_tokenizer, seq2seq_model, target_device


In [14]:
def answer_question_rag(query, embedder, index, documents, tokenizer, model, device):
    """Answer a question using retrieved documents as context for the generator.

    The TOP_K nearest documents are retrieved, joined into a context string
    and placed in a ``Context / Question / Answer`` prompt that is fed to the
    seq2seq model.

    Args:
        query: Question text.
        embedder: Embedding model forwarded to ``retrieve_context``.
        index: FAISS index forwarded to ``retrieve_context``.
        documents: Document strings forwarded to ``retrieve_context``.
        tokenizer: Tokenizer matching ``model``.
        model: Seq2seq model exposing ``generate``.
        device: Device the encoded prompt is moved to before generation.

    Returns:
        str: The generated answer, a short hint when ``query`` is blank, or
        ``"Error: <message>"`` if anything in the pipeline raises. Errors are
        reported as text on purpose so the chat UI keeps running.
    """
    # A blank question has nothing to retrieve or answer; skip the models.
    if query is None or not str(query).strip():
        return "Please enter a question."

    try:
        context_docs = retrieve_context(query, embedder, index, documents, top_k=TOP_K)
        context = " ".join(context_docs)

        prompt_head = "Context: "
        prompt_tail = f" \n\nQuestion: {query} \n\nAnswer:"

        # The tokenizer truncates from the right, and the question sits at the
        # end of the prompt. Trim the *context* to the token budget left over
        # after the fixed prompt parts so a long context cannot push the
        # question out of the model input.
        overhead = len(tokenizer(prompt_head + prompt_tail)["input_ids"])
        context_budget = MAX_INPUT_LENGTH - overhead
        if context_budget <= 0:
            context = ""
        else:
            # Ask for one token more than the budget to detect overflow
            # without tokenizing an arbitrarily long context.
            context_ids = tokenizer(
                context,
                add_special_tokens=False,
                truncation=True,
                max_length=context_budget + 1,
            )["input_ids"]
            if len(context_ids) > context_budget:
                context = tokenizer.decode(
                    context_ids[:context_budget], skip_special_tokens=True
                )

        prompt = f"{prompt_head}{context}{prompt_tail}"
        # truncation stays on as a safety net: token counts of the joined
        # prompt can differ slightly from the sum of its parts.
        inputs = tokenizer(
            prompt, return_tensors="pt", truncation=True, max_length=MAX_INPUT_LENGTH
        ).to(device)
        outputs = model.generate(**inputs, max_new_tokens=MAX_OUTPUT_TOKENS)
        answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

        return answer
    except Exception as e:
        return f"Error: {str(e)}"

In [15]:
# End-to-end smoke run: build every pipeline component, ask one question and
# report the elapsed wall-clock time. The names assigned here (documents,
# embedder, index, tokenizer, model, device) are module-level and are read
# again by rag_chatbot in the Gradio cell.
if __name__ == "__main__":
    start_time = time.time()
    
    print("Loading documents...")
    # Uses the default CSV paths of load_and_prepare_documents.
    documents = load_and_prepare_documents()

    print("Loading embedding model...")
    embedder = SentenceTransformer(EMBEDDING_MODEL)

    print("Building FAISS index...")
    # The raw embedding matrix is not needed after indexing, hence the `_`.
    index, _ = create_faiss_index(documents, embedder)

    print("Loading generator model...")
    tokenizer, model, device = setup_generator()

    print("System ready. Asking question...\n")
    question = "What is the average loan amount?"
    answer = answer_question_rag(question, embedder, index, documents, tokenizer, model, device)

    print(f"Q: {question}")
    print(f"A: {answer}")

    # Covers the whole run, including model loading and index construction.
    print(f"\nCompleted in {round(time.time() - start_time, 2)} seconds.")

Loading documents...
Loading embedding model...


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


Building FAISS index...


  return forward_call(*args, **kwargs)


Loading generator model...
System ready. Asking question...

Q: What is the average loan amount?
A: The average loan amount is £25.

Completed in 48.65 seconds.


In [16]:
import gradio as gr

def rag_chatbot(query):
    """Gradio callback: answer ``query`` with the pipeline objects built earlier.

    Relies on the module-level ``embedder``, ``index``, ``documents``,
    ``tokenizer``, ``model`` and ``device`` already being defined.
    """
    return answer_question_rag(
        query=query,
        embedder=embedder,
        index=index,
        documents=documents,
        tokenizer=tokenizer,
        model=model,
        device=device,
    )

# Wrap rag_chatbot in a text-in / text-out Gradio interface and start serving it.
gr.Interface(fn=rag_chatbot, inputs="text", outputs="text", title="Loan Dataset RAG Chatbot").launch()


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


