In [6]:
pip install pandas faiss-cpu sentence-transformers openai gradio

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [7]:
import os

import openai  # must be imported here: this cell runs before any other cell imports it

# Read the key from the environment so it never has to be written in the notebook.
# os.getenv returns None when OPENAI_API_KEY is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")

In [8]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pickle

# Read the loan CSV (path is relative to the working directory) and replace
# every missing value, in every column, with the string "Unknown".
df = pd.read_csv("Training Dataset.csv").fillna("Unknown")

# Convert each row into a natural language chunk
def row_to_text(row):
    """Describe one applicant record as a single English sentence.

    Args:
        row: A mapping indexed by column name (for example the row Series that
            ``DataFrame.apply(..., axis=1)`` passes in). It must provide the
            keys 'Gender', 'Married', 'Dependents', 'Education',
            'Self_Employed', 'ApplicantIncome', 'LoanAmount',
            'Loan_Amount_Term', 'Credit_History', 'Property_Area' and
            'Loan_Status'.

    Returns:
        str: Comma-separated clauses, one per field, ending with a period.
        Any 'Married' value other than 'Yes' is rendered as "not married",
        and any 'Loan_Status' value other than 'Y' as "not approved".
    """
    gender = row['Gender']
    marital_status = 'married' if row['Married'] == 'Yes' else 'not married'
    dependents = row['Dependents']
    education = row['Education']
    self_employed = row['Self_Employed']
    income = row['ApplicantIncome']
    loan_amount = row['LoanAmount']
    loan_term = row['Loan_Amount_Term']
    credit_history = row['Credit_History']
    property_area = row['Property_Area']
    loan_outcome = 'approved' if row['Loan_Status'] == 'Y' else 'not approved'

    clauses = [
        f"Applicant is a {gender} who is {marital_status}",
        f"with {dependents} dependents",
        f"education level: {education}",
        f"self-employed: {self_employed}",
        f"income: {income}",
        f"loan amount: {loan_amount}",
        f"loan term: {loan_term}",
        f"credit history: {credit_history}",
        f"property area: {property_area}",
        f"loan status: {loan_outcome}.",
    ]
    return ", ".join(clauses)

# Render every row of df as one sentence; docs is a plain list of strings in
# the same order as the rows of df.
docs = df.apply(row_to_text, axis=1).tolist()

# Load the sentence-embedding model and encode all documents in one call
# (a progress bar is shown while batches are processed).
embedder = SentenceTransformer('all-MiniLM-L6-v2')
doc_embeddings = embedder.encode(docs, show_progress_bar=True)

# Build a flat L2-distance FAISS index sized to the embedding width.
# Vectors are added in docs order, so a FAISS id i refers to docs[i].
dimension = doc_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(doc_embeddings))

# Persist the index and the matching texts as a pair; both files are written
# to the current working directory and must be kept in sync with each other.
faiss.write_index(index, "loan_index.faiss")
with open("loan_texts.pkl", "wb") as f:
    pickle.dump(docs, f)

  return forward_call(*args, **kwargs)
Batches: 100%|██████████| 20/20 [00:10<00:00,  1.97it/s]


In [9]:
import os
import pickle

import faiss
import numpy as np
import openai
from sentence_transformers import SentenceTransformer

# Take the API key from the environment instead of assigning a placeholder
# string, which would overwrite any real key configured earlier and make every
# request fail authentication. os.getenv returns None when the variable is unset.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Load index + docs. The two files are a matched pair: FAISS id i refers to docs[i].
index = faiss.read_index("loan_index.faiss")
# NOTE: pickle.load can execute arbitrary code; only load a loan_texts.pkl that
# was produced by a trusted run of this notebook.
with open("loan_texts.pkl", "rb") as f:
    docs = pickle.load(f)

# Load embedder (must be the same model that was used to build the index,
# otherwise query vectors and stored vectors are not comparable).
embedder = SentenceTransformer('all-MiniLM-L6-v2')

def get_top_k_docs(query, k=5):
    """Return the stored documents most similar to ``query``.

    Args:
        query: Text to embed with the module-level ``embedder``.
        k: Maximum number of neighbours to request from the module-level
            FAISS ``index``.

    Returns:
        list: Up to ``k`` entries of the module-level ``docs`` list, in the
        order FAISS returns them (nearest first). Fewer than ``k`` entries are
        returned when the index holds fewer than ``k`` vectors.
    """
    query_embedding = embedder.encode([query])
    _, neighbor_ids = index.search(np.array(query_embedding), k)
    # FAISS pads the result with id -1 when it cannot find k neighbours;
    # indexing docs with -1 would silently return the last document instead.
    return [docs[i] for i in neighbor_ids[0] if i >= 0]

def generate_answer(query):
    """Answer ``query`` with a chat model, using retrieved loan records as context.

    The five nearest records from ``get_top_k_docs`` are joined with newlines
    and embedded in the prompt ahead of the question.

    Works with both generations of the ``openai`` package: the module-level
    ``openai.chat.completions`` API (openai>=1.0) is used when available,
    otherwise the legacy ``openai.ChatCompletion`` API. Both honour the key set
    on ``openai.api_key``.

    Args:
        query: The user's question.

    Returns:
        str: The text content of the first completion choice.

    Raises:
        Exception: Any error raised by retrieval or by the OpenAI client
            (authentication, network, rate limit) is propagated to the caller.
    """
    top_docs = get_top_k_docs(query, k=5)
    context = "\n".join(top_docs)

    prompt = f"""You are a helpful assistant for loan prediction based on the dataset.
Use the context to answer user questions intelligently.

Context:
{context}

Question:
{query}

Answer:"""

    request = dict(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
    )

    # openai>=1.0 removed openai.ChatCompletion; the `OpenAI` client class only
    # exists in that generation, so it is used to pick the right call style.
    if hasattr(openai, "OpenAI"):
        response = openai.chat.completions.create(**request)
    else:
        response = openai.ChatCompletion.create(**request)

    # Attribute access is supported by the response objects of both generations.
    return response.choices[0].message.content

In [10]:
import gradio as gr

def rag_chatbot(query):
    """UI callback: return the generated answer, or the error text on failure.

    Any exception raised by ``generate_answer`` is caught and reported back as
    a string of the form ``"Error: <message>"`` rather than being raised.
    """
    try:
        answer = generate_answer(query)
    except Exception as err:
        return "Error: " + str(err)
    return answer

# Single-textbox web UI: the question goes to rag_chatbot and its returned
# string is shown as plain text.
loan_chat_ui = gr.Interface(
    fn=rag_chatbot,
    inputs=gr.Textbox(label="Ask about loan approvals 📊"),
    outputs="text",
    title="💬 RAG Loan Chatbot",
    description="Ask questions like 'What causes rejection?', 'How important is credit history?', etc.",
)
loan_chat_ui.launch()

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


