In [1]:
!pip install langchain faiss-cpu sentence-transformers openai pandas

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0.post1-cp310-cp310-win_amd64.whl.metadata (5.1 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-5.0.0-py3-none-any.whl.metadata (16 kB)
Collecting openai
  Downloading openai-1.97.1-py3-none-any.whl.metadata (29 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Downloading transformers-4.54.0-py3-none-any.whl.metadata (41 kB)
Collecting tqdm (from sentence-transformers)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting torch>=1.11.0 (from sentence-transformers)
  Downloading torch-2.7.1-cp310-cp310-win_amd64.whl.metadata (28 kB)
Collecting scikit-learn (from sentence-transformers)
  Downloading scikit_learn-1.7.1-cp310-cp310-win_amd64.whl.metadata (11 kB)
Collecting scipy (from sentence-transformers)
  Using cached scipy-1.15.3-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting huggingface-hub>=0.20.0 (from sentence-transformers)
  Downloading huggingface_hub-0.34

In [6]:
!pip install -U langchain-huggingface

Collecting langchain-huggingface
  Downloading langchain_huggingface-0.3.1-py3-none-any.whl.metadata (996 bytes)
Downloading langchain_huggingface-0.3.1-py3-none-any.whl (27 kB)
Installing collected packages: langchain-huggingface
Successfully installed langchain-huggingface-0.3.1


In [8]:
!pip install ipywidgets



In [9]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [10]:
import pandas as pd
from langchain.schema import Document

df = pd.read_csv("Training Dataset.csv")
docs = [Document(page_content=str(row.to_dict())) for _, row in df.iterrows()]

In [11]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(docs)

embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(chunks, embedding_model)

In [12]:
from transformers import pipeline
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

hf_pipeline = pipeline("text-generation", model="distilgpt2", max_new_tokens=100)
llm = HuggingFacePipeline(pipeline=hf_pipeline)

retriever = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_template("""Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}""")

document_chain = create_stuff_documents_chain(llm, prompt)

rag_chain = create_retrieval_chain(retriever, document_chain)

Device set to use cpu


In [13]:
query = "What factors affect loan approval?"
response = rag_chain.invoke({"input": query})
print("Answer:", response["answer"])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Answer: Human: Answer the following question based only on the provided context:

<context>
{'Loan_ID': 'LP001800', 'Gender': 'Male', 'Married': 'Yes', 'Dependents': '1', 'Education': 'Not Graduate', 'Self_Employed': 'No', 'ApplicantIncome': 2510, 'CoapplicantIncome': 1983.0, 'LoanAmount': 140.0, 'Loan_Amount_Term': 180.0, 'Credit_History': 1.0, 'Property_Area': 'Urban', 'Loan_Status': 'N'}

{'Loan_ID': 'LP002265', 'Gender': 'Male', 'Married': 'Yes', 'Dependents': '2', 'Education': 'Not Graduate', 'Self_Employed': 'No', 'ApplicantIncome': 1993, 'CoapplicantIncome': 1625.0, 'LoanAmount': 113.0, 'Loan_Amount_Term': 180.0, 'Credit_History': 1.0, 'Property_Area': 'Semiurban', 'Loan_Status': 'Y'}

{'Loan_ID': 'LP002115', 'Gender': 'Male', 'Married': 'Yes', 'Dependents': '3+', 'Education': 'Not Graduate', 'Self_Employed': 'No', 'ApplicantIncome': 2647, 'CoapplicantIncome': 1587.0, 'LoanAmount': 173.0, 'Loan_Amount_Term': 360.0, 'Credit_History': 1.0, 'Property_Area': 'Rural', 'Loan_Status': 