In [8]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
from transformers import pipeline

In [9]:
df = pd.read_csv("Training Dataset.csv")
print("Total rows in dataset:", len(df))
df.head()

Total rows in dataset: 614


Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [10]:
documents = []

for _, row in df.iterrows():
    doc = f"""
    Applicant ID: {row['Loan_ID']}
    Gender: {row['Gender']}
    Married: {row['Married']}
    Education: {row['Education']}
    Self Employed: {row['Self_Employed']}
    Applicant Income: {row['ApplicantIncome']}
    Coapplicant Income: {row.get('CoapplicantIncome', 'N/A')}
    Loan Amount: {row['LoanAmount']}
    Loan Amount Term: {row.get('Loan_Amount_Term', 'N/A')}
    Credit History: {row.get('Credit_History', 'N/A')}
    Property Area: {row.get('Property_Area', 'N/A')}
    Loan Status: {row['Loan_Status']}
    """
    documents.append(doc.strip())

In [11]:
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
doc_embeddings = embedding_model.encode(documents)

index = faiss.IndexFlatL2(doc_embeddings.shape[1])
index.add(np.array(doc_embeddings))

print("Documents embedded and index created.")

Documents embedded and index created.


In [None]:
generator = pipeline("text2text-generation", model="google/flan-t5-small")

In [13]:
def retrieve_top_k(query, k=3):
    query_embedding = embedding_model.encode([query])
    distances, indices = index.search(np.array(query_embedding), k)
    return [documents[i] for i in indices[0]]

def generate_answer(query, context_docs):
    context = "\n".join(context_docs)
    prompt = f"Answer the question based on the context below:\n{context}\n\nQuestion: {query}"
    result = generator(prompt, max_length=256, do_sample=False)
    return result[0]['generated_text']

In [14]:
while True:
    query = input("\nAsk a question about loan data (or type 'exit' to quit): ")
    if query.lower() == 'exit':
        print("Chatbot session ended.")
        break

    top_docs = retrieve_top_k(query)
    answer = generate_answer(query, top_docs)

    print("\nAnswer:")
    print(answer)

    print("\nContext Used:")
    for doc in top_docs:
        print("-----")
        print(doc)


Ask a question about loan data (or type 'exit' to quit):  How many applicants are self-employed?


Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



Answer:
3

Context Used:
-----
Applicant ID: LP002868
    Gender: Male
    Married: Yes
    Education: Graduate
    Self Employed: No
    Applicant Income: 3159
    Coapplicant Income: 461.0
    Loan Amount: 108.0
    Loan Amount Term: 84.0
    Credit History: 1.0
    Property Area: Urban
    Loan Status: Y
-----
Applicant ID: LP002983
    Gender: Male
    Married: Yes
    Education: Graduate
    Self Employed: No
    Applicant Income: 8072
    Coapplicant Income: 240.0
    Loan Amount: 253.0
    Loan Amount Term: 360.0
    Credit History: 1.0
    Property Area: Urban
    Loan Status: Y
-----
Applicant ID: LP002277
    Gender: Female
    Married: No
    Education: Graduate
    Self Employed: No
    Applicant Income: 3180
    Coapplicant Income: 0.0
    Loan Amount: 71.0
    Loan Amount Term: 360.0
    Credit History: 0.0
    Property Area: Urban
    Loan Status: N



Ask a question about loan data (or type 'exit' to quit):  Tell me about the loan status of female applicants.


Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



Answer:
Applicants with a PG&E or PG&E are eligible to apply for a loan.

Context Used:
-----
Applicant ID: LP002912
    Gender: Male
    Married: Yes
    Education: Graduate
    Self Employed: No
    Applicant Income: 4283
    Coapplicant Income: 3000.0
    Loan Amount: 172.0
    Loan Amount Term: 84.0
    Credit History: 1.0
    Property Area: Rural
    Loan Status: N
-----
Applicant ID: LP001186
    Gender: Female
    Married: Yes
    Education: Graduate
    Self Employed: Yes
    Applicant Income: 11500
    Coapplicant Income: 0.0
    Loan Amount: 286.0
    Loan Amount Term: 360.0
    Credit History: 0.0
    Property Area: Urban
    Loan Status: N
-----
Applicant ID: LP001267
    Gender: Female
    Married: Yes
    Education: Graduate
    Self Employed: No
    Applicant Income: 1378
    Coapplicant Income: 1881.0
    Loan Amount: 167.0
    Loan Amount Term: 360.0
    Credit History: 1.0
    Property Area: Urban
    Loan Status: N



Ask a question about loan data (or type 'exit' to quit):  What is the property area for married applicants?


Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



Answer:
3366 Coapplicant Income: 2200.0 Loan Amount Term: 180.0 Credit History: 0.0 Property Area: Rural Loan Status: N

Context Used:
-----
Applicant ID: LP001197
    Gender: Male
    Married: Yes
    Education: Graduate
    Self Employed: No
    Applicant Income: 3366
    Coapplicant Income: 2200.0
    Loan Amount: 135.0
    Loan Amount Term: 360.0
    Credit History: 1.0
    Property Area: Rural
    Loan Status: N
-----
Applicant ID: LP001207
    Gender: Male
    Married: Yes
    Education: Not Graduate
    Self Employed: Yes
    Applicant Income: 2609
    Coapplicant Income: 3449.0
    Loan Amount: 165.0
    Loan Amount Term: 180.0
    Credit History: 0.0
    Property Area: Rural
    Loan Status: N
-----
Applicant ID: LP002789
    Gender: Male
    Married: Yes
    Education: Graduate
    Self Employed: No
    Applicant Income: 3593
    Coapplicant Income: 4266.0
    Loan Amount: 132.0
    Loan Amount Term: 180.0
    Credit History: 0.0
    Property Area: Rural
    Loan Status: N



Ask a question about loan data (or type 'exit' to quit):  What is the average loan amount?


Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



Answer:
4750.0 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term: 3750 Loan Amount Term

Context Used:
-----
Applicant ID: LP001256
    Gender: Male
    Married: No
    Education: Graduate
    Self Employed: No
    Applicant Income: 3750
    Coapplicant Income: 4750.0
    Loan Amount: 176.0
    Loan Amount Term: 360.0
    Credit History: 1.


Ask a question about loan data (or type 'exit' to quit):  Tell me about self-employed applicants who got loans.


Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



Answer:
Self-employed applicants who got loans

Context Used:
-----
Applicant ID: LP002912
    Gender: Male
    Married: Yes
    Education: Graduate
    Self Employed: No
    Applicant Income: 4283
    Coapplicant Income: 3000.0
    Loan Amount: 172.0
    Loan Amount Term: 84.0
    Credit History: 1.0
    Property Area: Rural
    Loan Status: N
-----
Applicant ID: LP002959
    Gender: Female
    Married: Yes
    Education: Graduate
    Self Employed: No
    Applicant Income: 12000
    Coapplicant Income: 0.0
    Loan Amount: 496.0
    Loan Amount Term: 360.0
    Credit History: 1.0
    Property Area: Semiurban
    Loan Status: Y
-----
Applicant ID: LP001606
    Gender: Male
    Married: Yes
    Education: Graduate
    Self Employed: No
    Applicant Income: 3497
    Coapplicant Income: 1964.0
    Loan Amount: 116.0
    Loan Amount Term: 360.0
    Credit History: 1.0
    Property Area: Rural
    Loan Status: Y



Ask a question about loan data (or type 'exit' to quit):   Were married people more likely to get approval?


Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



Answer:
no

Context Used:
-----
Applicant ID: LP001197
    Gender: Male
    Married: Yes
    Education: Graduate
    Self Employed: No
    Applicant Income: 3366
    Coapplicant Income: 2200.0
    Loan Amount: 135.0
    Loan Amount Term: 360.0
    Credit History: 1.0
    Property Area: Rural
    Loan Status: N
-----
Applicant ID: LP002941
    Gender: Male
    Married: Yes
    Education: Not Graduate
    Self Employed: Yes
    Applicant Income: 6383
    Coapplicant Income: 1000.0
    Loan Amount: 187.0
    Loan Amount Term: 360.0
    Credit History: 1.0
    Property Area: Rural
    Loan Status: N
-----
Applicant ID: LP001097
    Gender: Male
    Married: No
    Education: Graduate
    Self Employed: Yes
    Applicant Income: 4692
    Coapplicant Income: 0.0
    Loan Amount: 106.0
    Loan Amount Term: 360.0
    Credit History: 1.0
    Property Area: Rural
    Loan Status: N



Ask a question about loan data (or type 'exit' to quit):  How many graduates received loans?


Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



Answer:
1750.0

Context Used:
-----
Applicant ID: LP001259
    Gender: Male
    Married: Yes
    Education: Graduate
    Self Employed: Yes
    Applicant Income: 1000
    Coapplicant Income: 3022.0
    Loan Amount: 110.0
    Loan Amount Term: 360.0
    Credit History: 1.0
    Property Area: Urban
    Loan Status: N
-----
Applicant ID: LP002842
    Gender: Male
    Married: Yes
    Education: Graduate
    Self Employed: No
    Applicant Income: 3417
    Coapplicant Income: 1750.0
    Loan Amount: 186.0
    Loan Amount Term: 360.0
    Credit History: 1.0
    Property Area: Urban
    Loan Status: Y
-----
Applicant ID: LP002277
    Gender: Female
    Married: No
    Education: Graduate
    Self Employed: No
    Applicant Income: 3180
    Coapplicant Income: 0.0
    Loan Amount: 71.0
    Loan Amount Term: 360.0
    Credit History: 0.0
    Property Area: Urban
    Loan Status: N



Ask a question about loan data (or type 'exit' to quit):  How much salary is required to apply for a home loan?


Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



Answer:
$80

Context Used:
-----
Applicant ID: LP001633
    Gender: Male
    Married: Yes
    Education: Graduate
    Self Employed: No
    Applicant Income: 6400
    Coapplicant Income: 7250.0
    Loan Amount: 180.0
    Loan Amount Term: 360.0
    Credit History: 0.0
    Property Area: Urban
    Loan Status: N
-----
Applicant ID: LP001784
    Gender: Male
    Married: Yes
    Education: Graduate
    Self Employed: No
    Applicant Income: 5500
    Coapplicant Income: 1260.0
    Loan Amount: 170.0
    Loan Amount Term: 360.0
    Credit History: 1.0
    Property Area: Rural
    Loan Status: Y
-----
Applicant ID: LP001238
    Gender: Male
    Married: Yes
    Education: Not Graduate
    Self Employed: Yes
    Applicant Income: 7100
    Coapplicant Income: 0.0
    Loan Amount: 125.0
    Loan Amount Term: 60.0
    Credit History: 1.0
    Property Area: Urban
    Loan Status: Y



Ask a question about loan data (or type 'exit' to quit):  Can a student apply for a loan without a co-signer?


Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



Answer:
No

Context Used:
-----
Applicant ID: LP002288
    Gender: Male
    Married: Yes
    Education: Not Graduate
    Self Employed: No
    Applicant Income: 2889
    Coapplicant Income: 0.0
    Loan Amount: 45.0
    Loan Amount Term: 180.0
    Credit History: 0.0
    Property Area: Urban
    Loan Status: N
-----
Applicant ID: LP002277
    Gender: Female
    Married: No
    Education: Graduate
    Self Employed: No
    Applicant Income: 3180
    Coapplicant Income: 0.0
    Loan Amount: 71.0
    Loan Amount Term: 360.0
    Credit History: 0.0
    Property Area: Urban
    Loan Status: N
-----
Applicant ID: LP001572
    Gender: Male
    Married: Yes
    Education: Graduate
    Self Employed: No
    Applicant Income: 9323
    Coapplicant Income: 0.0
    Loan Amount: 75.0
    Loan Amount Term: 180.0
    Credit History: 1.0
    Property Area: Urban
    Loan Status: Y



Ask a question about loan data (or type 'exit' to quit):   What is the minimum CIBIL score required for loan approval?


Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



Answer:
0

Context Used:
-----
Applicant ID: LP001256
    Gender: Male
    Married: No
    Education: Graduate
    Self Employed: No
    Applicant Income: 3750
    Coapplicant Income: 4750.0
    Loan Amount: 176.0
    Loan Amount Term: 360.0
    Credit History: 1.0
    Property Area: Urban
    Loan Status: N
-----
Applicant ID: LP002500
    Gender: Male
    Married: Yes
    Education: Not Graduate
    Self Employed: No
    Applicant Income: 2947
    Coapplicant Income: 1664.0
    Loan Amount: 70.0
    Loan Amount Term: 180.0
    Credit History: 0.0
    Property Area: Urban
    Loan Status: N
-----
Applicant ID: LP001633
    Gender: Male
    Married: Yes
    Education: Graduate
    Self Employed: No
    Applicant Income: 6400
    Coapplicant Income: 7250.0
    Loan Amount: 180.0
    Loan Amount Term: 360.0
    Credit History: 0.0
    Property Area: Urban
    Loan Status: N



Ask a question about loan data (or type 'exit' to quit):  What documents are required to apply for a loan?


Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



Answer:
Applicant ID: LP001228 Gender: Male Married: No Education: Not Graduate Self Employed: No Applicant Income: 3200 Coapplicant Income: 2254.0 Loan Amount Term: 180.0 Credit History: 0.0 Property Area: Urban Loan Status: N Applicant ID: LP001228 Gender: Male Married: No Education: Not Graduate Self Employed: No Applicant Income: 3200 Coapplicant Income: 2254.0 Loan Amount Term: 180.0 Credit History: 0.0 Property Area: Urban Loan Status: N

Context Used:
-----
Applicant ID: LP001994
    Gender: Female
    Married: No
    Education: Graduate
    Self Employed: No
    Applicant Income: 2400
    Coapplicant Income: 1863.0
    Loan Amount: 104.0
    Loan Amount Term: 360.0
    Credit History: 0.0
    Property Area: Urban
    Loan Status: N
-----
Applicant ID: LP001800
    Gender: Male
    Married: Yes
    Education: Not Graduate
    Self Employed: No
    Applicant Income: 2510
    Coapplicant Income: 1983.0
    Loan Amount: 140.0
    Loan Amount Term: 180.0
    Credit History: 1.0
   


Ask a question about loan data (or type 'exit' to quit):  exit


Chatbot session ended.
