In [18]:
pip install faker

Note: you may need to restart the kernel to use updated packages.
Collecting faker
  Downloading Faker-35.2.0-py3-none-any.whl (1.9 MB)
Installing collected packages: faker
Successfully installed faker-35.2.0


You should consider upgrading via the 'c:\Users\KIIT\AppData\Local\Programs\Python\Python38\python.exe -m pip install --upgrade pip' command.


In [19]:
from faker import Faker

In [20]:

import random

fake = Faker()
from datetime import date
from dateutil.relativedelta import relativedelta

six_months = date.today() - relativedelta(months=+6)
three_months = date.today() - relativedelta(months=+3)
months = [three_months, six_months]

# Generate demographic and personal information
def generate_customer_data():
    age = random.randint(20, 70)
    gender = random.choice(['Male', 'Female'])
    marital_status = random.choice(['Single', 'Married', 'Divorced', 'Widowed'])
    income_level = random.choice(['Low', 'Medium', 'High'])
    education = random.choice(['High School', 'College', 'University'])
    occupation = fake.job()
    residential_status = random.choice(['Owns house', 'Rents', 'Living with parents'])
    dependents = random.randint(0, 5),  # Number of dependents
    debt_to_income = round(random.uniform(0.1, 0.5), 2),  # Debt-to-income ratio
    credit_bureau = random.randint(400, 850)

    return {
        'Age': age,
        'Gender': gender,
        'Marital Status': marital_status,
        'Income Level': income_level,
        'Education': education,
        'Occupation': occupation,
        'Residential Status': residential_status,
        'Dependents': dependents,
        'Debt-to-Income': debt_to_income,
        'Credit_Bureau': credit_bureau
    }

# Function to generate bureau product inquiries
def generate_inquiries(last_months):
    inquiries = []
    today = fake.date_this_month()

    # Generate inquiries for the last `last_months` period
    for _ in range(random.randint(1, 5)):  # Random number of inquiries
        inquiry_date = fake.date_between(start_date=last_months, end_date=today)
        product_type = random.choice(['Personal Loan', 'Credit Card', 'Mortgage'])
        inquiries.append({'product_name': product_type, 'date': inquiry_date})

    return inquiries if inquiries else []

In [21]:
# Function to generate dataset
def generate_dataset(num_rows,months):
    data_rows = []

    for _ in range(num_rows):
        customer_data = generate_customer_data()
        last_3_months_inquiries = generate_inquiries(months[0])
        last_6_months_inquiries = generate_inquiries(months[1])

        # Initialize columns for each product type
        customer_row = {
            'Customer ID': fake.uuid4(),
            'Age': customer_data['Age'],
            'Gender': customer_data['Gender'],
            'Marital Status': customer_data['Marital Status'],
            'Income Level': customer_data['Income Level'],
            'Education': customer_data['Education'],
            'Occupation': customer_data['Occupation'],
            'Residential Status': customer_data['Residential Status'],
            'Dependents': customer_data['Dependents'],
            'Debt-to-Income': customer_data['Debt-to-Income'],
            'Credit_Bureau': customer_data['Credit_Bureau']
        }

        # Process last 3 months inquiries
        for product_type in ['Personal Loan', 'Credit Card', 'Mortgage']:
            inq_in_last_3_months = any(inq['product_name'] == product_type for inq in last_3_months_inquiries)
            customer_row[f'last_3months_{product_type.replace(" ", "_").lower()}_inq'] = inq_in_last_3_months

        # Process last 6 months inquiries
        for product_type in ['Personal Loan', 'Credit Card', 'Mortgage']:
            inq_in_last_6_months = any(inq['product_name'] == product_type for inq in last_6_months_inquiries)
            customer_row[f'last_6months_{product_type.replace(" ", "_").lower()}_inq'] = inq_in_last_6_months

        data_rows.append(customer_row)

    return data_rows

# Example usage to generate 50 rows of data
dataset = generate_dataset(500, months)

## Load the Data

In [22]:
import pandas as pd
import numpy as np
import os
import warnings

In [23]:
warnings.filterwarnings('ignore')
df = pd.DataFrame(dataset)
df.to_csv("products_info.csv")

In [24]:
df.head()

Unnamed: 0,Customer ID,Age,Gender,Marital Status,Income Level,Education,Occupation,Residential Status,Dependents,Debt-to-Income,Credit_Bureau,last_3months_personal_loan_inq,last_3months_credit_card_inq,last_3months_mortgage_inq,last_6months_personal_loan_inq,last_6months_credit_card_inq,last_6months_mortgage_inq
0,f729f68b-9be3-486f-9e11-652368aec58f,59,Female,Married,Medium,College,Press photographer,Owns house,"(3,)","(0.15,)",455,False,True,True,True,False,False
1,055a1b54-ef42-4346-b793-bf703c83e799,63,Male,Divorced,Low,College,"Buyer, retail",Rents,"(0,)","(0.22,)",833,False,True,True,True,True,False
2,ef9e2e55-5498-4f72-a7b2-fb6a51b675e9,31,Male,Married,High,High School,Orthoptist,Rents,"(2,)","(0.2,)",642,False,False,True,True,False,True
3,5859582d-fdec-4e53-a6c4-71853f1c54cb,62,Male,Divorced,Medium,College,Computer games developer,Owns house,"(0,)","(0.37,)",539,True,True,True,True,False,False
4,5c462e59-a439-46f5-8eb4-874c6f219213,48,Female,Married,Low,High School,Drilling engineer,Living with parents,"(2,)","(0.19,)",848,True,True,False,True,True,True


In [25]:
df.shape

(500, 17)

# Store the Data in Vector DB

In [26]:
dataset[0]

{'Customer ID': 'f729f68b-9be3-486f-9e11-652368aec58f',
 'Age': 59,
 'Gender': 'Female',
 'Marital Status': 'Married',
 'Income Level': 'Medium',
 'Education': 'College',
 'Occupation': 'Press photographer',
 'Residential Status': 'Owns house',
 'Dependents': (3,),
 'Debt-to-Income': (0.15,),
 'Credit_Bureau': 455,
 'last_3months_personal_loan_inq': False,
 'last_3months_credit_card_inq': True,
 'last_3months_mortgage_inq': True,
 'last_6months_personal_loan_inq': True,
 'last_6months_credit_card_inq': False,
 'last_6months_mortgage_inq': False}

In [27]:
df['content'] = [f"Based on the following customer data: {data}, suggest suitable banking lending products." for data in dataset]
df.head()

Unnamed: 0,Customer ID,Age,Gender,Marital Status,Income Level,Education,Occupation,Residential Status,Dependents,Debt-to-Income,Credit_Bureau,last_3months_personal_loan_inq,last_3months_credit_card_inq,last_3months_mortgage_inq,last_6months_personal_loan_inq,last_6months_credit_card_inq,last_6months_mortgage_inq,content
0,f729f68b-9be3-486f-9e11-652368aec58f,59,Female,Married,Medium,College,Press photographer,Owns house,"(3,)","(0.15,)",455,False,True,True,True,False,False,Based on the following customer data: {'Custom...
1,055a1b54-ef42-4346-b793-bf703c83e799,63,Male,Divorced,Low,College,"Buyer, retail",Rents,"(0,)","(0.22,)",833,False,True,True,True,True,False,Based on the following customer data: {'Custom...
2,ef9e2e55-5498-4f72-a7b2-fb6a51b675e9,31,Male,Married,High,High School,Orthoptist,Rents,"(2,)","(0.2,)",642,False,False,True,True,False,True,Based on the following customer data: {'Custom...
3,5859582d-fdec-4e53-a6c4-71853f1c54cb,62,Male,Divorced,Medium,College,Computer games developer,Owns house,"(0,)","(0.37,)",539,True,True,True,True,False,False,Based on the following customer data: {'Custom...
4,5c462e59-a439-46f5-8eb4-874c6f219213,48,Female,Married,Low,High School,Drilling engineer,Living with parents,"(2,)","(0.19,)",848,True,True,False,True,True,True,Based on the following customer data: {'Custom...


In [28]:

df['content'][0]

"Based on the following customer data: {'Customer ID': 'f729f68b-9be3-486f-9e11-652368aec58f', 'Age': 59, 'Gender': 'Female', 'Marital Status': 'Married', 'Income Level': 'Medium', 'Education': 'College', 'Occupation': 'Press photographer', 'Residential Status': 'Owns house', 'Dependents': (3,), 'Debt-to-Income': (0.15,), 'Credit_Bureau': 455, 'last_3months_personal_loan_inq': False, 'last_3months_credit_card_inq': True, 'last_3months_mortgage_inq': True, 'last_6months_personal_loan_inq': True, 'last_6months_credit_card_inq': False, 'last_6months_mortgage_inq': False}, suggest suitable banking lending products."

In [25]:

!pip install langchain langchain-community langchain-core transformers

Collecting langchain-community
  Downloading langchain_community-0.3.16-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.7.1-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB

In [26]:
!pip install langchain langchain-community langchain-core transformers




In [29]:
from langchain.docstore.document import Document

# Prepare documents for LangChain
documents = []
for _, row in df.iterrows():
    documents.append(Document(page_content=row["content"], metadata={"class": row["Age"]}))

In [28]:
!pip install sentence-transformers

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.11.0->sentence-transformers)
 

In [30]:
from langchain_community.embeddings import HuggingFaceEmbeddings
hg_embeddings = HuggingFaceEmbeddings()

In [3]:
!pip install faiss-cpu



In [31]:
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

In [None]:
# Generate embeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Build FAISS index
faiss_index = FAISS.from_documents(documents, embeddings)
faiss_index.save_local('docs/faiss_rag/')  # This will create index.faiss & index.pkl


print("FAISS vector store created and saved successfully.")


FAISS vector store created and saved successfully.


: 

In [36]:
!pip install langchain-groq

Collecting langchain-groq
  Downloading langchain_groq-0.2.4-py3-none-any.whl.metadata (3.0 kB)
Collecting groq<1,>=0.4.1 (from langchain-groq)
  Downloading groq-0.17.0-py3-none-any.whl.metadata (14 kB)
Collecting langchain-core<0.4.0,>=0.3.33 (from langchain-groq)
  Downloading langchain_core-0.3.33-py3-none-any.whl.metadata (6.3 kB)
Downloading langchain_groq-0.2.4-py3-none-any.whl (14 kB)
Downloading groq-0.17.0-py3-none-any.whl (109 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.8/109.8 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_core-0.3.33-py3-none-any.whl (412 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m412.7/412.7 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq, langchain-core, langchain-groq
  Attempting uninstall: langchain-core
    Found existing installation: langchain-core 0.3.32
    Uninstalling langchain-core-0.3.32:
      Successfully uninstalled langchain

In [37]:
from google.colab import userdata
# LLM with function call
from langchain_groq import ChatGroq
import os
groq_api_key=userdata.get('groq_api_key')
os.environ["GROQ_API_KEY"]=groq_api_key
from langchain_groq import ChatGroq
llm = ChatGroq(
    groq_api_key=os.getenv("GROQ_API_KEY"),
    model_name='gemma2-9b-it',
    temperature=0.1
)

llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x7a90c204a110>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x7a90c2c38990>, model_name='gemma2-9b-it', temperature=0.1, model_kwargs={}, groq_api_key=SecretStr('**********'))

# RAG

In [45]:
from IPython.display import display, Markdown
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

def colorize_text(text):
    for word, color in zip(["Reasoning", "Question", "Answer", "Total time"], ["blue", "red", "green", "magenta"]):
        text = text.replace(f"{word}:", f"\n\n**<font color='{color}'>{word}:</font>**")
    return text

question = "What is Recommendation Engie and How it used in Finance Domain?"

# Wrap the question in a HumanMessage object
messages = [HumanMessage(content=question)]

# Generating response using ChatGroq
response = llm(messages) # Pass the messages list to ChatGroq

full_response = f"Question: {question}\nAnswer: {response.content}"
display(Markdown(colorize_text(full_response)))



**<font color='red'>Question:</font>** What is Recommendation Engie and How it used in Finance Domain?


**<font color='green'>Answer:</font>** Let's break down Recommendation Engines and their role in finance.

**What is a Recommendation Engine?**

A recommendation engine is a type of software system that analyzes user data and behavior to suggest relevant items, products, or services. Think of it like the "You might also like" sections on shopping websites or the movie suggestions on streaming platforms.

**How They Work:**

Recommendation engines rely on sophisticated algorithms that learn from patterns in user interactions.  Here are some common techniques:

* **Collaborative Filtering:**  This method identifies users with similar tastes and recommends items liked by those similar users.
* **Content-Based Filtering:**  This approach analyzes the characteristics of items a user has interacted with in the past and recommends similar items.
* **Hybrid Systems:**  Many engines combine collaborative and content-based filtering for more accurate and diverse recommendations.

**Finance Domain Applications:**

Recommendation engines are transforming the financial landscape in several ways:

* **Personalized Financial Advice:**

   * **Investment Recommendations:**  Suggesting stocks, bonds, mutual funds, or ETFs based on a user's risk tolerance, investment goals, and past performance.
   * **Retirement Planning:**  Recommending savings plans, investment strategies, and withdrawal strategies tailored to individual retirement needs.
   * **Debt Management:**  Identifying potential debt consolidation options, balance transfer offers, or strategies for paying down debt faster.

* **Product and Service Recommendations:**

   * **Banking Products:**  Recommending checking accounts, savings accounts, credit cards, or loans based on a user's financial profile and needs.
   * **Insurance Products:**  Suggesting appropriate insurance coverage (health, life, auto, home) based on a user's circumstances and risk factors.
   * **Financial Tools and Resources:**  Recommending budgeting apps, financial calculators, or educational materials relevant to a user's financial goals.

* **Fraud Detection:**

   * **Anomaly Detection:**  Identifying unusual transaction patterns that may indicate fraudulent activity.
   * **Risk Scoring:**  Assessing the likelihood of a customer engaging in fraudulent behavior.

**Benefits:**

* **Improved Customer Experience:** Personalized recommendations make financial products and services more relevant and engaging.
* **Increased Efficiency:**  Automating financial advice and product suggestions saves time and resources for both customers and financial institutions.
* **Enhanced Revenue:**  Targeted recommendations can lead to higher conversion rates and increased sales.
* **Reduced Risk:**  Fraud detection capabilities help protect both customers and institutions from financial losses.

**Challenges:**

* **Data Privacy:**  Recommendation engines rely on vast amounts of user data, raising concerns about privacy and security.
* **Bias:**  Algorithms can perpetuate existing biases in data, leading to unfair or discriminatory recommendations.
* **Transparency:**  The decision-making processes of complex algorithms can be difficult to understand, making it challenging to address bias or errors.


Let me know if you'd like to explore any of these aspects in more detail!


# Building the RAG QA Chain using Langchain and Create Chatbot Interface

In [46]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.llms import HuggingFaceHub
from IPython.display import display, Markdown
import os
import warnings
warnings.filterwarnings('ignore')

In [48]:
# Define the prompt template
template = """
Based on the following customer data, that I Provide, suggest one suitable banking lending products."
Customer Information: {question}
Context: {context}
Answer:
"""
PROMPT = PromptTemplate(input_variables=["context", "query"], template=template)

retriever = faiss_index.as_retriever(search_kwargs={"k": 1})

qa_chain = RetrievalQA.from_chain_type(
    llm, retriever=retriever, chain_type_kwargs={"prompt": PROMPT}
)

In [49]:
question = dataset[0]
question

{'Customer ID': 'a0409bc1-7206-4ad1-b690-0b23a834ea08',
 'Age': 42,
 'Gender': 'Male',
 'Marital Status': 'Single',
 'Income Level': 'High',
 'Education': 'High School',
 'Occupation': 'Programmer, systems',
 'Residential Status': 'Rents',
 'Dependents': (2,),
 'Debt-to-Income': (0.29,),
 'Credit_Bureau': 412,
 'last_3months_personal_loan_inq': False,
 'last_3months_credit_card_inq': True,
 'last_3months_mortgage_inq': True,
 'last_6months_personal_loan_inq': True,
 'last_6months_credit_card_inq': True,
 'last_6months_mortgage_inq': True}

In [50]:
import json
data_string = json.dumps(question, indent=4)
data_string

'{\n    "Customer ID": "a0409bc1-7206-4ad1-b690-0b23a834ea08",\n    "Age": 42,\n    "Gender": "Male",\n    "Marital Status": "Single",\n    "Income Level": "High",\n    "Education": "High School",\n    "Occupation": "Programmer, systems",\n    "Residential Status": "Rents",\n    "Dependents": [\n        2\n    ],\n    "Debt-to-Income": [\n        0.29\n    ],\n    "Credit_Bureau": 412,\n    "last_3months_personal_loan_inq": false,\n    "last_3months_credit_card_inq": true,\n    "last_3months_mortgage_inq": true,\n    "last_6months_personal_loan_inq": true,\n    "last_6months_credit_card_inq": true,\n    "last_6months_mortgage_inq": true\n}'

In [51]:

try:
    result = qa_chain({"query": data_string})
    display(result)
except RuntimeError as e:
    print(f"RuntimeError encountered: {e}")

{'query': '{\n    "Customer ID": "a0409bc1-7206-4ad1-b690-0b23a834ea08",\n    "Age": 42,\n    "Gender": "Male",\n    "Marital Status": "Single",\n    "Income Level": "High",\n    "Education": "High School",\n    "Occupation": "Programmer, systems",\n    "Residential Status": "Rents",\n    "Dependents": [\n        2\n    ],\n    "Debt-to-Income": [\n        0.29\n    ],\n    "Credit_Bureau": 412,\n    "last_3months_personal_loan_inq": false,\n    "last_3months_credit_card_inq": true,\n    "last_3months_mortgage_inq": true,\n    "last_6months_personal_loan_inq": true,\n    "last_6months_credit_card_inq": true,\n    "last_6months_mortgage_inq": true\n}',
 'result': "Based on the provided customer data, here's a suitable lending product suggestion:\n\n**Product:**  **Secured Personal Loan**\n\n**Reasoning:**\n\n* **High Income:** The customer has a high income, indicating a good ability to repay the loan.\n* **Moderate Debt-to-Income Ratio:**  A debt-to-income ratio of 0.29 suggests manage