In [1]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
import json
import os
from pathlib import Path
from dotenv import load_dotenv
from service.summary_service.summarizer_prompt import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load environment variables from a local .env file, if one is present.
# NOTE(review): presumably this is where the Google API credentials used by
# the chat client below come from — confirm against the project's .env.
load_dotenv()
# Chat model handle; it is passed to fraud_detection() further down.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash-lite")

In [3]:
def load_json(file_path):
    '''
    Read a JSON file and return its content as a pretty-printed string
    suitable for use as model input.

    input: json file path (str or path-like)
    output: text -- the parsed document re-serialised with a 2-space indent

    The file is always decoded as UTF-8 (the standard JSON encoding) instead
    of the platform's default encoding, so the result does not depend on the
    locale of the machine running the notebook.

    Raises FileNotFoundError (or another OSError) if the file cannot be
    opened, and json.JSONDecodeError if it does not contain valid JSON.
    '''

    with open(Path(file_path), "r", encoding="utf-8") as f:
        json_text = json.load(f)
    # Round-tripping through json validates the file and normalises its layout.
    return json.dumps(json_text, indent=2)

In [4]:
# Directory holding the extracted KYC documents for the application under
# review. Set the KYC_RESOURCE_DIR environment variable to point the notebook
# at a different checkout or application; when it is unset (or empty) the
# original location is used, so existing runs resolve exactly the same files.
RESOURCE_DIR = Path(
    os.environ.get("KYC_RESOURCE_DIR")
    or "/Users/nilanjansahu/Downloads/LandingAI/landing_ai_kyc/backend/resources/740d8179-4cbb-4f36-b5b1-4c888e7bf487"
)


def _document_json_path(document_type):
    '''Return "<RESOURCE_DIR>/<document_type>/output/<document_type>.json" as a string.'''
    return str(RESOURCE_DIR / document_type / "output" / f"{document_type}.json")


# One extracted-JSON path per document category.
bank_statement_json_path = _document_json_path("bank-statements")
credit_report_json_path = _document_json_path("credit-reports")
identity_doc_json_path = _document_json_path("identity-documents")
income_proof_json_path = _document_json_path("income-proof")
tax_statement_json_path = _document_json_path("tax-statements")

In [18]:
# System message: fixes the assistant's role, the comparisons to perform and
# the output format. It contains no template placeholders.
# The warning instruction at the end is written as a single, fully quoted
# sentence so the model emits the "Warning:" label together with its message.
system_prompt = ''' 
You are a **Loan Approver Assistant**.
Your task is to compare two data sources — a customer's bank statement and salary slips — both provided in JSON format.

Identify and list any **factual discrepancies** between the two datasets.

Focus specifically on:
- Differences between the salary amount credited in the bank statement and the gross/net pay in the salary slips.
- Missing or inconsistent salary deposit dates in the bank statement compared to salary slip pay dates.
- Differences in employer name or salary source description between both documents.
- Unusual patterns such as multiple salary credits in one month, missing months, or partial credits.

Follow these strict rules:
- Do **not** make assumptions or judgments about reasons for discrepancies.
- Do **not** assess eligibility, risk, or authenticity.
- Report only **factual mismatches or inconsistencies** found between the JSON datasets.

Present your findings in a clear and concise summary labeled:
**“Discrepancy Summary:”**
If no discrepancies are found, state: **“No discrepancies found between bank statement and salary slips.”**
If there are discrepancies between the bank statement and the salary slips, such as the company name or the credited amount not matching, end your response with this warning, exactly as written:
**"Warning: Upload the bank statement where your salary is credited"**
'''

# Human message: {bank_statement_json} and {salary_slips_json} are filled in
# by fraud_detection() with the pretty-printed JSON of each document.
human_prompt = ''' 
Here is the customer's bank statement data in JSON format:
{bank_statement_json}

Here is the customer's salary slip data in JSON format:
{salary_slips_json}
'''

In [19]:
def fraud_detection(llm, bank_json_path, income_json_path, system_prompt, human_prompt):
    '''
    Ask the chat model to compare a bank statement with income-proof data.

    input:
        llm: chat model object exposing invoke()
        bank_json_path: path of the bank statement JSON file
        income_json_path: path of the income proof / salary slip JSON file
        system_prompt: template text for the system message
        human_prompt: template text for the human message; it is formatted
            with the variables bank_statement_json and salary_slips_json
    output: the content attribute of the model's response
    '''
    # Read both documents first so a missing file fails before any prompt work.
    statement_text = load_json(bank_json_path)
    payslip_text = load_json(income_json_path)

    prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessagePromptTemplate.from_template(system_prompt),
            HumanMessagePromptTemplate.from_template(human_prompt),
        ]
    )
    messages = prompt.format_messages(
        bank_statement_json=statement_text,
        salary_slips_json=payslip_text,
    )
    return llm.invoke(messages).content


In [20]:
# Compare the bank statement against the income-proof JSON; this sends one
# request to the chat model, so re-running the cell repeats that request.
response = fraud_detection(llm, bank_statement_json_path, income_proof_json_path, system_prompt, human_prompt)

In [21]:
# Show the model's discrepancy report as plain text.
print(response)

Discrepancy Summary:
- The bank statement shows a "Payroll Deposit - HOTEL" on "Apr-01-14" with an amount of "694.81". This amount does not match the net pay of "$5,500.00" or gross earnings of "$7,600.00" from the salary slip.
- The bank statement shows another "Payroll Deposit - HOTEL" on "Apr-01-30" with an empty amount field.
- The employer name in the bank statement is "HOTEL", while the employer name in the salary slip is "Acme Hospitality LLC".
- The dates in the bank statement are in "Apr-01-XX" and "Sep-03-XX" format, while the pay date in the salary slip is "07/31/2023". This indicates a mismatch in the time period covered by the documents.

Upload the bank statement where your salary is credited
