In [4]:
import pandas as pd
# Read the loan records from disk into a DataFrame.
# NOTE(review): the file name ends in '.csv.csv' — confirm it matches the file on disk.
df = pd.read_csv(filepath_or_buffer='data/loan_data.csv.csv')
# Preview the first five rows (five is head()'s default).
df.head(n=5)


Unnamed: 0,BorrowerID,Name,Income,LoanAmount,LoanType,AssetValue,District,TaxPaid,EMI,RepayStatus
0,1001,Ram Kumar,50000,300000,Car,150000,Ranchi,2000,8500,On Time
1,1002,Sunita Devi,70000,1200000,House,500000,Patna,3000,24000,Delayed
2,1003,Ravi Singh,40000,600000,Gold,200000,Gaya,1000,16000,On Time
3,1004,Neha Gupta,90000,150000,Online Loan,50000,Bokaro,4000,4200,On Time
4,1005,Amit Yadav,35000,250000,Bike,100000,Dhanbad,1500,7000,Delayed


In [6]:
# 1 where the EMI exceeds the borrower's income, otherwise 0.
df['EMI_OverIncome_Flag'] = df['EMI'].gt(df['Income']).astype(int)


In [7]:
# 1 where tax paid is under 5% of income AND income is above 50000, otherwise 0.
df['LowTaxFlag'] = (
    df['TaxPaid'].lt(df['Income'].mul(0.05))
    & df['Income'].gt(50000)
).astype(int)


In [8]:
# 1 where assets exceed 200000 while the recorded tax paid is exactly zero, otherwise 0.
df['AssetHigh_ZeroTax_Flag'] = (
    df['AssetValue'].gt(200000) & df['TaxPaid'].eq(0)
).astype(int)


In [9]:
# 1 on every row whose Name occurs more than once in the frame (keep=False marks
# all occurrences, not just the repeats), otherwise 0.
# NOTE(review): this flag is not read by fraud_score in the visible cells — confirm
# whether it is meant to contribute to the score.
df['MultiLoanFlag'] = df.duplicated(
    subset=['Name'],
    keep=False,
).astype(int)


In [10]:
# Points added to the fraud score for each rule flag that is set (1) on a row.
FRAUD_SCORE_WEIGHTS = {
    'EMI_OverIncome_Flag': 30,
    'LowTaxFlag': 30,
    'AssetHigh_ZeroTax_Flag': 40,
}


def fraud_score(row, weights=None):
    """Return the weighted sum of the rule flags found on one row.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by column name (e.g. a DataFrame row passed by
        ``df.apply(fraud_score, axis=1)``). It must contain every column
        named in ``weights``.
    weights : dict, optional
        Maps flag column name -> points added per unit of that flag.
        Defaults to ``FRAUD_SCORE_WEIGHTS`` (30 / 30 / 40), which reproduces
        the previously hard-coded scoring.

    Returns
    -------
    The sum of ``row[column] * points`` over all entries in ``weights``;
    0 when ``weights`` is empty.

    Raises
    ------
    KeyError
        If ``row`` lacks a column named in ``weights``.
    """
    if weights is None:
        weights = FRAUD_SCORE_WEIGHTS
    score = 0
    # Accumulate in the table's insertion order so the default result matches
    # the original step-by-step additions.
    for flag_column, points in weights.items():
        score += row[flag_column] * points
    return score

# Score every row by passing it to fraud_score (axis='columns' is the same as axis=1).
df['FraudScore'] = df.apply(func=fraud_score, axis='columns')


In [11]:
def get_fraud_risk(score):
    """Translate a numeric fraud score into a risk label.

    Returns 'High Risk' for scores of 60 or more, 'Medium Risk' for scores
    of 30 or more (but below 60), and 'Low Risk' for everything else.
    """
    if score >= 60:
        return 'High Risk'
    return 'Medium Risk' if score >= 30 else 'Low Risk'

# Label each row's score with its risk band via get_fraud_risk.
df['FraudRisk'] = df['FraudScore'].map(get_fraud_risk)


In [12]:
# Show borrower details next to the computed score and risk label (first 10 rows).
df.loc[
    :,
    ['Name', 'Income', 'EMI', 'TaxPaid', 'AssetValue', 'FraudScore', 'FraudRisk'],
].head(10)


Unnamed: 0,Name,Income,EMI,TaxPaid,AssetValue,FraudScore,FraudRisk
0,Ram Kumar,50000,8500,2000,150000,0,Low Risk
1,Sunita Devi,70000,24000,3000,500000,30,Medium Risk
2,Ravi Singh,40000,16000,1000,200000,0,Low Risk
3,Neha Gupta,90000,4200,4000,50000,30,Medium Risk
4,Amit Yadav,35000,7000,1500,100000,0,Low Risk
5,Shalini Das,55000,22000,2000,100000,30,Medium Risk
6,Manoj Tiwari,120000,55000,10000,1500000,0,Low Risk
7,Seema Kumari,60000,8500,3000,50000,0,Low Risk
8,Alok Verma,45000,2800,1000,30000,0,Low Risk
9,Priya Sen,80000,14000,5000,300000,0,Low Risk


In [1]:
import pandas as pd

# Read the loan records from disk.
# NOTE(review): this rebinds df to a fresh read of the CSV, so columns added to the
# previous df by earlier cells are not carried over.
# NOTE(review): the file name ends in '.csv.csv' — confirm it matches the file on disk.
df = pd.read_csv(filepath_or_buffer='data/loan_data.csv.csv')

# Preview the first five rows (five is head()'s default).
df.head(n=5)


Unnamed: 0,BorrowerID,Name,Income,LoanAmount,LoanType,AssetValue,District,TaxPaid,EMI,RepayStatus
0,1001,Ram Kumar,50000,300000,Car,150000,Ranchi,2000,8500,On Time
1,1002,Sunita Devi,70000,1200000,House,500000,Patna,3000,24000,Delayed
2,1003,Ravi Singh,40000,600000,Gold,200000,Gaya,1000,16000,On Time
3,1004,Neha Gupta,90000,150000,Online Loan,50000,Bokaro,4000,4200,On Time
4,1005,Amit Yadav,35000,250000,Bike,100000,Dhanbad,1500,7000,Delayed


In [2]:
# Share of income consumed by the EMI (EMI divided by Income, element-wise).
df['EMIToIncomeRatio'] = df['EMI'].div(df['Income'])


In [3]:
# 1 where the EMI-to-income ratio is above 0.4 (40%), otherwise 0.
# Vectorized comparison instead of a per-element Python lambda; a missing ratio
# compares False and therefore yields 0, as the lambda did.
df['SuspiciousEMIFlag'] = (df['EMIToIncomeRatio'] > 0.4).astype(int)


In [4]:
# NOTE(review): this cell repeats the SuspiciousEMIFlag assignment made in the
# preceding cell; re-assigning the same column from the same input is harmless,
# but the cell looks redundant — confirm it can be dropped.
# 1 where the EMI-to-income ratio is above 0.4 (40%), otherwise 0 (vectorized).
df['SuspiciousEMIFlag'] = (df['EMIToIncomeRatio'] > 0.4).astype(int)


In [5]:
# 1 where tax paid is below 1000 AND income is above 500000, otherwise 0.
# Built from whole-column boolean masks rather than a row-by-row apply, matching
# the other flag cells in this notebook.
df['LowTaxHighIncomeFlag'] = (
    (df['TaxPaid'] < 1000) & (df['Income'] > 500000)
).astype(int)


In [9]:
# Bare list literal: evaluating this cell only echoes these column names as its
# output; it does not select from or modify df.
# NOTE(review): 'OverLoanFlag' is assigned to df in a later cell, so indexing df
# with this list at this point would presumably fail — confirm the intended order.
['BorrowerID', 'Name', 'Income', 'LoanAmount', 'EMI', 'AssetValue', 'TaxPaid', 
 'EMIToIncomeRatio', 'SuspiciousEMIFlag', 'OverLoanFlag', 'LowTaxHighIncomeFlag']


['BorrowerID',
 'Name',
 'Income',
 'LoanAmount',
 'EMI',
 'AssetValue',
 'TaxPaid',
 'EMIToIncomeRatio',
 'SuspiciousEMIFlag',
 'OverLoanFlag',
 'LowTaxHighIncomeFlag']

In [10]:
# 1 where the loan amount exceeds the asset value, otherwise 0.
# Whole-column comparison instead of a row-by-row apply; rows with a missing
# operand compare False and get 0, as the lambda produced.
df['OverLoanFlag'] = (df['LoanAmount'] > df['AssetValue']).astype(int)


In [11]:
# Total number of rule flags raised per row: the three flag columns added element-wise.
df['FraudRiskScore'] = (
    df['SuspiciousEMIFlag']
    .add(df['OverLoanFlag'])
    .add(df['LowTaxHighIncomeFlag'])
)


In [12]:
# Rows with a FraudRiskScore of 2 or more, highest score first, capped at 10 rows.
df.loc[
    df['FraudRiskScore'].ge(2),
    ['Name', 'Income', 'LoanAmount', 'EMI', 'AssetValue', 'TaxPaid', 'FraudRiskScore'],
].sort_values(by='FraudRiskScore', ascending=False).head(10)


Unnamed: 0,Name,Income,LoanAmount,EMI,AssetValue,TaxPaid,FraudRiskScore
6,Manoj Tiwari,120000,2000000,55000,1500000,10000,2
12,Sachin Mehta,100000,2500000,68000,1800000,15000,2
17,Sanjay Rawat,80000,1500000,42000,1000000,6000,2


## 🔍 Fraud Detection Logic

We used the following rules to flag potential fraud in loan applications:

- EMI greater than 40% of Income → Suspicious
- Loan amount greater than asset value → Risky
- Income above ₹5L (₹500,000) with tax paid below ₹1,000 → Red Flag

Each triggered rule adds one point to the `FraudRiskScore`, which therefore ranges from 0 to 3.
Borrowers with 2 or more flags are considered high-risk.
