### PREDICTIVE ANALYTICS AND RECOMMENDATION SYSTEMS IN BANKING

#### Project objective
     This project predicts loan defaults using supervised learning, segments customers using unsupervised learning, and recommends bank products through a recommendation engine.

In [3]:
pip install faker

Note: you may need to restart the kernel to use updated packages.


In [10]:
#imports libraries
from faker import Faker
import pandas as pd
import numpy as np
import random
import warnings
warnings.filterwarnings('ignore')
import scipy.stats as stats

### Data Collection

In [8]:
# Synthetic-data setup: one shared Faker instance plus the category pools
# that the record generator samples from.
fake = Faker()

# Loan categories; one is picked at random for every generated record.
loan_types = [
    'Personal',
    'Business',
    'Education',
    'Auto',
    'Mortgage',
]

# Catalogue of bank product names (49 entries). Order matters: product ids
# are mapped onto this list by position, wrapping around when more products
# than names are requested.
product_names = [
    'Savings Account',
    'Checking Account',
    'Credit Card',
    'Personal Loan',
    'Home Loan',
    'Auto Loan',
    'Business Loan',
    'Student Loan',
    'Investment Fund',
    'Retirement Plan',
    'Insurance Policy',
    'Mutual Fund',
    'Bond',
    'Certificate of Deposit',
    'Home Equity Line',
    'Mortgage Refinance',
    'Business Credit Line',
    'Auto Refinance',
    'Home Improvement Loan',
    'Gold Loan',
    'Cash Credit',
    'Short-Term Loan',
    'Long-Term Loan',
    'Travel Loan',
    'Medical Loan',
    'Emergency Loan',
    'Holiday Loan',
    'Debt Consolidation Loan',
    'Small Business Loan',
    'Agricultural Loan',
    'Technology Loan',
    'Education Savings Plan',
    'Wealth Management',
    'Stock Investment',
    'Real Estate Investment',
    'International Investment',
    'Fixed Deposit',
    'Recurring Deposit',
    'Loan Against Property',
    'Gold Investment',
    'Retirement Savings',
    'High-Yield Savings Account',
    'Money Market Account',
    'Insurance Savings',
    'Pension Plan',
    'Child Education Fund',
    'Healthcare Savings',
    'Property Investment',
    'Auto Insurance',
]

def _credit_score_band(credit_score):
    """Return the band label ("Poor", "Fair", "Good" or "Excellent") for a credit score."""
    if credit_score < 580:
        return "Poor"
    if credit_score < 670:
        return "Fair"
    if credit_score < 740:
        return "Good"
    return "Excellent"


def _default_probability(credit_score, monthly_income):
    """Return the default probability for a score/income pair, clamped to [0.05, 0.95]."""
    raw = (700 - credit_score) / 400 + (50000 - monthly_income) / 100000
    return min(max(raw, 0.05), 0.95)


def data_col(n_records,n_customers=100,n_product=50,n_interactions=1000,csv_filename="dataset_revised1.CSV",seed=None):
    """Generate a synthetic banking dataset and write it to a CSV file.

    Every record combines a randomly chosen customer id with randomly drawn
    loan, transaction and product-interaction fields, plus a few derived
    columns (debt-to-income ratio, credit score band, date parts).

    Args:
        n_records: Number of rows to generate.
        n_customers: Size of the customer id pool ("C01", "C02", ...).
        n_product: Size of the product id pool ("P01", "P02", ...). Product
            names are taken from ``product_names`` by position and repeat
            when ``n_product`` exceeds the number of names.
        n_interactions: Accepted but not used by the generator; kept so that
            existing calls passing it keep working.
        csv_filename: Path of the CSV file to write (without the index).
        seed: Optional integer. When given, the ``random`` module, NumPy's
            global generator and the shared ``fake`` instance are all seeded
            with it so the random draws are repeatable. Note that this
            reseeds those global generators for the rest of the session.
            The date columns are requested as "-1y" to "now", so they are
            presumably still relative to the day the function runs.

    Returns:
        None. The data is written to ``csv_filename`` and a confirmation
        message is printed.
    """
    if seed is not None:
        # Rows draw from three independent generators; seed every one of
        # them, otherwise the output is only partly reproducible.
        random.seed(seed)
        np.random.seed(seed)
        fake.seed_instance(seed)

    cus_ids = [f"C0{i+1}" for i in range(n_customers)]
    prod_ids = [f"P0{i+1}" for i in range(n_product)]

    # Position-based id -> name mapping; the modulo recycles names when there
    # are more product ids than catalogue entries.
    prod_id_to_name = {
        prod_id: product_names[idx % len(product_names)]
        for idx, prod_id in enumerate(prod_ids)
    }

    # Transaction frequency is fixed once per customer, not per record.
    transaction_frequency = {cus_id: random.randint(1, 10) for cus_id in cus_ids}

    # NOTE(review): age, gender, income and credit score are drawn afresh for
    # every record, so rows sharing a Customer_Id can disagree on them.
    # Confirm this is intended before using the data for customer-level work.
    rows = []
    for _ in range(n_records):
        customer_id = random.choice(cus_ids)
        age = random.randint(18, 70)
        monthly_income = round(random.uniform(20000, 200000), 2)
        credit_score = random.randint(300, 850)
        loan_amount = round(random.uniform(100000, 1000000), 2)
        interest_rate = round(random.uniform(1.0, 15.0), 2)
        loan_term = random.choice([12, 24, 36, 48, 60])
        loan_type = random.choice(loan_types)
        transaction_id = fake.uuid4()
        gender = np.random.choice(["Male", "Female"])
        transaction_amount = round(random.uniform(5, 2000), 2)
        transaction_type = np.random.choice(["Deposit", "Withdrawal"])
        transaction_date = fake.date_between("-1y", "now")
        product_id = random.choice(prod_ids)
        interaction_type = np.random.choice(["Purchased", "Viewed", "Clicked"])
        interaction_date = fake.date_between("-1y", "now")

        # Derived fields.
        debt_income = loan_amount / monthly_income
        # Repayment_Status is a single Bernoulli draw: 1 with the computed
        # default probability, 0 otherwise.
        repayment_status = np.random.binomial(
            1, _default_probability(credit_score, monthly_income)
        )

        # Convert to pandas timestamps so year/month/day-name are available.
        transaction_date = pd.to_datetime(transaction_date)
        interaction_date = pd.to_datetime(interaction_date)

        rows.append({
            "Customer_Id": customer_id,
            "Age": age,
            "Monthly_Income": monthly_income,
            "Credit_Score": credit_score,
            "Credit_Score_Band": _credit_score_band(credit_score),
            "Loan_Amount": loan_amount,
            "Interest_Rate": interest_rate,
            "Loan_Term": loan_term,
            "Loan_Type": loan_type,
            "Debt_Income": debt_income,
            "Repayment_Status": repayment_status,
            "Transaction_Id": transaction_id,
            "Gender": gender,
            "Transaction_Amount": transaction_amount,
            "Transaction_Type": transaction_type,
            "Transaction_Date": transaction_date,
            "Transaction_Year": transaction_date.year,
            "Transaction_Month": transaction_date.month,
            "Product_Id": product_id,
            "Product_Name": prod_id_to_name[product_id],
            "Interaction_Date": interaction_date,
            "Interaction_Year": interaction_date.year,
            "Interaction_Day_Name": interaction_date.day_name(),
            "Interaction_Type": interaction_type,
            "Transaction_Frequency": transaction_frequency[customer_id],
        })

    df = pd.DataFrame(rows)
    df.to_csv(csv_filename, index=False)
    print("Data successfully saved")

# Generate 1,000 records and write them to the project's dataset file.
# The destination is an absolute, machine-specific path.
data_col(
    n_records=1000,
    n_customers=100,
    n_product=50,
    n_interactions=1000,
    csv_filename="/Users/nandhinichandran/Downloads/Finalproject/dataset.csv",
)


Data successfully saved
