In [1]:
# Mock Bank Dataset Generation and Analysis

## 1. Introduction

# Overview of SQLite and Python for Dataset Generation
# Libraries used and their purposes

import sqlite3
import pandas as pd
from faker import Faker
import random
import numpy as np

# Initialize Faker
faker = Faker()

# Define constants
NUM_CUSTOMERS = 1000
NUM_PRODUCTS = 50
NUM_TRANSACTIONS = 5000
NUM_DELINQUENCIES = 1000
NUM_POLICIES = 20
NUM_APPLICATIONS = 2000
NUM_COMPLAINTS = 500
NUM_CAMPAIGNS = 300
NUM_MODELS = 10


In [5]:

from datetime import date

def random_date(start_year=2015, end_year=2025):
    start_date = date(start_year, 1, 1)  # Use datetime.date for fixed start date
    end_date = date(end_year, 1, 1)      # Use datetime.date for fixed end date
    return faker.date_between(start_date=start_date, end_date=end_date)


In [6]:

# 1. Generate Customers Dataset
def generate_customers(n):
    customers = []
    for i in range(1, n + 1):
        customers.append([
            i,
            faker.name(),
            random.randint(18, 75),
            random.choice(['Male', 'Female']),
            round(random.uniform(20000, 200000), 2),
            random.choice(['Employed', 'Self-Employed', 'Unemployed']),
            random.choice(['High School', 'Bachelor', 'Master', 'PhD']),
            random.choice(['Single', 'Married', 'Divorced', 'Widowed']),
            random.randint(0, 5),
            random.choice(['Owned', 'Rented']),
            faker.city()
        ])
    return customers

# Preview Customers Dataset
customers = generate_customers(NUM_CUSTOMERS)
customer_df = pd.DataFrame(customers, columns=[
    'ID', 'Name', 'Age', 'Gender', 'Income', 'Employment_Status',
    'Education_Level', 'Marital_Status', 'Dependents',
    'Residential_Status', 'City'
])
customer_df.head()


Unnamed: 0,ID,Name,Age,Gender,Income,Employment_Status,Education_Level,Marital_Status,Dependents,Residential_Status,City
0,1,Calvin Wilson,23,Female,21294.18,Unemployed,High School,Widowed,0,Rented,New Sheilashire
1,2,Joseph Marks,74,Female,111262.39,Employed,High School,Single,1,Rented,Joetown
2,3,Brandon Williams,51,Female,119563.97,Employed,Bachelor,Divorced,0,Rented,Dawnfurt
3,4,Guy Edwards,60,Female,85235.52,Employed,Master,Married,3,Rented,Lake Christina
4,5,Thomas Larsen,66,Male,159996.91,Employed,High School,Married,5,Owned,Thomasborough


In [7]:

# 2. Generate Products Dataset
def generate_products(n):
    products = []
    for i in range(1, n + 1):
        products.append([
            i,
            faker.word().capitalize() + " Credit",
            random.choice(['Credit Card', 'Personal Loan', 'Mortgage', 'Overdraft']),
            round(random.uniform(2.5, 20.0), 2),
            random.randint(5000, 50000),
            random.randint(1, 30),
            round(random.uniform(100, 500), 2),
            round(random.uniform(50, 200), 2)
        ])
    return products

# Preview Products Dataset
products = generate_products(NUM_PRODUCTS)
products_df = pd.DataFrame(products, columns=[
    'ID', 'Name', 'Type', 'Interest_Rate', 'Credit_Limit', 'Loan_Tenure',
    'Origination_Fee', 'Late_Payment_Fee'
])
products_df.head()


Unnamed: 0,ID,Name,Type,Interest_Rate,Credit_Limit,Loan_Tenure,Origination_Fee,Late_Payment_Fee
0,1,Show Credit,Credit Card,9.87,18695,30,302.08,53.06
1,2,Shake Credit,Personal Loan,5.61,9223,21,358.08,111.67
2,3,Whatever Credit,Personal Loan,9.51,18164,3,433.03,119.21
3,4,Seven Credit,Overdraft,14.01,27016,16,428.22,165.27
4,5,Out Credit,Mortgage,3.52,13678,14,282.67,145.56


In [8]:
# 3. Generate Transactions Dataset
def generate_transactions(n, num_customers, num_products):
    transactions = []
    for i in range(1, n + 1):
        customer_id = random.randint(1, num_customers)
        product_id = random.randint(1, num_products)
        due_amount = round(random.uniform(100, 5000), 2)
        payment_amount = due_amount if random.random() > 0.2 else round(random.uniform(0, due_amount - 1), 2)
        overdue_days = 0 if payment_amount == due_amount else random.randint(1, 90)
        transactions.append([
            i, customer_id, product_id, random_date(), due_amount, payment_amount, overdue_days, random.choice(['Online', 'Check', 'Bank Transfer'])
        ])
    return transactions

# Preview Transactions Dataset
transactions = generate_transactions(NUM_TRANSACTIONS, NUM_CUSTOMERS, NUM_PRODUCTS)
transactions_df = pd.DataFrame(transactions, columns=[
    'ID', 'Customer_ID', 'Product_ID', 'Transaction_Date', 'Due_Amount', 'Payment_Amount', 'Overdue_Days', 'Payment_Mode'
])
transactions_df.head()


Unnamed: 0,ID,Customer_ID,Product_ID,Transaction_Date,Due_Amount,Payment_Amount,Overdue_Days,Payment_Mode
0,1,575,43,2022-12-03,3577.04,3577.04,0,Online
1,2,884,5,2015-11-13,2644.88,2644.88,0,Check
2,3,414,22,2021-09-15,948.25,948.25,0,Online
3,4,485,4,2019-05-24,1810.05,1810.05,0,Online
4,5,488,3,2019-08-07,1216.31,1216.31,0,Check


In [9]:

# 4. Create SQLite Database and Tables
conn = sqlite3.connect('mock_bank_data.sqlite')
cursor = conn.cursor()

# Define table schemas
table_definitions = {
    "customers": """CREATE TABLE IF NOT EXISTS customers (
        id INTEGER PRIMARY KEY,
        name TEXT, age INTEGER, gender TEXT,
        income REAL, employment_status TEXT, education_level TEXT,
        marital_status TEXT, dependents INTEGER,
        residential_status TEXT, city TEXT)""",
    "products": """CREATE TABLE IF NOT EXISTS products (
        id INTEGER PRIMARY KEY,
        name TEXT, type TEXT, interest_rate REAL,
        credit_limit INTEGER, loan_tenure INTEGER,
        origination_fee REAL, late_payment_fee REAL)""",
    "transactions": """CREATE TABLE IF NOT EXISTS transactions (
        id INTEGER PRIMARY KEY, customer_id INTEGER, product_id INTEGER,
        transaction_date DATE, due_amount REAL, payment_amount REAL,
        overdue_days INTEGER, payment_mode TEXT)"""
}

# Create tables
for table, definition in table_definitions.items():
    cursor.execute(definition)

# Commit changes
conn.commit()

# 5. Insert Data into SQLite Database
def insert_data(table_name, data):
    placeholders = ', '.join(['?' for _ in data[0]])
    query = f"INSERT INTO {table_name} VALUES ({placeholders})"
    cursor.executemany(query, data)
    conn.commit()

# Insert datasets into tables
insert_data("customers", customers)
insert_data("products", products)
insert_data("transactions", transactions)

# Query Example: Retrieve High Income Customers
query = "SELECT * FROM customers WHERE income > 100000"
cursor.execute(query)
high_income_customers = cursor.fetchall()

# Display high-income customers
high_income_customers_df = pd.DataFrame(high_income_customers, columns=[
    'ID', 'Name', 'Age', 'Gender', 'Income', 'Employment_Status',
    'Education_Level', 'Marital_Status', 'Dependents',
    'Residential_Status', 'City'
])
high_income_customers_df.head()

# Close the database connection
conn.close()


  cursor.executemany(query, data)


In [10]:


# 4. Create SQLite Database and Tables
conn = sqlite3.connect('mock_bank_data.sqlite')
cursor = conn.cursor()

# Final Query: Retrieve Transactions with Overdue Payments
final_query = "SELECT t.id, t.transaction_date, t.due_amount, t.payment_amount, t.overdue_days, c.name AS customer_name, p.name AS product_name FROM transactions t JOIN customers c ON t.customer_id = c.id JOIN products p ON t.product_id = p.id WHERE t.overdue_days > 0"
cursor.execute(final_query)
overdue_transactions = cursor.fetchall()

# Display overdue transactions
overdue_transactions_df = pd.DataFrame(overdue_transactions, columns=[
    'Transaction_ID', 'Transaction_Date', 'Due_Amount', 'Payment_Amount', 'Overdue_Days', 'Customer_Name', 'Product_Name'
])

# Show the first few results
print("Overdue Transactions:")
print(overdue_transactions_df.head())

# Close the database connection
conn.close()


Overdue Transactions:
   Transaction_ID Transaction_Date  Due_Amount  Payment_Amount  Overdue_Days  \
0               6       2024-01-17      749.97          333.28             9   
1              21       2020-02-13     4901.72         4048.66            75   
2              29       2019-07-24     1225.12          481.95            21   
3              32       2023-11-15     2530.16         1391.21            61   
4              39       2023-04-15     1231.47          607.86            64   

        Customer_Name    Product_Name  
0  Gabrielle Martinez  Receive Credit  
1      Diana Peterson      All Credit  
2         Amber Kelly    Shake Credit  
3         Julie White     Show Credit  
4         Robin Smith      Arm Credit  
