In [1]:
import sqlite3

# Connect to SQLite database
conn = sqlite3.connect("mock_bank_data.sqlite")
cursor = conn.cursor()

try:
    # Get a list of all tables in the database
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()
    
    # Drop each table
    for table_name in tables:
        table_name = table_name[0]  # Extract the table name from the tuple
        print(f"Dropping table: {table_name}")
        cursor.execute(f"DROP TABLE IF EXISTS {table_name};")
    
    # Vacuum the database to reclaim space
    cursor.execute("VACUUM;")
    print("All tables dropped and database cleaned up successfully.")

except sqlite3.Error as e:
    print(f"An error occurred: {e}")

finally:
    # Commit changes and close the connection
    conn.commit()
    conn.close()


Dropping table: Customers
Dropping table: Accounts
Dropping table: Products
Dropping table: Transactions
Dropping table: Installments
Dropping table: Cost_of_Capital
Dropping table: Marketing_Campaigns
Dropping table: Customer_Marketing
Dropping table: Revenue_Interest
All tables dropped and database cleaned up successfully.


In [2]:
import sqlite3
from faker import Faker
import random
from datetime import datetime, timedelta, date

# Register custom date adapter for SQLite
def adapt_date(py_date):
    return py_date.isoformat()  # Convert to 'YYYY-MM-DD' string

def convert_date(sql_date):
    return datetime.strptime(sql_date, "%Y-%m-%d").date()

# Register the adapters and converters
sqlite3.register_adapter(date, adapt_date)
sqlite3.register_converter("DATE", convert_date)

# Connect to SQLite database
conn = sqlite3.connect("mock_bank_data.sqlite", detect_types=sqlite3.PARSE_DECLTYPES)
cursor = conn.cursor()

# Initialize Faker
faker = Faker()

# SQL statements to create tables
create_table_queries = [
    """
    CREATE TABLE IF NOT EXISTS Customers (
        Customer_ID INTEGER PRIMARY KEY,
        Name TEXT NOT NULL,
        Date_of_Birth DATE,
        Gender TEXT,
        Address TEXT,
        Phone_Number TEXT,
        Email TEXT,
        Account_Open_Date DATE,
        Relationship_Status TEXT,
        Risk_Score INTEGER
    );
    """,
    """
    CREATE TABLE IF NOT EXISTS Accounts (
        Account_ID INTEGER PRIMARY KEY,
        Customer_ID INTEGER,
        Account_Type TEXT,
        Account_Status TEXT,
        Account_Open_Date DATE,
        Balance REAL,
        Interest_Rate REAL,
        Approval_Status TEXT,
        Approval_Date DATE,
        Default_Status TEXT,
        FOREIGN KEY (Customer_ID) REFERENCES Customers(Customer_ID)
    );
    """,
    """
    CREATE TABLE IF NOT EXISTS Products (
        Product_ID INTEGER PRIMARY KEY,
        Product_Name TEXT,
        Product_Type TEXT,
        Interest_Rate REAL,
        Fees REAL,
        Launch_Date DATE,
        End_Date DATE
    );
    """,
    """
    CREATE TABLE IF NOT EXISTS Transactions (
        Transaction_ID INTEGER PRIMARY KEY,
        Account_ID INTEGER,
        Transaction_Date DATE,
        Transaction_Type TEXT,
        Amount REAL,
        Balance_After_Transaction REAL,
        Merchant_Details TEXT,
        FOREIGN KEY (Account_ID) REFERENCES Accounts(Account_ID)
    );
    """,
    """
    CREATE TABLE IF NOT EXISTS Installments (
        Installment_ID INTEGER PRIMARY KEY,
        Account_ID INTEGER,
        Product_ID INTEGER,
        Installment_Start_Date DATE,
        Installment_End_Date DATE,
        Installment_Amount REAL,
        Paid_Amount REAL,
        Remaining_Balance REAL,
        FOREIGN KEY (Account_ID) REFERENCES Accounts(Account_ID),
        FOREIGN KEY (Product_ID) REFERENCES Products(Product_ID)
    );
    """,
    """
    CREATE TABLE IF NOT EXISTS Cost_of_Capital (
        Date DATE PRIMARY KEY,
        Cost_of_Capital REAL,
        Base_Rate REAL,
        Spread REAL
    );
    """,
    """
    CREATE TABLE IF NOT EXISTS Marketing_Campaigns (
        Campaign_ID INTEGER PRIMARY KEY,
        Campaign_Name TEXT,
        Start_Date DATE,
        End_Date DATE,
        Cost REAL
    );
    """,
    """
    CREATE TABLE IF NOT EXISTS Customer_Marketing (
        Customer_ID INTEGER,
        Campaign_ID INTEGER,
        FOREIGN KEY (Customer_ID) REFERENCES Customers(Customer_ID),
        FOREIGN KEY (Campaign_ID) REFERENCES Marketing_Campaigns(Campaign_ID)
    );
    """,
    """
    CREATE TABLE IF NOT EXISTS Revenue_Interest (
        Account_ID INTEGER PRIMARY KEY,
        Accrued_Interest REAL,
        FOREIGN KEY (Account_ID) REFERENCES Accounts(Account_ID)
    );
    """
]

# Execute table creation queries
for query in create_table_queries:
    cursor.execute(query)

# Data generation functions
def generate_customers(n=100):
    customers = []
    for _ in range(n):
        customers.append((
            faker.name(),
            faker.date_of_birth(minimum_age=18, maximum_age=80),
            random.choice(["Male", "Female"]),
            faker.address(),
            random.choice([faker.phone_number(), None]),  # Sometimes NULL
            random.choice([faker.email(), None]),         # Sometimes NULL
            faker.date_between(start_date="-10y", end_date="today"),
            random.choice(["Active", "Inactive", "Closed"]),
            random.randint(1, 100)
        ))
    return customers

def generate_accounts(customers):
    accounts = []
    for customer_id in range(1, len(customers) + 1):
        for _ in range(random.randint(1, 3)):
            accounts.append((customer_id, random.choice(["Savings", "Checking", "Credit"]), 
                             random.choice(["Open", "Closed"]), 
                             faker.date_between(start_date="-10y", end_date="today"), 
                             round(random.uniform(100, 100000), 2), 
                             random.choice([round(random.uniform(0.5, 5.0), 2), None])))
    return accounts

def enhance_accounts_with_approval_and_default(accounts):
    enhanced_accounts = []
    for account in accounts:
        approval_status = random.choice(["Approved", "Rejected"])
        approval_date = faker.date_between(start_date="-10y", end_date="today") if approval_status == "Approved" else None
        default_status = random.choice(["Defaulted", "Active"]) if approval_status == "Approved" else None
        enhanced_accounts.append((*account, approval_status, approval_date, default_status))
    return enhanced_accounts

def generate_products(n=20):
    products = []
    for _ in range(n):
        products.append((faker.word().capitalize() + " Product", 
                         random.choice(["Loan", "Credit Card", "Savings Plan"]), 
                         round(random.uniform(1.0, 15.0), 2), 
                         round(random.uniform(10, 500), 2), 
                         faker.date_between(start_date="-10y", end_date="today"), 
                         random.choice([faker.date_between(start_date="today", end_date="+10y"), None])))
    return products

def generate_transactions(accounts, n=1000):
    transactions = []
    for _ in range(n):
        account_id = random.choice(accounts)[0]
        transaction_date = faker.date_between(start_date="-5y", end_date="today")
        transaction_type = random.choice(["Deposit", "Withdrawal", "Payment"])
        amount = round(random.uniform(10, 10000), 2)
        balance_after = round(random.uniform(10, 100000), 2)
        merchant_details = random.choice([faker.company(), None])  # Merchant details sometimes NULL
        transactions.append((account_id, transaction_date, transaction_type, amount, balance_after, merchant_details))
    return transactions

def generate_installments(accounts, products, n=100):
    installments = []
    for _ in range(n):
        account_id = random.choice(accounts)[0]
        product_id = random.choice(products)[0]
        start_date = faker.date_between(start_date="-3y", end_date="today")
        end_date = faker.date_between(start_date=start_date, end_date="+2y")
        amount = round(random.uniform(100, 5000), 2)
        paid_amount = random.choice([round(amount * random.uniform(0.1, 1.0), 2), None])  # Sometimes NULL
        remaining_balance = amount - (paid_amount if paid_amount else 0)
        installments.append((account_id, product_id, start_date, end_date, amount, paid_amount, remaining_balance))
    return installments

def generate_marketing_campaigns(n=10):
    campaigns = []
    for _ in range(n):
        campaigns.append((faker.catch_phrase(), 
                          faker.date_between(start_date="-3y", end_date="today"), 
                          faker.date_between(start_date="today", end_date="+1y"), 
                          round(random.uniform(1000, 10000), 2)))
    return campaigns

def generate_customer_marketing(customers, campaigns):
    customer_marketing = []
    for customer_id in range(1, len(customers) + 1):
        campaign_id = random.choice(range(1, len(campaigns) + 1))
        customer_marketing.append((customer_id, campaign_id))
    return customer_marketing

def generate_cost_of_capital(n=100):
    cost_of_capital = []
    unique_dates = set()  # Track unique dates to prevent duplicates
    while len(cost_of_capital) < n:
        date = faker.date_between(start_date="-10y", end_date="today")
        if date not in unique_dates:  # Ensure date is unique
            unique_dates.add(date)
            cost_of_capital.append((
                date,
                round(random.uniform(1.0, 10.0), 2),  # Cost of capital
                round(random.uniform(0.5, 5.0), 2),  # Base rate
                round(random.uniform(0.1, 2.0), 2)   # Spread
            ))
    return cost_of_capital

def calculate_revenue_interest(accounts):
    revenue_interest = {}
    for account in accounts:
        account_id = account[0]
        balance = account[4]
        interest_rate = account[5]
        if account_id not in revenue_interest:  # Ensure each account is processed only once
            accrued_interest = round(balance * (interest_rate / 100), 2) if balance and interest_rate else 0.0
            revenue_interest[account_id] = accrued_interest
    return [(account_id, accrued_interest) for account_id, accrued_interest in revenue_interest.items()]

# Insert data into tables
customers = generate_customers(100)
cursor.executemany("""
    INSERT INTO Customers (Name, Date_of_Birth, Gender, Address, Phone_Number, Email, Account_Open_Date, Relationship_Status, Risk_Score)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""", customers)

accounts = generate_accounts(customers)
enhanced_accounts = enhance_accounts_with_approval_and_default(accounts)
cursor.executemany("""
    INSERT INTO Accounts (Customer_ID, Account_Type, Account_Status, Account_Open_Date, Balance, Interest_Rate, Approval_Status, Approval_Date, Default_Status)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""", enhanced_accounts)

products = generate_products(20)
cursor.executemany("""
    INSERT INTO Products (Product_Name, Product_Type, Interest_Rate, Fees, Launch_Date, End_Date)
    VALUES (?, ?, ?, ?, ?, ?)
""", products)

transactions = generate_transactions(accounts, 1000)
cursor.executemany("""
    INSERT INTO Transactions (Account_ID, Transaction_Date, Transaction_Type, Amount, Balance_After_Transaction, Merchant_Details)
    VALUES (?, ?, ?, ?, ?, ?)
""", transactions)

installments = generate_installments(accounts, products, 100)
cursor.executemany("""
    INSERT INTO Installments (Account_ID, Product_ID, Installment_Start_Date, Installment_End_Date, Installment_Amount, Paid_Amount, Remaining_Balance)
    VALUES (?, ?, ?, ?, ?, ?, ?)
""", installments)

campaigns = generate_marketing_campaigns(10)
cursor.executemany("""
    INSERT INTO Marketing_Campaigns (Campaign_Name, Start_Date, End_Date, Cost)
    VALUES (?, ?, ?, ?)
""", campaigns)

customer_marketing = generate_customer_marketing(customers, campaigns)
cursor.executemany("""
    INSERT INTO Customer_Marketing (Customer_ID, Campaign_ID)
    VALUES (?, ?)
""", customer_marketing)

cost_of_capital = generate_cost_of_capital(100)
cursor.executemany("""
    INSERT INTO Cost_of_Capital (Date, Cost_of_Capital, Base_Rate, Spread)
    VALUES (?, ?, ?, ?)
""", cost_of_capital)

revenue_interest = calculate_revenue_interest(enhanced_accounts)
cursor.executemany("""
    INSERT INTO Revenue_Interest (Account_ID, Accrued_Interest)
    VALUES (?, ?)
""", revenue_interest)

# Commit and close
conn.commit()
conn.close()

print("Enhanced mock data inserted successfully.")


Enhanced mock data inserted successfully.


In [3]:
# Query sample data from Customers table
import sqlite3
import pandas as pd

# Connect to SQLite database
conn = sqlite3.connect("mock_bank_data.sqlite")


customers_query = """
SELECT * FROM Customers LIMIT 5;
"""
customers_sample = pd.read_sql_query(customers_query, conn)

# Display sample data
print("Sample from Customers Table:")
display(customers_sample)


Sample from Customers Table:


Unnamed: 0,Customer_ID,Name,Date_of_Birth,Gender,Address,Phone_Number,Email,Account_Open_Date,Relationship_Status,Risk_Score
0,1,Mr. Richard Stewart,1950-03-18,Female,"911 Sean Junction Suite 471\nHollandmouth, OH ...",,,2015-11-02,Closed,36
1,2,William Allen,1990-02-26,Female,96377 James Motorway Suite 640\nLake Jessicast...,,jvillanueva@example.org,2016-08-12,Inactive,24
2,3,Edward Smith,2002-09-08,Female,"37478 Chan Route\nThomasport, NV 83710",,jhernandez@example.net,2020-02-01,Active,60
3,4,Daniel Phillips,1978-11-01,Female,"PSC 7872, Box 0606\nAPO AP 94521",,ycompton@example.org,2024-09-27,Closed,65
4,5,Joseph Dawson,1958-06-01,Female,"32507 Richard Mill Suite 665\nWest Catherine, ...",,,2019-03-17,Active,54


In [4]:
# Query sample data from Accounts table
accounts_query = """
SELECT * FROM Accounts LIMIT 5;
"""
accounts_sample = pd.read_sql_query(accounts_query, conn)

# Display sample data
print("Sample from Accounts Table:")
display(accounts_sample)


Sample from Accounts Table:


Unnamed: 0,Account_ID,Customer_ID,Account_Type,Account_Status,Account_Open_Date,Balance,Interest_Rate,Approval_Status,Approval_Date,Default_Status
0,1,1,Checking,Open,2022-07-16,74652.18,,Approved,2022-05-07,Active
1,2,1,Savings,Closed,2021-12-23,18369.47,,Rejected,,
2,3,1,Credit,Closed,2015-09-29,2294.62,4.61,Approved,2019-02-18,Active
3,4,2,Checking,Closed,2019-01-27,7990.82,,Approved,2015-08-17,Defaulted
4,5,2,Checking,Closed,2022-10-14,55635.39,,Rejected,,


In [5]:
# Query sample data from Accounts table
accounts_query = """
SELECT * FROM Accounts LIMIT 5;
"""
accounts_sample = pd.read_sql_query(accounts_query, conn)

# Display sample data
print("Sample from Accounts Table:")
display(accounts_sample)


Sample from Accounts Table:


Unnamed: 0,Account_ID,Customer_ID,Account_Type,Account_Status,Account_Open_Date,Balance,Interest_Rate,Approval_Status,Approval_Date,Default_Status
0,1,1,Checking,Open,2022-07-16,74652.18,,Approved,2022-05-07,Active
1,2,1,Savings,Closed,2021-12-23,18369.47,,Rejected,,
2,3,1,Credit,Closed,2015-09-29,2294.62,4.61,Approved,2019-02-18,Active
3,4,2,Checking,Closed,2019-01-27,7990.82,,Approved,2015-08-17,Defaulted
4,5,2,Checking,Closed,2022-10-14,55635.39,,Rejected,,


In [6]:
# Query sample data from Transactions table
transactions_query = """
SELECT * FROM Transactions LIMIT 5;
"""
transactions_sample = pd.read_sql_query(transactions_query, conn)

# Display sample data
print("Sample from Transactions Table:")
display(transactions_sample)


Sample from Transactions Table:


Unnamed: 0,Transaction_ID,Account_ID,Transaction_Date,Transaction_Type,Amount,Balance_After_Transaction,Merchant_Details
0,1,33,2022-04-23,Withdrawal,5803.03,52366.4,
1,2,39,2020-12-25,Deposit,2093.62,31629.02,
2,3,58,2021-11-30,Withdrawal,8635.6,24406.39,Romero PLC
3,4,8,2020-05-09,Deposit,6952.94,24123.56,
4,5,91,2024-07-15,Withdrawal,3832.63,12121.91,


In [7]:
# Query sample data from Products table
products_query = """
SELECT * FROM Products LIMIT 5;
"""
products_sample = pd.read_sql_query(products_query, conn)

# Display sample data
print("Sample from Products Table:")
display(products_sample)


Sample from Products Table:


Unnamed: 0,Product_ID,Product_Name,Product_Type,Interest_Rate,Fees,Launch_Date,End_Date
0,1,Character Product,Loan,13.49,83.03,2018-08-09,2029-01-21
1,2,One Product,Credit Card,12.09,39.48,2024-09-26,
2,3,Finish Product,Loan,7.27,61.18,2020-12-23,2033-02-25
3,4,Change Product,Savings Plan,3.86,341.97,2018-01-31,2031-06-25
4,5,Gas Product,Loan,3.5,147.78,2024-06-30,2029-08-27


In [8]:
# Query sample data from Installments table
installments_query = """
SELECT * FROM Installments LIMIT 5;
"""
installments_sample = pd.read_sql_query(installments_query, conn)

# Display sample data
print("Sample from Installments Table:")
display(installments_sample)


Sample from Installments Table:


Unnamed: 0,Installment_ID,Account_ID,Product_ID,Installment_Start_Date,Installment_End_Date,Installment_Amount,Paid_Amount,Remaining_Balance
0,1,39,Mind Product,2024-04-13,2025-03-17,3183.34,2951.29,232.05
1,2,82,Break Product,2023-05-12,2024-02-25,1172.76,,1172.76
2,3,59,Executive Product,2023-01-04,2023-09-03,4529.05,481.7,4047.35
3,4,24,Animal Product,2023-04-24,2026-12-04,1129.23,,1129.23
4,5,51,Recent Product,2023-12-12,2025-01-21,3433.68,2624.46,809.22


In [9]:
# Query sample data from Cost_of_Capital table
cost_of_capital_query = """
SELECT * FROM Cost_of_Capital LIMIT 5;
"""
cost_of_capital_sample = pd.read_sql_query(cost_of_capital_query, conn)

# Display sample data
print("Sample from Cost_of_Capital Table:")
display(cost_of_capital_sample)


Sample from Cost_of_Capital Table:


Unnamed: 0,Date,Cost_of_Capital,Base_Rate,Spread
0,2022-02-08,1.92,1.9,0.26
1,2019-01-07,1.8,2.79,0.99
2,2021-02-15,9.68,1.41,1.38
3,2021-01-09,6.49,2.4,1.55
4,2023-05-27,2.98,1.0,0.99


In [10]:
# Close connection
conn.close()
