In [None]:
# Import python packages
import streamlit as st
import pandas as pd
from faker import Faker
from datetime import timedelta, date
import random

# Shared Faker instance; every generate_* function below pulls its names,
# addresses, dates and company names from it.
# NOTE(review): neither `random` nor Faker is seeded in the visible code, so
# each run produces different data — seed both if reproducibility matters.
fake = Faker()

# We can also use Snowpark for our analyses!
from snowflake.snowpark.context import get_active_session
# Snowpark session used below to run the COUNT queries and to write the
# generated DataFrames to tables.
session = get_active_session()

# Number of rows to generate for each table; each constant is passed to the
# matching generate_* function.
NUM_CUSTOMERS = 100
NUM_ACCOUNTS = 120
NUM_TRANSACTIONS = 10000
NUM_LOANS = 10
NUM_CREDIT_CARDS = 50
NUM_INVESTMENTS = 15
NUM_FRAUDS = 1

# Disabled database switch. The later cells only call session.use_schema(...),
# so they rely on the session's current database.
# NOTE(review): presumably that database is already FINSIGHTS — confirm.
# session.use_database("FINSIGHTS")

In [None]:
# Generate Customer Data
def generate_customers(num_customers):
    """Build a DataFrame of synthetic customer records.

    All values come from the module-level ``fake`` Faker instance. Text
    fields are truncated (phone 20, address 100, city/state 50, zip 10
    characters) and both dates are rendered as '%Y-%m-%d' strings.
    The frame has no CUSTOMER_ID column.

    Args:
        num_customers: Number of rows to generate.

    Returns:
        pandas.DataFrame with one row per generated customer.
    """
    day_format = '%Y-%m-%d'

    def make_record():
        # Dict values are evaluated top to bottom, so the Faker calls run in
        # the same order as the columns are listed.
        return {
            "FIRST_NAME": fake.first_name(),
            "LAST_NAME": fake.last_name(),
            "EMAIL": fake.email(),
            "PHONE_NUMBER": fake.phone_number()[:20],
            "ADDRESS": fake.street_address()[:100],
            "CITY": fake.city()[:50],
            "STATE": fake.state()[:50],
            "ZIP_CODE": fake.zipcode()[:10],
            # Customers are between 18 and 90 years old.
            "DATE_OF_BIRTH": fake.date_of_birth(
                minimum_age=18, maximum_age=90
            ).strftime(day_format),
            # Customer record was created some time in the last five years.
            "ACCOUNT_CREATION_DATE": fake.date_between(
                start_date='-5y', end_date='today'
            ).strftime(day_format),
        }

    return pd.DataFrame([make_record() for _ in range(num_customers)])

# Generate the synthetic customers and write them to the CUSTOMERS table
# after switching the session to the COREBANKING schema.
customers_df = generate_customers(NUM_CUSTOMERS)

session.use_schema("COREBANKING")
session.write_pandas(
    df=customers_df,
    table_name="CUSTOMERS",
    auto_create_table=True,
)

# Emit a blank line as the cell's final statement.
print()

In [None]:
# Count the distinct CUSTOMER_IDs stored in COREBANKING.CUSTOMERS; the
# generators below use this as the upper bound for random customer ids.
TOTCUSTSQL = f''' SELECT COUNT(DISTINCT CUSTOMER_ID) T FROM COREBANKING.CUSTOMERS; '''

# Run the query and take the first value of result column T.
total_customers = session.sql(TOTCUSTSQL).toPandas()['T'][0]


# Generate Accounts
def generate_accounts(num_accounts):
    """Build a DataFrame of synthetic bank accounts.

    Each account is assigned a random CUSTOMER_ID between 1 and the
    module-level ``total_customers`` (inclusive).

    Args:
        num_accounts: Number of rows to generate.

    Returns:
        pandas.DataFrame with columns CUSTOMER_ID, ACCOUNT_TYPE, BALANCE,
        INTEREST_RATE, ACCOUNT_OPEN_DATE ('%Y-%m-%d' string) and
        ACCOUNT_STATUS.
    """
    account_kinds = ['Savings', 'Checking']
    status_options = ['Active', 'Inactive', 'Closed']

    # Dict values are evaluated in listed order, which keeps the sequence of
    # random/Faker calls per row fixed.
    return pd.DataFrame([
        {
            "CUSTOMER_ID": random.randint(1, total_customers),
            "ACCOUNT_TYPE": random.choice(account_kinds),
            "BALANCE": round(random.uniform(1000, 50000), 2),
            "INTEREST_RATE": round(random.uniform(0.5, 5.0), 2),
            "ACCOUNT_OPEN_DATE": fake.date_between(
                start_date='-5y', end_date='today'
            ).strftime('%Y-%m-%d'),
            "ACCOUNT_STATUS": random.choice(status_options),
        }
        for _ in range(num_accounts)
    ])

# Generate the synthetic accounts and write them to the ACCOUNTS table
# after switching the session to the COREBANKING schema.
accounts_df = generate_accounts(NUM_ACCOUNTS)

session.use_schema("COREBANKING")
session.write_pandas(
    df=accounts_df,
    table_name="ACCOUNTS",
    auto_create_table=True,
)

# Emit a blank line as the cell's final statement.
print()


In [None]:
# Count the distinct account ids stored in COREBANKING.ACCOUNTS and take the
# first value of result column T.
total_accounts = (
    session
    .sql(f''' select count (distinct account_id) T from COREBANKING.ACCOUNTS; ''')
    .toPandas()['T'][0]
)



# Generate Transactions
def generate_transactions(num_transactions):
    """Build a DataFrame of synthetic account transactions.

    Each transaction is assigned a random ACCOUNT_ID between 1 and the
    module-level ``total_accounts`` (inclusive). The bound is the account
    count, not the customer count: using ``total_customers`` here would leave
    some accounts without transactions or reference accounts that do not
    exist whenever the two counts differ.

    NOTE(review): this assumes account ids are the contiguous range
    1..total_accounts — confirm against the ACCOUNTS table definition.

    Args:
        num_transactions: Number of rows to generate.

    Returns:
        pandas.DataFrame with columns ACCOUNT_ID, TRANSACTION_TYPE,
        TRANSACTION_AMOUNT, TRANSACTION_DATE ('%Y-%m-%d' string, within the
        last year), MERCHANT (at most 100 characters) and CATEGORY.
    """
    transaction_types = ['Deposit', 'Withdrawal', 'Transfer', 'Payment']
    categories = ['Groceries', 'Utilities', 'Entertainment', 'Health', 'Travel']

    transactions = []
    for _ in range(num_transactions):
        transactions.append({
            # Bounded by the number of accounts, not customers.
            "ACCOUNT_ID": random.randint(1, total_accounts),
            "TRANSACTION_TYPE": random.choice(transaction_types),
            "TRANSACTION_AMOUNT": round(random.uniform(10, 5000), 2),
            "TRANSACTION_DATE": fake.date_between(
                start_date='-1y', end_date='today'
            ).strftime('%Y-%m-%d'),
            # Limit merchant name to 100 characters.
            "MERCHANT": fake.company()[:100],
            "CATEGORY": random.choice(categories),
        })
    return pd.DataFrame(transactions)
    
# Generate the synthetic transactions and write them to the TRANSACTIONS
# table after switching the session to the COREBANKING schema.
transactions_df = generate_transactions(NUM_TRANSACTIONS)

session.use_schema("COREBANKING")
session.write_pandas(
    df=transactions_df,
    table_name="TRANSACTIONS",
    auto_create_table=True,
)

# Emit a blank line as the cell's final statement.
print()

In [None]:
# Generate Loans
def generate_loans(num_loans):
    """Build a DataFrame of synthetic loans.

    Each loan is assigned a random CUSTOMER_ID between 1 and the module-level
    ``total_customers`` (inclusive). The outstanding balance is 10%-100% of
    the loan amount, and the due date is the issue date plus a whole number
    of years between 1 and 30.

    Args:
        num_loans: Number of rows to generate.

    Returns:
        pandas.DataFrame with columns CUSTOMER_ID, LOAN_TYPE, LOAN_AMOUNT,
        LOAN_BALANCE, INTEREST_RATE, LOAN_ISSUE_DATE and LOAN_DUE_DATE
        (both dates as '%Y-%m-%d' strings).
    """
    day_format = '%Y-%m-%d'

    def loan_records():
        for _ in range(num_loans):
            borrower = random.randint(1, total_customers)
            kind = random.choice(['Personal', 'Auto', 'Mortgage'])
            principal = round(random.uniform(5000, 500000), 2)
            outstanding = round(principal * random.uniform(0.1, 1.0), 2)
            rate = round(random.uniform(2.0, 7.0), 2)
            issued = fake.date_between(start_date='-10y', end_date='today').strftime(day_format)
            # Term of the loan in whole years, added to the issue date.
            term = pd.DateOffset(years=random.randint(1, 30))
            due = (pd.to_datetime(issued) + term).strftime(day_format)
            yield {
                "CUSTOMER_ID": borrower,
                "LOAN_TYPE": kind,
                "LOAN_AMOUNT": principal,
                "LOAN_BALANCE": outstanding,
                "INTEREST_RATE": rate,
                "LOAN_ISSUE_DATE": issued,
                "LOAN_DUE_DATE": due,
            }

    return pd.DataFrame(list(loan_records()))

# Generate the synthetic loans and write them to the LOANS table after
# switching the session to the RISKANALYSIS schema.
loans_df = generate_loans(NUM_LOANS)

session.use_schema("RISKANALYSIS")
session.write_pandas(
    df=loans_df,
    table_name="LOANS",
    auto_create_table=True,
)

# Emit a blank line as the cell's final statement.
print()

In [None]:
# Generate Credit Cards
def generate_credit_cards(num_credit_cards):
    """Build a DataFrame of synthetic credit cards.

    Each card is assigned a random CUSTOMER_ID between 1 and the module-level
    ``total_customers`` (inclusive). The balance is drawn between 0 and the
    card's own credit limit, and the expiration date lies between today and
    five years from now.

    Args:
        num_credit_cards: Number of rows to generate.

    Returns:
        pandas.DataFrame with columns CUSTOMER_ID, CARD_NUMBER (at most 20
        characters), CARD_TYPE, CREDIT_LIMIT, BALANCE, EXPIRATION_DATE
        ('%Y-%m-%d' string) and CARD_STATUS.
    """
    rows = []
    for _ in range(num_credit_cards):
        holder = random.randint(1, total_customers)
        number = fake.credit_card_number(card_type=None)[:20]
        network = random.choice(['Visa', 'MasterCard', 'Amex'])
        limit = round(random.uniform(1000, 20000), 2)
        # The balance never exceeds the limit drawn just above.
        owed = round(random.uniform(0, limit), 2)
        expires = fake.date_between(start_date='today', end_date='+5y').strftime('%Y-%m-%d')
        status = random.choice(['Active', 'Inactive', 'Closed'])
        rows.append(dict(
            CUSTOMER_ID=holder,
            CARD_NUMBER=number,
            CARD_TYPE=network,
            CREDIT_LIMIT=limit,
            BALANCE=owed,
            EXPIRATION_DATE=expires,
            CARD_STATUS=status,
        ))
    return pd.DataFrame(rows)

# Generate the synthetic credit cards and write them to the CREDIT_CARDS
# table after switching the session to the RISKANALYSIS schema.
credit_cards_df = generate_credit_cards(NUM_CREDIT_CARDS)

session.use_schema("RISKANALYSIS")
session.write_pandas(
    df=credit_cards_df,
    table_name="CREDIT_CARDS",
    auto_create_table=True,
)

# Emit a blank line as the cell's final statement.
print()

In [None]:
# Generate Investments
def generate_investments(num_investments):
    """Build a DataFrame of synthetic investments.

    Each investment is assigned a random CUSTOMER_ID between 1 and the
    module-level ``total_customers`` (inclusive). The current value is the
    invested amount scaled by a random factor between 0.8 and 1.5.

    Args:
        num_investments: Number of rows to generate.

    Returns:
        pandas.DataFrame with columns CUSTOMER_ID, INVESTMENT_TYPE,
        INVESTMENT_AMOUNT, CURRENT_VALUE, INVESTMENT_DATE ('%Y-%m-%d' string)
        and RISK_LEVEL.
    """
    def new_position():
        owner = random.randint(1, total_customers)
        product = random.choice(['Stock', 'Bond', 'Mutual Fund'])
        invested = round(random.uniform(1000, 100000), 2)
        worth = round(invested * random.uniform(0.8, 1.5), 2)
        opened = fake.date_between(start_date='-10y', end_date='today')
        risk = random.choice(['Low', 'Medium', 'High'])
        return {
            "CUSTOMER_ID": owner,
            "INVESTMENT_TYPE": product,
            "INVESTMENT_AMOUNT": invested,
            "CURRENT_VALUE": worth,
            "INVESTMENT_DATE": opened.strftime('%Y-%m-%d'),
            "RISK_LEVEL": risk,
        }

    return pd.DataFrame([new_position() for _ in range(num_investments)])

# Generate the synthetic investments and write them to the INVESTMENTS table
# after switching the session to the WEALTHMANAGEMENT schema.
investments_df = generate_investments(NUM_INVESTMENTS)

session.use_schema("WEALTHMANAGEMENT")
session.write_pandas(
    df=investments_df,
    table_name="INVESTMENTS",
    auto_create_table=True,
)

# Emit a blank line as the cell's final statement.
print()

In [None]:

# Count the distinct TRANSACTION_IDs stored in COREBANKING.TRANSACTIONS and
# take the first value of result column T.
total_trx = (
    session
    .sql(f''' select count (distinct TRANSACTION_ID) T from COREBANKING.TRANSACTIONS; ''')
    .toPandas()['T'][0]
)


# Generate Fraud Data
def generate_fraud_data(num_frauds):
    """Build a DataFrame of synthetic fraud records.

    Each record points at a random TRANSACTION_ID between 1 and the
    module-level ``total_trx`` (inclusive); the same transaction may be
    picked more than once.

    Args:
        num_frauds: Number of rows to generate.

    Returns:
        pandas.DataFrame with columns TRANSACTION_ID, DETECTED_DATE
        ('%Y-%m-%d' string, within the last year), FRAUD_TYPE and DESCRIPTION.
    """
    fraud_kinds = ['Phishing', 'Identity Theft', 'Card Skimming', 'Account Takeover']

    def flag_one():
        flagged_id = random.randint(1, total_trx)
        kind = random.choice(fraud_kinds)
        found_on = fake.date_between(start_date='-1y', end_date='today').strftime('%Y-%m-%d')
        return {
            "TRANSACTION_ID": flagged_id,
            "DETECTED_DATE": found_on,
            "FRAUD_TYPE": kind,
            "DESCRIPTION": f"Detected {kind} on trx {flagged_id}",
        }

    return pd.DataFrame([flag_one() for _ in range(num_frauds)])

# Generate the synthetic fraud records and write them to the
# TRANSACTION_FRAUD table after switching the session to the FRAUDDETECTION
# schema.
fraud_df = generate_fraud_data(NUM_FRAUDS)

session.use_schema("FRAUDDETECTION")
session.write_pandas(
    df=fraud_df,
    table_name="TRANSACTION_FRAUD",
    auto_create_table=True,
)

# Emit a blank line as the cell's final statement.
print()
