In [1]:
import csv
import random
from faker import Faker
from datetime import datetime, timedelta

# Initialize Faker
# NOTE(review): `fake` is not referenced anywhere else in the visible code.
fake = Faker()

# Constants
# Account number written into every generated row.
account_no = "40906174"
# First and last calendar day (both inclusive) for which rows are generated.
start_date = datetime(2016, 1, 1)
end_date = datetime(2024, 8, 18)  # Transactions only up to August 18, 2024
# Inclusive bounds for the number of transactions generated per day.
min_transactions_per_day = 1
max_transactions_per_day = 3
# Category labels; one is picked at random for withdrawal rows.
transaction_types = ["Groceries", "Entertainment", "Clothes", "Bill Sharing", "Utilities", "Rent", "Transportation", "Education", "Healthcare"]
# Upper cap applied to the running net total in generate_transaction_data().
# NOTE(review): the original comment read "90 lakh in paise", but
# Rs 90 lakh = Rs 9,000,000 = 900,000,000 paise; the value 90,000,000 matches
# neither figure. Confirm the intended unit and amount.
balance_limit = 90000000

# Distribution ranges: label -> (min amount, max amount, probability weight).
# The weights add up to 1.0.
# NOTE(review): the bounds of the last two buckets (200000-300000 and
# 300000-500000) do not match their "2L-4L" / "4L-10L" labels. The values are
# left unchanged here; confirm which of the two is intended.
distribution_ranges = {
    "10k-20k": (10000, 20000, 0.40),
    "20k-40k": (20000, 40000, 0.30),
    "40k-1L": (40000, 100000, 0.20),
    "1L-2L": (100000, 200000, 0.05),
    "2L-4L": (200000, 300000, 0.03),
    "4L-10L": (300000, 500000, 0.02)
}

# Function to generate a value based on the distribution
def get_distribution_value():
    """Return a random integer amount drawn from ``distribution_ranges``.

    A bucket is selected with probability equal to its weight, then an
    integer is drawn uniformly from that bucket's inclusive bounds.

    The uniform draw is compared against the *cumulative* weight of the
    buckets seen so far. Comparing it against each bucket's own weight would
    let only the first bucket ever match and would send every other draw to
    the fallback bucket.
    """
    rand_val = random.random()
    cumulative = 0.0
    for min_val, max_val, percentage in distribution_ranges.values():
        cumulative += percentage
        if rand_val < cumulative:
            return random.randint(min_val, max_val)
    # Floating-point rounding can leave the cumulative total fractionally
    # below 1.0; in that case fall back to the last bucket.
    return random.randint(*distribution_ranges["4L-10L"][:2])

# Floor the balance at 10000 once the date is inside the final 20% of the range
def adjust_balance_for_date(date, balance):
    """Return ``balance``, raised to at least 10000 near the end of the range.

    The floor applies only when the number of whole days from ``date`` to
    ``end_date`` is less than 20% of the days between ``start_date`` and
    ``end_date``. Otherwise ``balance`` is returned unchanged.
    """
    total_span_days = (end_date - start_date).days
    remaining_days = (end_date - date).days
    in_final_stretch = remaining_days < total_span_days * 0.2
    return max(balance, 10000) if in_final_stretch else balance

# Helper: pick the amounts and label for a single transaction
def _draw_transaction():
    """Return ``(withdrawal_amount, deposit_amount, transaction_detail)``.

    Exactly one of the two amounts is non-zero. Deposits carry an empty
    detail string; withdrawals carry a category label.
    """
    # Roughly 2% of rows are large Education/Healthcare withdrawals.
    if random.random() < 0.02:
        amount = random.randint(400000, 1000000)
        return amount, 0, random.choice(["Education", "Healthcare"])
    # Roughly 2% of the remaining rows are large unlabeled deposits.
    if random.random() < 0.02:
        return 0, random.randint(400000, 1000000), ""
    # Everything else is an even split between withdrawal and deposit.
    if random.random() < 0.5:
        return get_distribution_value(), 0, random.choice(transaction_types)
    return 0, get_distribution_value(), ""


# Function to generate transactions
def generate_transaction_data():
    """Build the list of synthetic transaction rows for the account.

    For every day from ``start_date`` through ``end_date`` (inclusive),
    between ``min_transactions_per_day`` and ``max_transactions_per_day``
    rows are produced.

    Returns:
        A list of dicts with the keys ``Account_No``, ``Withdrawal_amount``,
        ``Deposit_amount``, ``Balance_amount``, ``Value_date``, ``Data`` and
        ``transaction_details``. ``Value_date`` and ``Data`` both hold the
        row's date formatted as ``YYYY-MM-DD``.

    The final amounts of a row are chosen *before* the balance is updated, so
    the net change applied to the balance is the one implied by the recorded
    withdrawal/deposit. The skewing steps below (limit cap, sign flip, late
    floor) are still applied on top of that.
    """
    data = []
    balance = random.randint(10000, 50000)  # Initial balance
    # Net running total that ignores the sign flips applied to `balance`.
    total_balance_sum = balance
    num_negative = 0
    num_days = (end_date - start_date).days + 1
    # Cap on how many times the balance may be forced negative; derived from
    # the number of days, not the number of rows.
    num_negative_balances_needed = int(0.4 * num_days)

    current_date = start_date
    while current_date <= end_date:
        date_text = current_date.strftime("%Y-%m-%d")
        transactions_per_day = random.randint(min_transactions_per_day, max_transactions_per_day)

        for _ in range(transactions_per_day):
            withdrawal_amount, deposit_amount, transaction_detail = _draw_transaction()

            # Apply the row's net change to both running figures.
            net_change = deposit_amount - withdrawal_amount
            balance += net_change
            total_balance_sum += net_change

            # Keep the net running total at or below the limit, taking the
            # excess off the balance as well.
            if total_balance_sum > balance_limit:
                balance -= total_balance_sum - balance_limit
                total_balance_sum = balance_limit

            # While the cap has not been reached, force the balance negative
            # on about 40% of rows.
            if num_negative < num_negative_balances_needed and random.random() < 0.4:
                balance = -abs(balance)
                num_negative += 1

            # Floor the balance near the end of the date range.
            balance = adjust_balance_for_date(current_date, balance)

            data.append({
                "Account_No": account_no,
                "Withdrawal_amount": withdrawal_amount,
                "Deposit_amount": deposit_amount,
                "Balance_amount": balance,
                "Value_date": date_text,
                "Data": date_text,
                "transaction_details": transaction_detail
            })

        # Move to the next day
        current_date += timedelta(days=1)

    return data

# Build every transaction row in memory
transactions = generate_transaction_data()

# Column order of the output file; must list every key present in the rows
csv_columns = [
    "Account_No",
    "Withdrawal_amount",
    "Deposit_amount",
    "Balance_amount",
    "Value_date",
    "Data",
    "transaction_details",
]

# Write a header line followed by one line per row
with open("transactions.csv", "w", newline="") as file:
    writer = csv.DictWriter(file, fieldnames=csv_columns)
    writer.writeheader()
    for row in transactions:
        writer.writerow(row)

print("CSV file generated successfully.")


CSV file generated successfully.
