##### Version estable

In [1]:
from datetime import datetime, timedelta
import pandas as pd
import numpy as np

In [2]:
# Set seed for reproducibility
np.random.seed(0)

# Constants for the synthetic data generation
days = 30
transactions_per_day = 42
initial_balance_min = 0
initial_balance_max = 35000
max_income_transaction_value = 4000
max_cost_transaction_value = 1000
max_expense_transaction_value = 3000

# Generate the 'Momento' column with datetime objects in a regular interval throughout the days
start_datetime = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
datetimes = [start_datetime + timedelta(minutes=30) * i for i in range(days * transactions_per_day)]

# Initialize 'Balance' with a random initial value
initial_balance = np.random.randint(initial_balance_min, initial_balance_max + 1)
# Update the transactions logic to respect the new maximum transaction values
transactions = np.zeros((days * transactions_per_day, 3))  # Reset transaction matrix with zeros
balances = [initial_balance]  # Reset balance with the initial value

for i in range(1, transactions_per_day * days):
    transaction_type = np.random.choice([0, 1, 2])  # Random transaction type: 0 - Costo, 1 - Gasto, 2 - Ingreso
    
    # Determine max transaction value based on type
    if transaction_type == 0:
        max_transaction_value = max_cost_transaction_value
    elif transaction_type == 1:
        max_transaction_value = max_expense_transaction_value
    else:  # transaction_type == 2
        max_transaction_value = max_income_transaction_value
    
    transaction_value = np.random.randint(1, max_transaction_value + 1)  # Random transaction value within the max range
    
    # Ensure the balance doesn't go below the minimum balance of -20,000
    if transaction_type != 2:  # For 'Costo' or 'Gasto', check if the balance goes below -20,000
        if balances[-1] - transaction_value < -20000:
            transaction_value = balances[-1] + 20000  # Adjust the transaction to maintain the minimum balance
    else:  # For 'Ingreso', check if the transaction is within the max value
        transaction_value = min(transaction_value, max_income_transaction_value)
    
    # Apply transaction to balance
    if transaction_type == 2:  # 'Ingreso' increases balance
        balances.append(balances[-1] + transaction_value)
    else:  # 'Costo' or 'Gasto' decreases balance
        balances.append(balances[-1] - transaction_value)

    
    transactions[i, transaction_type] = transaction_value

# For the last transaction set all to zero
transactions[-1] = [0, 0, 0]
balances.append(balances[-1])

# Create DataFrame from the generated data
synthetic_data = pd.DataFrame({
    "Momento": datetimes,
    "Balance": balances[:-1],  # Exclude the last one as it's not associated with a 'Momento'
    "Costo": transactions[:, 0],
    "Gasto": transactions[:, 1],
    "Ingreso": transactions[:, 2]
})

In [3]:
synthetic_data  # Display the first few rows of the synthetic data set

Unnamed: 0,Momento,Balance,Costo,Gasto,Ingreso
0,2024-03-08 00:00:00,2732,0.0,0.0,0.0
1,2024-03-08 00:30:00,1896,0.0,836.0,0.0
2,2024-03-08 01:00:00,1618,0.0,278.0,0.0
3,2024-03-08 01:30:00,3447,0.0,0.0,1829.0
4,2024-03-08 02:00:00,6992,0.0,0.0,3545.0
...,...,...,...,...,...
1255,2024-04-03 03:30:00,36582,0.0,1188.0,0.0
1256,2024-04-03 04:00:00,38498,0.0,0.0,1916.0
1257,2024-04-03 04:30:00,37675,823.0,0.0,0.0
1258,2024-04-03 05:00:00,37592,83.0,0.0,0.0


In [4]:
synthetic_data.describe()

Unnamed: 0,Balance,Costo,Gasto,Ingreso
count,1260.0,1260.0,1260.0,1260.0
mean,9424.392857,149.030952,496.811905,673.509524
std,21655.122462,274.715293,856.834441,1167.121898
min,-20000.0,0.0,0.0,0.0
25%,-6798.25,0.0,0.0,0.0
50%,5684.0,0.0,0.0,0.0
75%,20666.5,179.0,729.0,1045.25
max,65574.0,999.0,2998.0,3970.0


In [5]:
synthetic_data.to_csv("/home/ricardo/Documents/nacion/primerSistema/inputs/dataSintetica.csv", index = False)