# Setup

In [None]:
pip install google-generativeai faker pandas numpy

Collecting faker
  Downloading faker-37.4.2-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.4.2-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faker
Successfully installed faker-37.4.2


In [None]:
import google.generativeai as genai
from google.colab import userdata

# Read the key from the Colab secret store rather than hard-coding it in the notebook.
api_key = userdata.get("GEMINI_API_KEY")
genai.configure(api_key=api_key)

# Shared model handle; the generation cell calls model.generate_content(...) on it.
model = genai.GenerativeModel(model_name="gemini-2.5-flash")
print("Gemini API configured successfully using Colab Secrets.")


Gemini API configured successfully using Colab Secrets.


# Prompt

In [None]:
def build_prompt(batch_size=10):
    """Build the instruction prompt asking the model for a batch of transactions.

    The prompt describes the JSON schema of one synthetic transaction
    (metadata, status progression, anomaly flags) and asks for the whole batch
    as a single-line minified JSON array.

    Args:
        batch_size: Number of transaction objects to request in one response.
            Must be at least 1. Defaults to 10.

    Returns:
        The prompt text as a string.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    # NOTE: this is an f-string, so literal braces in the prompt are doubled
    # (user_{{number}} renders as user_{number}).
    return f"""
You are a JSON API simulator for a synthetic transaction generator within the Vybe app.

Generate exactly {batch_size} simulated transactions as a **single minified JSON array** of {batch_size} objects. Return only one line of output. No markdown, no explanations, no line breaks.

Each object in the array must include the following keys:

Metadata:
- transaction_id: UUID
- user_id: user_{{number}}
- timestamp_initiated: in 'YYYY-MM-DD HH:MM:SS' format
- amount: float between 10.0 and 10000.0
- transaction_type: One of [
    "Bank to Bank (InstaPay)", "Bank to Bank (PESONet)", "Bank to e-Wallet (GCash)", "Bank to e-Wallet (Maya)",
    "Bank to e-Wallet (ShopeePay)", "BPI to Vybe Wallet", "Vybe Wallet to GCash", "Vybe Wallet to Maya",
    "Vybe Wallet to ShopeePay", "Vybe Wallet to Vybe Wallet", "Vybe Wallet to Bank (BPI)",
    "Internal Vybe App Transfer", "Internal Cashback Credit", "Auto-Reversal Processed", "Auto-Retry Triggered",
    "Manual Escalation Triggered", "QR Payment (Merchant)", "QR Payment (P2P)", "Bills Payment (via Vybe Wallet)",
    "Bills Payment (via BPI Linked)", "Scheduled Transfer (Future Dated)", "Cash-In via Partner Outlet", "Cash-Out via ATM or OTC"
]
- recipient_type: "New Recipient" or "Frequent Recipient"
- recipient_account_id: 12-digit string
- recipient_bank_name_or_ewallet: matches transaction_type
- device_id: UUID
- location_coordinates: [float_latitude, float_longitude]
- simulated_network_latency: int from 50 to 5000 (ms)

Transaction Status Progression:
- status_timestamp_1: time for "Initiated"
- status_1: "Initiated"
- status_timestamp_2: time for "Debit Confirmed (BPI)"
- status_2: "Debit Confirmed (BPI)"
- status_timestamp_3: time for "Processing (Recipient Bank/e-Wallet)"
- status_3: "Processing (Recipient Bank/e-Wallet)"
- status_timestamp_4: time for final status
- status_4: one of ["Credit Confirmed (Recipient)", "Failed (Network Error)", "Failed (Timeout)", "Reversed (User Cancelled)"]
- expected_completion_time: 2 to 10 minutes after initiation

Anomalies:
- is_floating_cash: Boolean
- floating_duration_minutes: Int (0 if false)
- is_fraudulent_attempt: Boolean
- is_cancellation: Boolean
- is_retry_successful: Boolean
- manual_escalation_needed: Boolean

All timestamps must be chronologically logical. Use realistic variations.

RESPONSE FORMAT: one-line minified JSON array of {batch_size} objects, no extra text.

"""


# Actual Generation

In [None]:
import json
import pandas as pd
import time
from datetime import datetime

# Settings
batch_size = 10
target_total = 2400
max_retries = 3
save_interval = 100  # Save to CSV every 100 entries
max_failed_batches = 5  # Abort after this many consecutive batches exhaust their retries

synthetic_data = []
last_checkpoint_count = 0  # len(synthetic_data) at the most recent checkpoint save
failed_batches = 0  # Consecutive batches that produced no data

pd.set_option("display.max_columns", None)


def _parse_batch(raw_text):
    """Parse one model response into a non-empty list of transaction dicts.

    Args:
        raw_text: The text of the model response.

    Returns:
        The decoded list of dicts.

    Raises:
        ValueError: If the text is not valid JSON (json.JSONDecodeError is a
            ValueError subclass) or does not decode to a non-empty list of dicts.
    """
    text = raw_text.strip()

    # The prompt asks for bare JSON, but tolerate a markdown code fence
    # (``` or ```json) around the payload instead of wasting a retry on it.
    if text.startswith("```"):
        text = text.strip("`").strip()
        if text.lower().startswith("json"):
            text = text[4:]

    batch = json.loads(text)

    # An empty list is rejected too: accepting it would count as success while
    # adding nothing, letting the outer loop spin without progress.
    if not (isinstance(batch, list) and batch and all(isinstance(entry, dict) for entry in batch)):
        raise ValueError("Invalid batch format")
    return batch


while len(synthetic_data) < target_total:
    print(f"\n Generating batch... Current count: {len(synthetic_data)}")

    prompt = build_prompt(batch_size)

    success = False
    retries = 0
    while not success and retries < max_retries:
        try:
            response = model.generate_content(prompt)
            batch = _parse_batch(response.text)

            synthetic_data.extend(batch)
            print(f"[✓] Added {len(batch)} entries. Total: {len(synthetic_data)}")
            success = True

        except Exception as e:
            retries += 1
            print(f"[!] Retry {retries}/{max_retries}: {e}")
            time.sleep(1)

    if not success:
        # Without this guard a persistent failure (bad key, exhausted quota)
        # would restart the retry cycle forever.
        failed_batches += 1
        if failed_batches >= max_failed_batches:
            print(f"[x] Aborting: {failed_batches} consecutive batches failed. Saving partial results.")
            break
        continue
    failed_batches = 0

    # Save intermediate progress. Compare against the last checkpoint instead of
    # testing `len % save_interval == 0`: the modulo form fires on an empty list
    # and never fires again once a batch returns an unexpected number of rows.
    if len(synthetic_data) - last_checkpoint_count >= save_interval:
        df = pd.DataFrame(synthetic_data)
        filename = f"synthetic_transactions_progress_{len(synthetic_data)}.csv"
        df.to_csv(filename, index=False)
        last_checkpoint_count = len(synthetic_data)
        print(f"Saved checkpoint: {filename}")

# Final save (also reached after an abort, so partial data is not lost)
df = pd.DataFrame(synthetic_data)
df.to_csv("synthetic_transactions_gemini_final.csv", index=False)
print(f"\n Done! Total entries generated: {len(synthetic_data)}")



 Generating batch... Current count: 0
