In [3]:
pip install faker cx_Oracle mysql-connector-python psycopg2 pyodbc


Defaulting to user installation because normal site-packages is not writeable
Collecting cx_Oracle
  Downloading cx_Oracle-8.3.0.tar.gz (363 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting mysql-connector-python
  Downloading mysql_connector_python-9.4.0-cp312-cp312-win_amd64.whl.metadata (7.7 kB)
Collecting psycopg2
  Downloading psycopg2-2.9.10-cp312-cp312-win_amd64.whl.metadata (5.0 kB)
Downloading mysql_connector_python-9.4.0-cp312-cp312-win_amd64.whl (16.4 MB)
   ---------------------------------------- 0.0/16.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/16.4 MB ? eta -:--:--
    --------------------------------------- 0.3/16.4 MB ? eta -:--:--
   - ------------------

In [1]:
#!/usr/bin/env python3
import csv
import os
import random
import string
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Iterable, Tuple

# ---------- Config ----------
ROW_COUNT = 1000    # rows per CSV

# Destination folder for the generated CSVs. The default is the original WSL
# share; set the MULTI_DB_DATA_DIR environment variable to write somewhere
# else without editing this cell (an empty value falls back to the default).
OUTPUT_DIR = Path(
    os.environ.get("MULTI_DB_DATA_DIR")
    or "//wsl.localhost/Ubuntu/home/nicolas/multi-db-env/data"
)
TIMESTAMP_UTC = True        # write ISO timestamps with trailing 'Z'

# Print a progress line every N rows. While ROW_COUNT is smaller than this
# value, no progress lines are printed at all.
PROGRESS_EVERY = 100_000

# Per-database settings, keyed by a short database name:
#   prefix   - tag placed at the start of every Trx_id written for that DB
#   filename - name of the CSV created inside OUTPUT_DIR
#   seed     - value passed to random.seed() before that DB's rows are generated
DBS = {
    "oracle": dict(
        prefix="ORCL",
        filename="block_transactions_oracle.csv",
        seed=101,
    ),
    "mysql": dict(
        prefix="MYSQL",
        filename="block_transactions_mysql.csv",
        seed=202,
    ),
    "postgres": dict(
        prefix="PG",
        filename="block_transactions_postgres.csv",
        seed=303,
    ),
    "mssql": dict(
        prefix="MSSQL",
        filename="block_transactions_mssql.csv",
        seed=404,
    ),
}

# Fixed schema/choices
# CSV header row; record_stream() yields its tuple fields in this same order.
COLUMNS = [
    "Block_num", "Trx_id", "Timestamp",
    "Amount", "Contract_type", "Currency",
    "Event_name", "From_Address", "To_Address",
]

# Value pools sampled with random.choice() for the categorical columns.
CONTRACT_TYPES = [
    "SmartContract",
    "TokenTransfer",
    "NFTMint",
]
CURRENCIES = [
    "USD",
    "ETH",
    "BTC",
    "USDT",
]
EVENTS = [
    "Transfer",
    "Approval",
    "Stake",
]

# ---------- Helpers ----------
def rand_hex(n: int) -> str:
    """Return a string of ``n`` random lowercase hexadecimal digits.

    Digits are drawn with replacement from the module-level ``random``
    generator, so the result is reproducible after ``random.seed()``.
    """
    hex_digits = "0123456789abcdef"
    picked = random.choices(hex_digits, k=n)
    return "".join(picked)

def rand_alnum(n: int) -> str:
    """Return a string of ``n`` random ASCII letters and digits.

    Characters are drawn with replacement from the module-level ``random``
    generator, so the result is reproducible after ``random.seed()``.
    """
    alphabet = string.ascii_letters + string.digits
    picked = random.choices(alphabet, k=n)
    return "".join(picked)

def rand_eth_address() -> str:
    """Return a random address string: ``0x`` followed by 40 hex digits."""
    hex_part = rand_hex(40)
    return f"0x{hex_part}"

def iso_utc(dt: datetime) -> str:
    """Format ``dt`` as an ISO 8601 string with whole-second precision.

    When ``TIMESTAMP_UTC`` is true a trailing ``Z`` is appended. Naive
    datetimes are formatted as they are. Timezone-aware datetimes are first
    converted to UTC and stripped of their offset, so the result never
    carries both an explicit offset and a ``Z`` (e.g. ``...+00:00Z``).

    When ``TIMESTAMP_UTC`` is false the value is formatted unchanged, with
    any offset it already has.
    """
    dt = dt.replace(microsecond=0)
    if not TIMESTAMP_UTC:
        return dt.isoformat()
    if dt.utcoffset() is not None:
        # Aware value: normalise to UTC so that the 'Z' suffix is accurate.
        dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
    return dt.isoformat() + "Z"

def record_stream(db_prefix: str, n: int) -> Iterable[Tuple]:
    """
    Yield ``n`` row tuples, with fields in ``COLUMNS`` order, for one database.

    Block_num counts up from 1. Trx_id is ``<db_prefix>_`` followed by 60
    random alphanumeric characters. The other random fields come from the
    module-level ``random`` generator, so they repeat when the caller seeds
    it first. Timestamps are offsets back from the wall-clock time at which
    iteration starts, so they differ from run to run even with the same seed.
    """
    # Naive UTC "now". datetime.utcnow() is deprecated since Python 3.12;
    # this expression yields the same naive-UTC value.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    for i in range(1, n + 1):
        # Up to 1,000,000 seconds (about 11.5 days) in the past.
        ts = now - timedelta(seconds=random.randint(0, 1_000_000))
        amount = random.uniform(0.01, 10_000.0)
        # The prefix keeps ids distinct between DBs. Uniqueness inside one
        # CSV is not enforced; it relies on 60 random characters making a
        # collision extremely unlikely.
        trx_id = f"{db_prefix}_" + rand_alnum(60)
        yield (
            i,                              # Block_num
            trx_id,                         # Trx_id
            iso_utc(ts),                    # Timestamp
            f"{amount:.8f}",                # Amount
            random.choice(CONTRACT_TYPES),  # Contract_type
            random.choice(CURRENCIES),      # Currency
            random.choice(EVENTS),          # Event_name
            rand_eth_address(),             # From_Address
            rand_eth_address(),             # To_Address
        )

def generate_csv(db_key: str, rows: int, out_dir: Path) -> Path:
    """
    Write one CSV of generated rows for the database ``db_key``.

    Args:
        db_key: Key into ``DBS`` selecting the prefix, file name and seed.
        rows: Number of data rows to write after the header.
        out_dir: Target directory; created (with parents) if missing.

    Returns:
        Path of the CSV file that was written.

    Raises:
        KeyError: If ``db_key`` is not a key of ``DBS``.

    Note:
        Reseeds the module-level ``random`` generator as a side effect.
    """
    meta = DBS[db_key]
    out_dir.mkdir(parents=True, exist_ok=True)
    path = out_dir / meta["filename"]
    print(f"\n[{db_key.upper()}] Generating {rows:,} rows → {path}")

    # DB-specific RNG seed → different but reproducible datasets
    random.seed(meta["seed"])

    # Explicit encoding so the file does not depend on the platform locale;
    # newline="" lets the csv module control line endings itself.
    with open(path, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(COLUMNS)
        for idx, rec in enumerate(record_stream(meta["prefix"], rows), start=1):
            w.writerow(rec)
            # A falsy PROGRESS_EVERY (0 or None) turns progress output off
            # instead of raising on the modulo.
            if PROGRESS_EVERY and idx % PROGRESS_EVERY == 0:
                print(f"[{db_key.upper()}] {idx:,}/{rows:,} written…")
    print(f"[{db_key.upper()}] Done.")
    return path

def main():
    """Generate one CSV per entry in ``DBS`` and then list the file names."""
    print(f"Output directory: {OUTPUT_DIR.resolve()}")
    for db_key in DBS:
        generate_csv(db_key, ROW_COUNT, OUTPUT_DIR)

    print("\nAll CSVs ready:")
    for meta in DBS.values():
        print(f" - {meta['filename']}")


# Run the generator when executed as a script or as a notebook cell.
if __name__ == "__main__":
    main()


Output directory: \\wsl.localhost\Ubuntu\home\nicolas\multi-db-env\data

[ORACLE] Generating 1,000 rows → \\wsl.localhost\Ubuntu\home\nicolas\multi-db-env\data\block_transactions_oracle.csv
[ORACLE] Done.

[MYSQL] Generating 1,000 rows → \\wsl.localhost\Ubuntu\home\nicolas\multi-db-env\data\block_transactions_mysql.csv
[MYSQL] Done.

[POSTGRES] Generating 1,000 rows → \\wsl.localhost\Ubuntu\home\nicolas\multi-db-env\data\block_transactions_postgres.csv
[POSTGRES] Done.

[MSSQL] Generating 1,000 rows → \\wsl.localhost\Ubuntu\home\nicolas\multi-db-env\data\block_transactions_mssql.csv


  now = datetime.utcnow()


[MSSQL] Done.

All CSVs ready:
 - block_transactions_oracle.csv
 - block_transactions_mysql.csv
 - block_transactions_postgres.csv
 - block_transactions_mssql.csv
