# LakeBench Local Quickstart

Run LakeBench benchmarks locally with the DuckDB engine at scale factor 1 (SF=1, roughly 1 GB per generated dataset).

Steps:
1. Generate the TPC-DS and TPC-H datasets (sections 1–2)
2. Run the benchmarks: ELTBench, TPC-DS, TPC-H (sections 3–5)
3. View results (section 6)

In [None]:
import os

# Folders used by the rest of the notebook, all placed under the current
# working directory: generated datasets, per-engine working directories,
# and the benchmark result table.
DATA_DIR, WORKING_DIR, RESULTS_DIR = (
    os.path.join(os.getcwd(), folder_name)
    for folder_name in ("local_data", "local_working_dir", "local_results")
)

# Create the folders up front; exist_ok=True means an already-existing
# folder is not an error, so this cell can be re-run.
for _folder in (DATA_DIR, WORKING_DIR, RESULTS_DIR):
    os.makedirs(_folder, exist_ok=True)

## 1. Generate TPC-DS data (SF=1, ~1 GB)

In [None]:
from lakebench.datagen import TPCDSDataGenerator

# The TPC-DS dataset is written to its own sub-folder of DATA_DIR; the
# ELTBench and TPC-DS benchmark cells read from this same folder.
tpcds_output_dir = os.path.join(DATA_DIR, "tpcds_sf1")

# Generate the data at scale factor 1.
TPCDSDataGenerator(scale_factor=1, target_folder_uri=tpcds_output_dir).run()

## 2. Generate TPC-H data (SF=1, ~1 GB)

In [None]:
from lakebench.datagen import TPCHDataGenerator

# The TPC-H dataset is written to its own sub-folder of DATA_DIR; the
# TPC-H benchmark cell reads from this same folder.
tpch_output_dir = os.path.join(DATA_DIR, "tpch_sf1")

# Generate the data at scale factor 1.
TPCHDataGenerator(scale_factor=1, target_folder_uri=tpch_output_dir).run()

## 3. Run ELTBench (light mode)

In [None]:
from lakebench.engines import DuckDB
from lakebench.benchmarks import ELTBench

# Give the DuckDB engine a working directory dedicated to this benchmark.
eltbench_engine = DuckDB(
    schema_or_working_directory_uri=os.path.join(WORKING_DIR, "duckdb_eltbench")
)

# ELTBench reads the TPC-DS dataset generated in step 1.
eltbench_input_dir = os.path.join(DATA_DIR, "tpcds_sf1")

benchmark = ELTBench(
    engine=eltbench_engine,
    scenario_name="SF1 - Local",
    input_parquet_folder_uri=eltbench_input_dir,
    save_results=True,
    result_table_uri=RESULTS_DIR,
)
benchmark.run(mode="light")

## 4. Run TPC-DS power test (load + 99 queries)

In [None]:
# DuckDB is imported here as well so this cell does not depend on the
# ELTBench cell having been run first; it still needs the setup cell
# (os, DATA_DIR, WORKING_DIR, RESULTS_DIR) and the generated TPC-DS data.
from lakebench.engines import DuckDB
from lakebench.benchmarks import TPCDS

# Give the DuckDB engine a working directory dedicated to this benchmark
# and point the benchmark at the TPC-DS dataset generated in step 1.
benchmark = TPCDS(
    engine=DuckDB(schema_or_working_directory_uri=os.path.join(WORKING_DIR, "duckdb_tpcds")),
    scenario_name="SF1 - Local",
    input_parquet_folder_uri=os.path.join(DATA_DIR, "tpcds_sf1"),
    save_results=True,
    result_table_uri=RESULTS_DIR
)
benchmark.run(mode="power_test")

## 5. Run TPC-H power test (load + 22 queries)

In [None]:
# DuckDB is imported here as well so this cell does not depend on the
# ELTBench cell having been run first; it still needs the setup cell
# (os, DATA_DIR, WORKING_DIR, RESULTS_DIR) and the generated TPC-H data.
from lakebench.engines import DuckDB
from lakebench.benchmarks import TPCH

# Give the DuckDB engine a working directory dedicated to this benchmark
# and point the benchmark at the TPC-H dataset generated in step 2.
benchmark = TPCH(
    engine=DuckDB(schema_or_working_directory_uri=os.path.join(WORKING_DIR, "duckdb_tpch")),
    scenario_name="SF1 - Local",
    input_parquet_folder_uri=os.path.join(DATA_DIR, "tpch_sf1"),
    save_results=True,
    result_table_uri=RESULTS_DIR
)
benchmark.run(mode="power_test")

## 6. View results (most recently run benchmark — TPC-H if every step was run in order)

In [None]:
# Print one aligned row per entry in benchmark.results. `benchmark` is
# whichever benchmark object was assigned last, since each benchmark cell
# rebinds the same name.
for r in benchmark.results:
    # Map the entry's success flag onto a short status label.
    if r["success"]:
        status = "OK"
    else:
        status = "FAIL"
    # Columns: phase (right-aligned), test item (left-aligned), duration, status.
    print(f"{r['phase']:>8} | {r['test_item']:<20} | {r['duration_ms']:>8}ms | {status}")