GWarp is a lightning-fast, memory-efficient data migration tool that moves data between databases, data warehouses, flat files, message queues, and REST APIs. Built in Go, it supports complex multi-sink pipelines, automatic schema detection, and fault-tolerant execution with resume capabilities.
| Source/Target | Read | Write | Schema Detection |
|---|---|---|---|
| PostgreSQL | ✅ | ✅ | ✅ |
| BigQuery | ✅ | ✅ | ✅ |
| CSV Files | ✅ | ✅ | ✅ |
| Kafka | ✅ | ✅ | ✅ |
| REST API | ✅ | ✅ | ✅ |

Legend: ✅ Available, ❌ Not Supported
**Option 1: Download Binary**

```bash
# Download latest release for your platform
curl -L https://github.com/riandyhasan/gwarp/releases/latest/download/gwarp-linux-amd64 -o gwarp
chmod +x gwarp
sudo mv gwarp /usr/local/bin/
```

**Option 2: Docker**

```bash
docker pull riandyhasan/gwarp:latest
```
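When using the Docker image, the configuration file has to be visible inside the container. A minimal sketch, assuming the image's entrypoint is the `gwarp` binary; the `/config.yaml` mount path is an arbitrary example, not a documented convention:

```bash
# Mount a local config into the container and run a migration from it
docker run --rm \
  -v "$(pwd)/postgres-to-bigquery.yaml:/config.yaml" \
  riandyhasan/gwarp:latest migrate --config /config.yaml
```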
- Create a configuration file (`postgres-to-bigquery.yaml`):
```yaml
pipeline:
  id: 'user-migration-001'
  name: 'User Data Migration'

  stages:
    - id: 'postgres-source'
      type: 'postgres'
      config:
        host: 'localhost'
        port: 5432
        database: 'userdb'
        table: 'users'
        username: 'postgres'
        password: '${POSTGRES_PASSWORD}'
      outputs: ['bigquery-sink']

    - id: 'bigquery-sink'
      type: 'bigquery'
      config:
        project_id: 'my-project'
        dataset: 'analytics'
        table: 'users'
        credentials_path: '/path/to/service-account.json'
      inputs: ['postgres-source']

  batch:
    size: 10000
    size_mb: 100
    timeout_s: 30

  parallelism:
    workers: 4
    max_memory_mb: 2048
```

- Run the migration:
```bash
# Run migration to completion
gwarp migrate --config postgres-to-bigquery.yaml

# Or run in background daemon mode
gwarp migrate --config postgres-to-bigquery.yaml --daemon
```

- Monitor progress (daemon mode only):
```bash
# Check status of running migration
gwarp status user-migration-001

# Follow logs in real-time
gwarp logs user-migration-001 --follow

# Resume if failed
gwarp resume user-migration-001
```
```bash
# PostgreSQL to BigQuery (run to completion)
gwarp migrate --config configs/examples/postgres-to-bigquery.yaml

# CSV to PostgreSQL (run in background)
gwarp migrate --config configs/examples/csv-to-postgres.yaml --daemon
```
```yaml
# One source, multiple destinations
pipeline:
  stages:
    - id: 'source'
      type: 'postgres'
      outputs: ['kafka', 'csv-backup']

    - id: 'kafka'
      type: 'kafka'
      inputs: ['source']
      outputs: ['bigquery']

    - id: 'csv-backup'
      type: 'csv'
      inputs: ['source']

    - id: 'bigquery'
      type: 'bigquery'
      inputs: ['kafka']
```
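Reading the topology above: `source` fans out to both `kafka` and `csv-backup`, and `kafka` in turn feeds `bigquery`. For illustration, here is a fuller sketch of the same fan-out with `config` blocks filled in; every value is a placeholder, and only connector options documented in the configuration reference below are used:

```yaml
pipeline:
  id: 'fanout-example'
  name: 'Postgres fan-out to Kafka, CSV and BigQuery'
  stages:
    - id: 'source'
      type: 'postgres'
      config:
        host: 'localhost'
        port: 5432
        database: 'mydb'
        table: 'users'
        username: 'postgres'
        password: '${POSTGRES_PASSWORD}'
      outputs: ['kafka', 'csv-backup']

    - id: 'kafka'
      type: 'kafka'
      config:
        brokers: ['localhost:9092']
        topic: 'user-events'
      inputs: ['source']
      outputs: ['bigquery']

    - id: 'csv-backup'
      type: 'csv'
      config:
        file_path: '/data/users-backup.csv'
        header: true
      inputs: ['source']

    - id: 'bigquery'
      type: 'bigquery'
      config:
        project_id: 'my-project'
        dataset: 'analytics'
        table: 'users'
        credentials_path: '/path/to/service-account.json'
      inputs: ['kafka']
```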
```bash
# Use environment variables in config
export POSTGRES_PASSWORD="secret123"
export BQ_PROJECT_ID="my-project"

gwarp migrate --config production-config.yaml --env production
```
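The exported variables are picked up through the `${VAR}` placeholders shown in the configs above; presumably the same substitution works for any stage config value, for example the BigQuery project:

```yaml
# Assumption: ${VAR} substitution applies to any config value, as it does for password above
config:
  project_id: '${BQ_PROJECT_ID}'
  dataset: 'analytics'
```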
```bash
# Validate configuration without executing
gwarp validate --config complex-pipeline.yaml --dry-run

# Test connectivity to all systems
gwarp validate --config my-config.yaml --check-connections
```
```yaml
pipeline:
  id: 'unique-pipeline-id'            # Required: Unique identifier
  name: 'Human readable name'         # Required: Display name

  # Global settings
  batch:
    size: 10000                       # Records per batch
    size_mb: 100                      # Memory limit per batch (MB)
    timeout_s: 30                     # Batch timeout (seconds)

  parallelism:
    workers: 4                        # Number of parallel workers
    max_memory_mb: 2048               # Maximum memory usage (MB)

  error_handling:
    max_retries: 3                    # Maximum retry attempts
    retry_backoff_ms: 1000            # Retry backoff (milliseconds)
    error_threshold_percent: 5        # Fail if >5% errors
    skip_invalid_records: true        # Skip invalid records

  progress:
    report_interval_s: 5              # Progress report interval (seconds)
    enable_historical: true           # Store historical progress

  checkpoint:
    enabled: true                     # Enable checkpointing
    interval_s: 60                    # Checkpoint interval (seconds)
    storage_type: 'file'              # Storage type (file, memory)
    storage_path: '/tmp/checkpoints'  # Storage location

  stages:
    # See connector-specific examples below
```
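With checkpointing enabled and `storage_type: 'file'`, a failed run can be picked up with the `resume` command from the Quick Start. A sketch of that workflow, assuming resume restarts from the most recent checkpoint under `storage_path`:

```bash
# Kick off a long-running migration in the background
gwarp migrate --config production-config.yaml --daemon

# If it fails partway through, resume it (presumably from the last checkpoint on disk)
gwarp resume unique-pipeline-id --verbose
```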
```yaml
- id: 'postgres-stage'
  type: 'postgres'
  config:
    host: 'localhost'
    port: 5432
    database: 'mydb'
    table: 'users'                                     # Optional: specific table
    query: 'SELECT * FROM users WHERE active = true'   # Optional: custom query
    username: 'postgres'
    password: '${POSTGRES_PASSWORD}'
    ssl_mode: 'disable'                                # disable, require, verify-ca, verify-full
    max_connections: 10                                # Connection pool size
    partitioning:
      strategy: 'primary_key_range'                    # primary_key_range, timestamp, custom
      partition_size: 50000                            # Records per partition
      partition_column: 'id'                           # Column to partition on
```
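The strategy comment above also lists `timestamp`; a hedged variant for reading a large table in time-ordered slices (the column name is a placeholder):

```yaml
partitioning:
  strategy: 'timestamp'            # read the table in time-ordered partitions
  partition_size: 50000            # records per partition
  partition_column: 'created_at'   # placeholder: any timestamp column on the source table
```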
```yaml
- id: 'bigquery-stage'
  type: 'bigquery'
  config:
    project_id: 'my-project'
    dataset: 'analytics'
    table: 'users'
    credentials_path: '/path/to/service-account.json'
    location: 'US'                          # Dataset location
    write_disposition: 'WRITE_APPEND'       # WRITE_APPEND, WRITE_TRUNCATE, WRITE_EMPTY
    create_disposition: 'CREATE_IF_NEEDED'  # CREATE_IF_NEEDED, CREATE_NEVER
    partitioning:
      strategy: 'timestamp'                 # timestamp, ingestion_time
      partition_column: 'created_at'        # Column to partition on
```
```yaml
- id: 'csv-stage'
  type: 'csv'
  config:
    file_path: '/data/users.csv'
    delimiter: ','          # Field delimiter
    quote_char: '"'         # Quote character
    escape_char: "\\"       # Escape character
    header: true            # First row is header
    encoding: 'utf-8'       # File encoding
    compression: 'gzip'     # none, gzip, zip
    buffer_size_mb: 64      # Read/write buffer size
```
```yaml
- id: 'kafka-stage'
  type: 'kafka'
  config:
    brokers: ['localhost:9092']
    topic: 'user-events'
    partition_key: 'user_id'            # Message partitioning key
    consumer_group: 'gwarp-consumer'    # Consumer group (for reading)
    security_protocol: 'PLAINTEXT'      # PLAINTEXT, SSL, SASL_PLAINTEXT, SASL_SSL
    batch_size: 1000                    # Messages per batch
    timeout_ms: 30000                   # Operation timeout (milliseconds)
```
```bash
# Migrate data using configuration
gwarp migrate --config CONFIG_FILE [flags]

# Validate configuration
gwarp validate --config CONFIG_FILE [flags]

# Check pipeline status (daemon mode only)
gwarp status PIPELINE_ID [flags]

# Resume failed pipeline
gwarp resume PIPELINE_ID [flags]

# View pipeline logs (daemon mode only)
gwarp logs PIPELINE_ID [flags]

# Generate configuration templates
gwarp template CONNECTOR_TYPE [flags]
```
```
--config string        Configuration file path
--env string           Environment profile (dev, staging, prod)
--daemon               Run migration in background daemon mode
--verbose              Enable verbose logging
--dry-run              Validate without executing
--check-connections    Test connectivity to all systems
--output string        Output format (table, json, yaml)
```
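Since `--output` is a global flag that accepts `json`, `gwarp status` can presumably be polled from scripts. A rough sketch; the JSON field layout is not documented here, so this simply greps for the `RUNNING` state string shown in the monitoring output further below:

```bash
# Hedged sketch: wait for a daemon-mode pipeline to leave the RUNNING state
while gwarp status user-migration-001 --output json | grep -q 'RUNNING'; do
  sleep 30
done
echo "pipeline finished (or failed) - check: gwarp logs user-migration-001"
```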
```bash
# Basic migration (run to completion)
gwarp migrate --config postgres-to-bigquery.yaml

# Background migration with monitoring
gwarp migrate --config postgres-to-bigquery.yaml --daemon

# Validate with connection check
gwarp validate --config my-pipeline.yaml --check-connections

# Resume failed migration
gwarp resume pipeline-001 --verbose

# Follow logs in real-time (daemon mode)
gwarp logs pipeline-001 --follow --tail 100

# Generate PostgreSQL template
gwarp template postgres --output postgres-template.yaml
```
```yaml
# For maximum throughput
parallelism:
  workers: 8           # Match CPU cores
  max_memory_mb: 4096  # Use available RAM
batch:
  size: 50000          # Larger batches
  size_mb: 200         # Higher memory per batch
```

```yaml
# For memory-constrained environments
parallelism:
  workers: 2           # Fewer workers
  max_memory_mb: 1024  # Conservative memory
batch:
  size: 5000           # Smaller batches
  size_mb: 50          # Lower memory per batch
```
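A rough sanity check when picking these numbers, assuming each worker buffers roughly one batch at a time (an assumption; the batching model is not spelled out here): `workers × size_mb` should stay comfortably below `max_memory_mb`.

```yaml
# Worked example for the profiles above (assuming ~one in-flight batch per worker):
#   throughput profile:          8 workers x 200 MB ≈ 1600 MB of batch buffers < 4096 MB cap
#   memory-constrained profile:  2 workers x  50 MB ≈  100 MB of batch buffers < 1024 MB cap
```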
**Foreground Mode:**

```bash
gwarp migrate --config postgres-to-bigquery.yaml
# Progress: 750,000 / 1,000,000 (75%) | 8,500 rec/s | ETA: 2m 15s
```
**Daemon Mode:**

```bash
gwarp status pipeline-001
# Output:
# Pipeline: pipeline-001 (User Data Migration)
# Status: RUNNING
# Progress: 750,000 / 1,000,000 (75%)
# Throughput: 8,500 records/sec
# ETA: 2m 15s
# Memory Usage: 1.2GB / 2.0GB
# Error Rate: 0.02%
```
**High Memory Usage**

```yaml
# Reduce batch size and workers
batch:
  size: 1000
  size_mb: 25
parallelism:
  workers: 2
```
**Slow Performance**

```yaml
# Increase parallelism and batch size
parallelism:
  workers: 8
batch:
  size: 20000
```
**Connection Timeouts**

```yaml
# Increase timeouts
batch:
  timeout_s: 120
connector_config:
  connection_timeout_s: 60
```
```bash
# Clone repository
git clone https://github.com/riandyhasan/gwarp.git
cd gwarp

# Install dependencies
go mod download

# Run tests
make test

# Build binary
make build

# Run linter
make lint
```

Developed by @riandyhasan