# Capítulo 09 — Integração com Outros Serviços Cloud

Notebook gerado automaticamente a partir do código-fonte Python.


In [None]:
# -*- coding: utf-8 -*-
"""
capitulo_09_integracao_outros_servicos_cloud
"""

# capitulo_09_integracao_outros_servicos_cloud
import duckdb
import os

# Example / Block 1
# Open the in-memory DuckDB connection that every cell below reuses as `con`.
import duckdb

con = duckdb.connect(':memory:')

# Register a Cloudflare R2 secret (key id, secret and account id) and print
# whatever rows the statement returns.
r2_secret_sql = """
-- Criar secret para Cloudflare R2
CREATE SECRET r2_secret (
    TYPE r2,
    KEY_ID 'AKIAIOSFODNN7EXAMPLE',
    SECRET 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY',
    ACCOUNT_ID 'my_account_id'
);
"""
r2_secret_rows = con.execute(r2_secret_sql).fetchall()
print(r2_secret_rows)

# Read Parquet data from R2, once through the r2:// path shorthand and once
# through read_parquet() with a glob.
#
# Each query is executed on its own: when several statements are sent in a
# single execute() call only the last statement's result can be fetched, so
# the first SELECT's rows would never be shown.
r2_read_queries = (
    """
-- Usar protocolo r2://
SELECT * FROM 'r2://my-bucket/data.parquet';
""",
    """
-- Ou read_parquet
SELECT * FROM read_parquet('r2://my-bucket/data/*.parquet');
""",
)
for r2_read_query in r2_read_queries:
    con.execute(r2_read_query)
    print(con.fetchall())  # Inspect the result of this individual query

# Export two tables to R2 as Parquet: `my_table` as one zstd-compressed file,
# and `sales` partitioned by (year, month). Both tables are expected to exist
# already; they are not created in this script.
r2_export_sql = """
-- Escrever para R2
COPY my_table TO 'r2://my-bucket/output.parquet' (
    FORMAT parquet,
    COMPRESSION zstd
);

-- Particionar dados
COPY sales TO 'r2://my-bucket/sales' (
    FORMAT parquet,
    PARTITION_BY (year, month)
);
"""
r2_export_rows = con.execute(r2_export_sql).fetchall()
print(r2_export_rows)

# End-to-end R2 workflow: configure a scoped persistent secret, aggregate the
# sales per product, then write the same aggregation back to R2 as a report.
#
# Two fixes compared with sending everything in one execute() call:
#   * The secret is created with OR REPLACE. A persistent secret is stored on
#     disk, so a plain CREATE fails when the script is run a second time.
#   * Each statement runs on its own so the aggregation result is printed
#     rather than discarded (only the last statement's result is fetchable).
r2_production_steps = (
    """
-- 1. Configurar secret
CREATE OR REPLACE PERSISTENT SECRET r2_production (
    TYPE r2,
    KEY_ID 'your_r2_key_id',
    SECRET 'your_r2_secret',
    ACCOUNT_ID 'your_account_id',
    SCOPE 'r2://production-data'
);
""",
    """
-- 2. Ler dados
SELECT
    product_id,
    sum(amount) as total_sales
FROM 'r2://production-data/transactions/**/*.parquet'
WHERE date >= '2024-01-01'
GROUP BY product_id;
""",
    """
-- 3. Escrever resultado
COPY (
    SELECT
        product_id,
        sum(amount) as total_sales
    FROM 'r2://production-data/transactions/**/*.parquet'
    WHERE date >= '2024-01-01'
    GROUP BY product_id
) TO 'r2://production-data/reports/monthly_sales.parquet';
""",
)
for r2_production_step in r2_production_steps:
    con.execute(r2_production_step)
    print(con.fetchall())  # Inspect the result of this individual step

# Register a Google Cloud Storage secret using an HMAC key id / secret pair
# and print whatever rows the statement returns.
gcs_secret_sql = """
-- Criar secret para GCS
CREATE SECRET gcs_secret (
    TYPE gcs,
    KEY_ID 'my_hmac_access_id',
    SECRET 'my_hmac_secret_key'
);
"""
gcs_secret_rows = con.execute(gcs_secret_sql).fetchall()
print(gcs_secret_rows)

# Read Parquet data from GCS through the gs:// prefix, the gcs:// prefix and
# a recursive glob.
#
# The queries are executed one at a time: a single multi-statement execute()
# only exposes the last statement's result, which silently dropped the rows
# of the first two SELECTs.
gcs_read_queries = (
    """
-- Protocolo gs://
SELECT * FROM 'gs://my-bucket/data.parquet';
""",
    """
-- Ou gcs://
SELECT * FROM 'gcs://my-bucket/data.parquet';
""",
    """
-- Globbing
SELECT * FROM 'gs://my-bucket/data/**/*.parquet';
""",
)
for gcs_read_query in gcs_read_queries:
    con.execute(gcs_read_query)
    print(con.fetchall())  # Inspect the result of this individual query

# Export two tables to GCS as Parquet: `my_table` as one zstd-compressed
# file, and `events` partitioned by (year, month, day). Both tables are
# expected to exist already; they are not created in this script.
gcs_export_sql = """
-- Escrever para GCS
COPY my_table TO 'gs://my-bucket/output.parquet' (
    FORMAT parquet,
    COMPRESSION zstd
);

-- Com particionamento
COPY events TO 'gcs://my-bucket/events' (
    FORMAT parquet,
    PARTITION_BY (year, month, day)
);
"""
gcs_export_rows = con.execute(gcs_export_sql).fetchall()
print(gcs_export_rows)

# Cross-cloud copy: select the unprocessed rows from S3 and write them to
# GCS as Parquet partitioned by `date`.
s3_to_gcs_sql = """
-- Ler do S3, escrever para GCS
COPY (
    SELECT *
    FROM 's3://aws-bucket/source-data/*.parquet'
    WHERE processed = false
) TO 'gs://gcs-bucket/processed-data/' (
    FORMAT parquet,
    PARTITION_BY (date)
);
"""
s3_to_gcs_rows = con.execute(s3_to_gcs_sql).fetchall()
print(s3_to_gcs_rows)

# Register an S3-type secret that targets a local MinIO server: explicit
# endpoint, path-style URLs and SSL disabled.
minio_secret_sql = """
-- Secret para MinIO local
CREATE SECRET minio_secret (
    TYPE s3,
    PROVIDER config,
    KEY_ID 'minioadmin',
    SECRET 'minioadmin',
    ENDPOINT 'localhost:9000',
    URL_STYLE 'path',
    USE_SSL false
);
"""
minio_secret_rows = con.execute(minio_secret_sql).fetchall()
print(minio_secret_rows)

# Create a second MinIO secret (`minio_local`) and then read a Parquet file
# through the regular s3:// prefix. The printed rows are those of the final
# SELECT.
minio_local_sql = """
-- Conectar ao MinIO
CREATE SECRET minio_local (
    TYPE s3,
    PROVIDER config,
    KEY_ID 'minioadmin',
    SECRET 'minioadmin',
    ENDPOINT 'localhost:9000',
    URL_STYLE 'path',
    USE_SSL false
);

-- Usar normalmente
SELECT * FROM 's3://test-bucket/data.parquet';
"""
minio_local_rows = con.execute(minio_local_sql).fetchall()
print(minio_local_rows)

# Register an S3-type secret pointing at a lakeFS endpoint (path-style URLs,
# SSL enabled) and print whatever rows the statement returns.
lakefs_secret_sql = """
-- Secret para lakeFS
CREATE SECRET lakefs_secret (
    TYPE s3,
    PROVIDER config,
    KEY_ID 'AKIAIOSFODNN7EXAMPLE',
    SECRET 'your_lakefs_secret',
    ENDPOINT 'lakefs.example.com',
    URL_STYLE 'path',
    USE_SSL true
);
"""
lakefs_secret_rows = con.execute(lakefs_secret_sql).fetchall()
print(lakefs_secret_rows)

# Query lakeFS branches through the s3:// prefix: read `main`, read
# `dev-branch`, then compare the record counts of both branches.
#
# The queries are executed one at a time: a single multi-statement execute()
# only exposes the last statement's result, so the two branch reads were
# previously run but never displayed.
lakefs_branch_queries = (
    """
-- Ler da branch main
SELECT * FROM 's3://repo-name/main/data/*.parquet';
""",
    """
-- Ler de uma branch de desenvolvimento
SELECT * FROM 's3://repo-name/dev-branch/data/*.parquet';
""",
    """
-- Comparar branches
SELECT
    'main' as branch,
    count(*) as records
FROM 's3://repo-name/main/data/*.parquet'
UNION ALL
SELECT
    'dev',
    count(*)
FROM 's3://repo-name/dev-branch/data/*.parquet';
""",
)
for lakefs_branch_query in lakefs_branch_queries:
    con.execute(lakefs_branch_query)
    print(con.fetchall())  # Inspect the result of this individual query

# Configure one scoped persistent secret per provider (AWS S3, Cloudflare R2,
# Google Cloud Storage) so each bucket prefix resolves to its own
# credentials.
#
# OR REPLACE makes this cell re-runnable: persistent secrets are stored on
# disk and outlive the connection, so a plain CREATE PERSISTENT SECRET fails
# on the second run because the name already exists.
con.execute("""
-- AWS S3
CREATE OR REPLACE PERSISTENT SECRET aws_production (
    TYPE s3,
    PROVIDER credential_chain,
    SCOPE 's3://aws-production'
);

-- Cloudflare R2
CREATE OR REPLACE PERSISTENT SECRET r2_backup (
    TYPE r2,
    KEY_ID 'r2_key',
    SECRET 'r2_secret',
    ACCOUNT_ID 'account_id',
    SCOPE 'r2://backup-bucket'
);

-- Google Cloud Storage
CREATE OR REPLACE PERSISTENT SECRET gcs_analytics (
    TYPE gcs,
    KEY_ID 'gcs_hmac_key',
    SECRET 'gcs_hmac_secret',
    SCOPE 'gs://analytics-bucket'
);
""")
print(con.fetchall())  # Result of the last CREATE statement

# Multi-cloud pipeline: aggregate the last 7 days of raw data from S3, then
# export the aggregation to R2 (backup) and to GCS (analytics).
#
# The previous version nested COPY inside a CTE and inside `SELECT * FROM
# (...)`. COPY is a standalone statement and cannot be used as a subquery,
# and its target was built with `||` instead of being a file-name literal.
# The pipeline is therefore split into: materialize once, then COPY twice.

# Resolve the date once so both exports share the same file name.
report_date = con.execute("SELECT current_date").fetchone()[0]

# Materialize the aggregation so S3 is scanned only once for both exports.
con.execute("""
CREATE OR REPLACE TEMP TABLE processed_data AS
SELECT
    date,
    region,
    sum(amount) as total_amount,
    count(*) as transactions
FROM 's3://aws-production/raw-data/**/*.parquet'
WHERE date >= current_date() - INTERVAL '7 days'
GROUP BY date, region;
""")
print(con.fetchall())  # Inspect result of the materialization

export_targets = (
    f"r2://backup-bucket/processed/{report_date}.parquet",  # backup on R2
    f"gs://analytics-bucket/reports/{report_date}.parquet",  # analytics on GCS
)
for export_target in export_targets:
    # The target only embeds a date produced by DuckDB itself, so
    # interpolating it into the statement is safe here.
    con.execute(f"COPY processed_data TO '{export_target}' (FORMAT parquet);")
    print(con.fetchall())  # Inspect result of this export

# Read publicly accessible files over HTTPS: a Parquet file and a CSV file.
#
# The queries are executed one at a time: a single multi-statement execute()
# only exposes the last statement's result, so the Parquet rows were
# previously run but never displayed.
public_http_queries = (
    """
-- Arquivo Parquet público
SELECT * FROM 'https://example.com/public/data.parquet';
""",
    """
-- CSV público
SELECT * FROM read_csv_auto('https://example.com/data.csv');
""",
)
for public_http_query in public_http_queries:
    con.execute(public_http_query)
    print(con.fetchall())  # Inspect the result of this individual query

# Create an HTTP secret carrying a bearer token, then read a Parquet file
# from the protected endpoint. The printed rows are those of the final
# SELECT.
http_auth_sql = """
-- Criar secret com Bearer token
CREATE SECRET http_auth (
    TYPE http,
    BEARER_TOKEN 'your_bearer_token'
);

-- Acessar API protegida
SELECT * FROM 'https://api.example.com/data.parquet';
"""
http_auth_rows = con.execute(http_auth_sql).fetchall()
print(http_auth_rows)

# Create an HTTP secret that supplies custom request headers, then read a
# Parquet file from a protected URL. The printed rows are those of the final
# SELECT.
http_custom_sql = """
-- Headers customizados
CREATE SECRET http_custom (
    TYPE http,
    EXTRA_HTTP_HEADERS MAP {
        'Authorization': 'Bearer token_value',
        'X-API-Key': 'api_key_value',
        'X-Custom-Header': 'custom_value'
    }
);

-- Usar automaticamente
SELECT * FROM 'https://api.example.com/protected/data.parquet';
"""
http_custom_rows = con.execute(http_custom_sql).fetchall()
print(http_custom_rows)

# Configure header-based API authentication, then count events per user from
# an exported Parquet file, restricted to dates on or after 2024-01-01.
api_secret_sql = """
-- Configurar autenticação
CREATE SECRET api_secret (
    TYPE http,
    EXTRA_HTTP_HEADERS MAP {
        'Authorization': 'Bearer eyJhbGc...',
        'X-API-Version': 'v2'
    }
);

-- Ler dados de API
SELECT
    user_id,
    count(*) as events
FROM 'https://analytics-api.example.com/exports/events.parquet'
WHERE date >= '2024-01-01'
GROUP BY user_id;
"""
api_event_rows = con.execute(api_secret_sql).fetchall()
print(api_event_rows)

# Configure an authenticated HTTP proxy through a secret.
http_proxy_secret_sql = """
-- Configurar proxy com secret
CREATE SECRET http_proxy (
    TYPE http,
    HTTP_PROXY 'http://proxy.company.com:8080',
    HTTP_PROXY_USERNAME 'username',
    HTTP_PROXY_PASSWORD 'password'
);
"""
http_proxy_secret_rows = con.execute(http_proxy_secret_sql).fetchall()
print(http_proxy_secret_rows)

# Alternative proxy configuration: the same proxy URL and credentials applied
# through SET statements instead of a secret.
http_proxy_settings_sql = """
SET http_proxy = 'http://proxy.company.com:8080';
SET http_proxy_username = 'username';
SET http_proxy_password = 'password';
"""
http_proxy_settings_rows = con.execute(http_proxy_settings_sql).fetchall()
print(http_proxy_settings_rows)

# Load httpfs, point it at a corporate CA bundle and turn server certificate
# verification on.
ca_cert_settings_sql = """
LOAD httpfs;
SET ca_cert_file = '/path/to/corporate-ca-bundle.crt';
SET enable_server_cert_verification = true;
"""
ca_cert_settings_rows = con.execute(ca_cert_settings_sql).fetchall()
print(ca_cert_settings_rows)

# Daily backup from S3 to R2: copy the rows modified today into a dated
# folder on R2, partitioned by region and compressed with zstd.
#
# The previous version built the COPY target with `'...' || current_date()
# || '/'`. COPY expects its target as a file-name literal, so the dated path
# is resolved first and interpolated into the statement as a plain string.
backup_date = con.execute("SELECT current_date").fetchone()[0]
backup_target = f"r2://backup-bucket/daily/{backup_date}/"

# backup_target only embeds a date produced by DuckDB itself, so
# interpolating it into the statement is safe here.
con.execute(f"""
-- Backup automático de S3 para R2
COPY (
    SELECT *
    FROM 's3://primary-bucket/data/**/*.parquet'
    WHERE last_modified >= current_date()
) TO '{backup_target}' (
    FORMAT parquet,
    PARTITION_BY (region),
    COMPRESSION zstd
);
""")
print(con.fetchall())  # Inspect result

# Combine the `orders` datasets held on S3, GCS and R2 into one result with
# UNION ALL.
multi_source_union_sql = """
-- Ler de múltiplas fontes
SELECT * FROM 's3://aws-datalake/orders/*.parquet'
UNION ALL
SELECT * FROM 'gs://gcp-datalake/orders/*.parquet'
UNION ALL
SELECT * FROM 'r2://r2-datalake/orders/*.parquet';
"""
multi_source_rows = con.execute(multi_source_union_sql).fetchall()
print(multi_source_rows)

# Tiered-storage read: rows from the last 30 days come from the S3 "hot"
# bucket, older rows (back to 2023-01-01) come from the R2 "cold" archive,
# and both are combined with UNION ALL.
hot_cold_union_sql = """
-- Hot data (acesso frequente) no S3
-- Cold data (arquivo) no R2 (sem egress cost)

-- Ler hot data
SELECT * FROM 's3://hot-bucket/recent-data/*.parquet'
WHERE date >= current_date() - INTERVAL '30 days'

UNION ALL

-- Ler cold data (quando necessário)
SELECT * FROM 'r2://cold-bucket/archive/**/*.parquet'
WHERE date < current_date() - INTERVAL '30 days'
  AND date >= '2023-01-01';
"""
hot_cold_rows = con.execute(hot_cold_union_sql).fetchall()
print(hot_cold_rows)

# Troubleshooting: ask DuckDB which secret would be applied to a given path
# for each provider type (S3, R2, GCS).
#
# The lookups are executed one at a time: a single multi-statement execute()
# only exposes the last statement's result, so the S3 and R2 answers were
# previously never displayed.
which_secret_queries = (
    """
-- S3
SELECT which_secret('s3://my-bucket/file.parquet', 's3');
""",
    """
-- R2
SELECT which_secret('r2://my-bucket/file.parquet', 'r2');
""",
    """
-- GCS
SELECT which_secret('gs://my-bucket/file.parquet', 'gcs');
""",
)
for which_secret_query in which_secret_queries:
    con.execute(which_secret_query)
    print(con.fetchall())  # Inspect the result of this individual lookup

# Troubleshooting: list the registered secrets, one query per secret type.
#
# The listings are executed one at a time: a single multi-statement execute()
# only exposes the last statement's result, so the 's3' and 'r2' listings
# were previously never displayed.
list_secrets_queries = (
    """
-- Listar secrets por tipo
SELECT * FROM duckdb_secrets() WHERE type = 's3';
""",
    """
SELECT * FROM duckdb_secrets() WHERE type = 'r2';
""",
    """
SELECT * FROM duckdb_secrets() WHERE type = 'gcs';
""",
)
for list_secrets_query in list_secrets_queries:
    con.execute(list_secrets_query)
    print(con.fetchall())  # Inspect the result of this individual listing

# Troubleshooting: (re)create an S3-type secret with an explicit endpoint,
# as needed for MinIO or other custom S3-compatible services.
test_secret_sql = """
-- Para MinIO ou serviços customizados, verifique endpoint
CREATE OR REPLACE SECRET test_secret (
    TYPE s3,
    PROVIDER config,
    KEY_ID 'key',
    SECRET 'secret',
    ENDPOINT 'correct-endpoint.com',  -- Verifique URL correta
    USE_SSL true
);
"""
test_secret_rows = con.execute(test_secret_sql).fetchall()
print(test_secret_rows)

# Exercise: round trip against a local MinIO server. Creates the secret,
# builds a 100-row table, writes it to the bucket as Parquet and counts the
# rows read back. The printed rows are those of the final count query.
minio_exercise_sql = """
-- 1. Instalar e iniciar MinIO (via Docker)
-- docker run -p 9000:9000 -p 9001:9001 minio/minio server /data

-- 2. Configurar secret
CREATE SECRET minio_test (
    TYPE s3,
    PROVIDER config,
    KEY_ID 'minioadmin',
    SECRET 'minioadmin',
    ENDPOINT 'localhost:9000',
    URL_STYLE 'path',
    USE_SSL false
);

-- 3. Criar bucket via console (localhost:9001)
-- 4. Testar escrita
CREATE TABLE test AS SELECT range as id FROM range(100);
COPY test TO 's3://test-bucket/data.parquet';

-- 5. Testar leitura
SELECT count(*) FROM 's3://test-bucket/data.parquet';
"""
minio_exercise_rows = con.execute(minio_exercise_sql).fetchall()
print(minio_exercise_rows)

# Exercise: tag the rows read from S3 and from R2 with their source, union
# them, and report the record count and summed `amount` per source.
multi_source_exercise_sql = """
-- Simular leitura de múltiplas fontes
WITH s3_data AS (
    SELECT 's3' as source, * FROM 's3://bucket1/data.parquet'
),
r2_data AS (
    SELECT 'r2' as source, * FROM 'r2://bucket2/data.parquet'
)
SELECT
    source,
    count(*) as records,
    sum(amount) as total
FROM (
    SELECT * FROM s3_data
    UNION ALL
    SELECT * FROM r2_data
)
GROUP BY source;
"""
per_source_rows = con.execute(multi_source_exercise_sql).fetchall()
print(per_source_rows)


