# Capítulo 01: Introdução ao DuckDB

Notebook gerado automaticamente a partir do código-fonte Python.


In [None]:
# -*- coding: utf-8 -*-
"""
capitulo_01_introducao_ao_duckdb
"""

# capitulo_01_introducao_ao_duckdb
import duckdb
import os

# Exemplo/Bloco 1
import duckdb

import importlib.util


def has_module(name):
    """Report whether the module ``name`` can be located for import.

    Args:
        name: Absolute module name, optionally dotted (e.g. ``"os.path"``).

    Returns:
        True if ``importlib.util.find_spec`` returns a spec for ``name``;
        False if it returns None or the lookup raises ImportError/ValueError.
    """
    try:
        return importlib.util.find_spec(name) is not None
    except (ImportError, ValueError):
        # For dotted names find_spec imports the parent first and raises
        # ModuleNotFoundError (an ImportError) when the parent is missing or
        # is not a package; it raises ValueError when an already-imported
        # module has ``__spec__`` set to None. Both mean "not importable".
        return False

def safe_install_ext(con, ext_name):
    """Run INSTALL and then LOAD for an extension, failing softly.

    Args:
        con: Object exposing ``execute(sql)``; both statements are sent to it.
        ext_name: Extension name, interpolated verbatim into the SQL text.

    Returns:
        True when both statements run without raising. False when either
        one raises, after a warning containing the error has been printed.
    """
    try:
        # INSTALL goes first; a failure there skips LOAD entirely.
        for statement in (f"INSTALL {ext_name}", f"LOAD {ext_name}"):
            con.execute(statement)
    except Exception as err:
        print(f"Warning: Failed to install/load {ext_name} extension: {err}")
        return False
    return True

# Script section: opens a ':memory:' DuckDB connection, runs three SQL batches
# on it in order, and prints whatever fetchall() returns after each batch.
# NOTE(review): the batches reference external paths (sales_data.csv,
# input.json, ./my_delta_table, ./delta_tables/sales); presumably these must
# exist relative to the working directory for the script to run -- confirm.
con = duckdb.connect(database=":memory:")

# Batch 1: per-region average sales and order count read from a CSV file.
csv_region_summary_sql = """
-- Analisar um arquivo CSV grande
SELECT
    region,
    AVG(sales) as avg_sales,
    COUNT(*) as total_orders
FROM read_csv('sales_data.csv')
GROUP BY region
ORDER BY avg_sales DESC;
"""

# Batch 2: filter JSON rows by date and write them out as a Parquet file.
json_to_parquet_sql = """
-- Transformar e exportar dados
COPY (
    SELECT * FROM read_json('input.json')
    WHERE date >= '2024-01-01'
) TO 'output.parquet' (FORMAT PARQUET);
"""

# Batch 3: Delta table scan, a view over Delta data, a monthly revenue
# aggregation, and a Parquet export, all sent in a single execute() call.
# NOTE(review): with four statements in one call, fetchall() presumably only
# exposes the result of the last statement, so the two SELECT results would
# never be printed -- confirm against the DuckDB Python client docs.
delta_pipeline_sql = """
-- 1. Carregar extensão automaticamente
SELECT * FROM delta_scan('./my_delta_table');

-- 2. Criar view sobre dados Delta
CREATE VIEW sales AS
SELECT * FROM delta_scan('./delta_tables/sales');

-- 3. Análise complexa
SELECT
    DATE_TRUNC('month', order_date) as month,
    product_category,
    SUM(revenue) as total_revenue,
    AVG(revenue) as avg_revenue,
    COUNT(DISTINCT customer_id) as unique_customers
FROM sales
WHERE order_date >= '2024-01-01'
GROUP BY 1, 2
HAVING SUM(revenue) > 10000
ORDER BY 1 DESC, 3 DESC;

-- 4. Exportar resultados
COPY (
    SELECT * FROM sales WHERE region = 'North America'
) TO 'na_sales.parquet' (FORMAT PARQUET, COMPRESSION ZSTD);
"""

# Execute each batch and print its fetched rows before moving to the next.
for sql_batch in (csv_region_summary_sql, json_to_parquet_sql, delta_pipeline_sql):
    con.execute(sql_batch)
    print(con.fetchall())  # Inspect result

