# 05 - SQL Queries (Consultas SQL)

Análise dos dados usando queries SQL no banco de dados SQLite.


In [2]:
import pandas as pd
import sqlite3


## 1. Conectar ao Banco de Dados


In [15]:
# Conectar ao banco
conn = sqlite3.connect('data/pipeline.db')
print("✓ Conexão estabelecida com sucesso!")


✓ Conexão estabelecida com sucesso!


## Query 1: Visão Geral dos Dados


In [5]:
query = """
SELECT 
    COUNT(DISTINCT CustomerID) as total_clientes,
    COUNT(DISTINCT InvoiceNo) as total_pedidos,
    ROUND(SUM(valor_total), 2) as receita_total,
    ROUND(AVG(valor_total), 2) as ticket_medio
FROM vendas
WHERE eh_devolucao = 0
"""

pd.read_sql_query(query, conn)



Unnamed: 0,total_clientes,total_pedidos,receita_total,ticket_medio
0,4340,20726,10642110.8,20.23


## Query 2: Top 10 Produtos Mais Vendidos


In [6]:
query = """
SELECT 
    codigo_produto,
    nome_produto,
    ROUND(receita_total, 2) as receita_total,
    quantidade_vendida
FROM desempenho_produtos
ORDER BY receita_total DESC
LIMIT 10
"""

pd.read_sql_query(query, conn)


Unnamed: 0,codigo_produto,nome_produto,receita_total,quantidade_vendida
0,DOT,DOTCOM POSTAGE,206245.48,707
1,22423,REGENCY CAKESTAND 3 TIER,164459.49,13007
2,47566,PARTY BUNTING,98243.88,18010
3,85123A,WHITE HANGING HEART T-LIGHT HOLDER,97659.94,35006
4,85099B,JUMBO BAG RED RETROSPOT,92175.79,47260
5,23084,RABBIT NIGHT LIGHT,66661.63,30631
6,POST,POSTAGE,66230.64,3003
7,22086,PAPER CHAIN KIT 50'S CHRISTMAS,63715.24,18876
8,84879,ASSORTED COLOUR BIRD ORNAMENT,58792.42,36282
9,79321,CHILLI LIGHTS,53746.66,10225


## Query 3: Top 10 Países por Receita


In [16]:
query = """
SELECT 
    pais,
    ROUND(receita_total, 2) as receita_total,
    total_pedidos,
    clientes_unicos
FROM analise_paises
ORDER BY receita_total DESC
LIMIT 10
"""

pd.read_sql_query(query, conn)


Unnamed: 0,pais,receita_total,total_pedidos,clientes_unicos
0,United Kingdom,8189252.3,23492,3951
1,Netherlands,284661.54,101,9
2,EIRE,262993.38,360,4
3,Germany,221509.47,603,95
4,France,197317.11,461,88
5,Australia,137009.77,69,9
6,Switzerland,56363.05,74,22
7,Spain,54756.03,105,31
8,Belgium,40910.96,119,25
9,Sweden,36585.41,46,8


## Query 4: Vendas Mensais


In [8]:
query = """
SELECT 
    ano_mes,
    ROUND(receita_total, 2) as receita_total,
    total_pedidos,
    clientes_unicos
FROM metricas_mensais
ORDER BY ano_mes
"""

pd.read_sql_query(query, conn)


Unnamed: 0,ano_mes,receita_total,total_pedidos,clientes_unicos
0,2010-12,746723.61,2025,949
1,2011-01,558448.56,1476,784
2,2011-02,497026.41,1393,799
3,2011-03,682013.98,1983,1021
4,2011-04,492367.84,1744,900
5,2011-05,722094.1,2162,1080
6,2011-06,689977.23,2012,1052
7,2011-07,680156.99,1927,994
8,2011-08,703510.58,1735,981
9,2011-09,1017596.68,2327,1303


## Query 5: Top 10 Clientes VIP


In [17]:
query = """
SELECT 
    id_cliente,
    ROUND(valor_total_gasto, 2) as valor_total_gasto,
    frequencia_pedidos,
    dias_ultima_compra
FROM analise_clientes
ORDER BY valor_total_gasto DESC
LIMIT 10
"""

pd.read_sql_query(query, conn)


Unnamed: 0,id_cliente,valor_total_gasto,frequencia_pedidos,dias_ultima_compra
0,0,1469611.65,3708,0
1,14646,279489.02,77,1
2,18102,256438.49,62,0
3,17450,187322.17,55,7
4,14911,132458.73,248,0
5,12415,123725.45,26,23
6,14156,113214.59,66,9
7,17511,88125.38,46,2
8,16684,65892.08,31,3
9,13694,62690.54,60,3


## Query 6: Segmentação de Clientes (RFM)


In [18]:
query = """
SELECT 
    CASE 
        WHEN score_rfm_total >= 10 THEN 'VIP'
        WHEN score_rfm_total >= 7 THEN 'Alto Valor'
        WHEN score_rfm_total >= 5 THEN 'Medio'
        ELSE 'Baixo Valor'
    END as segmento,
    COUNT(*) as total_clientes,
    ROUND(SUM(valor_total_gasto), 2) as receita_total,
    ROUND(AVG(valor_total_gasto), 2) as ticket_medio_cliente
FROM analise_clientes
GROUP BY segmento
ORDER BY receita_total DESC
"""

pd.read_sql_query(query, conn)


Unnamed: 0,segmento,total_clientes,receita_total,ticket_medio_cliente
0,VIP,858,6985288.65,8141.36
1,Alto Valor,1614,2131594.82,1320.69
2,Medio,1067,476329.21,446.42
3,Baixo Valor,834,154918.39,185.75


## Query 7: Resumo de Devoluções


In [19]:
query = """
SELECT 
    ROUND(SUM(CASE WHEN eh_devolucao = 0 THEN valor_total ELSE 0 END), 2) as receita_vendas,
    ROUND(ABS(SUM(CASE WHEN eh_devolucao = 1 THEN valor_total ELSE 0 END)), 2) as valor_devolvido,
    ROUND(100.0 * ABS(SUM(CASE WHEN eh_devolucao = 1 THEN valor_total ELSE 0 END)) / 
          SUM(CASE WHEN eh_devolucao = 0 THEN valor_total ELSE 0 END), 2) as taxa_devolucao_pct
FROM vendas
"""

pd.read_sql_query(query, conn)


Unnamed: 0,receita_vendas,valor_devolvido,taxa_devolucao_pct
0,10642110.8,893979.73,8.4


## Query 8: Produtos Mais Devolvidos


In [None]:
query = """
SELECT 
    codigo_produto,
    nome_produto,
    ROUND(ABS(valor_devolvido), 2) as valor_devolvido,
    ABS(quantidade_devolvida) as quantidade_devolvida
FROM analise_devolucoes
ORDER BY ABS(valor_devolvido) DESC
LIMIT 10
"""

pd.read_sql_query(query, conn)



✓ Vendas Mensais:


Unnamed: 0,ano_mes,total_pedidos,receita_total,clientes_unicos,ticket_medio
0,2010-12,2025,746723.61,949,368.75
1,2011-01,1476,558448.56,784,378.35
2,2011-02,1393,497026.41,799,356.8
3,2011-03,1983,682013.98,1021,343.93
4,2011-04,1744,492367.841,900,282.32
5,2011-05,2162,722094.1,1080,333.99
6,2011-06,2012,689977.23,1052,342.93
7,2011-07,1927,680156.991,994,352.96
8,2011-08,1735,703510.58,981,405.48
9,2011-09,2327,1017596.682,1303,437.3


## Query 9: Vendas por Dia da Semana


In [20]:
query = """
SELECT 
    CASE CAST(strftime('%w', InvoiceDate) AS INTEGER)
        WHEN 0 THEN 'Domingo'
        WHEN 1 THEN 'Segunda'
        WHEN 2 THEN 'Terca'
        WHEN 3 THEN 'Quarta'
        WHEN 4 THEN 'Quinta'
        WHEN 5 THEN 'Sexta'
        WHEN 6 THEN 'Sabado'
    END as dia_semana,
    COUNT(DISTINCT InvoiceNo) as total_pedidos,
    ROUND(SUM(valor_total), 2) as receita_total
FROM vendas
WHERE eh_devolucao = 0
GROUP BY CAST(strftime('%w', InvoiceDate) AS INTEGER)
ORDER BY CAST(strftime('%w', InvoiceDate) AS INTEGER)
"""

pd.read_sql_query(query, conn)


Unnamed: 0,dia_semana,total_pedidos,receita_total
0,Domingo,2207,806790.78
1,Segunda,3237,1775782.07
2,Terca,3717,2175700.51
3,Quarta,3865,1847074.38
4,Quinta,4408,2199292.57
5,Sexta,3292,1837470.49


## Query 10: Crescimento Mensal


In [21]:
query = """
SELECT 
    ano_mes,
    ROUND(receita_total, 2) as receita_total,
    ROUND(100.0 * (receita_total - LAG(receita_total) OVER (ORDER BY ano_mes)) / 
          LAG(receita_total) OVER (ORDER BY ano_mes), 2) as crescimento_pct
FROM metricas_mensais
ORDER BY ano_mes
"""

pd.read_sql_query(query, conn)


Unnamed: 0,ano_mes,receita_total,crescimento_pct
0,2010-12,746723.61,
1,2011-01,558448.56,-25.21
2,2011-02,497026.41,-11.0
3,2011-03,682013.98,37.22
4,2011-04,492367.84,-27.81
5,2011-05,722094.1,46.66
6,2011-06,689977.23,-4.45
7,2011-07,680156.99,-1.42
8,2011-08,703510.58,3.43
9,2011-09,1017596.68,44.65


## Fechar Conexão


In [None]:
conn.close()
print("Conexão encerrada com sucesso!")

print("ANÁLISES SQL CONCLUÍDAS!")



✓ Conexão encerrada com sucesso!

ANÁLISES SQL CONCLUÍDAS!
