<a href="https://colab.research.google.com/github/marquezjp/SQLToPandas/blob/main/SQLite/Query.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Manipular SQLite com Pandas

## Preparar Ambiente

Para instalar o XlsxWriter

```
!pip install -U XlsxWriter
```

In [None]:
# Load the date, csv and pandas libraries
from datetime import date
import csv
import pandas as pd

In [None]:
import xlsxwriter

In [None]:
import sqlite3
from sqlalchemy import text
from sqlalchemy.engine import create_engine

In [None]:
# Format today's date as day/month/year and show it
today = f"{date.today():%d/%m/%Y}"
print("Hoje é", today)

In [None]:
# Run the shell's `dir` command to list the current working directory
!dir

## Conexão via SQL Alchemy

In [None]:
# Build the SQLite connection (SQLAlchemy engine) from a small config dict
sqliteConfig = {
    'dialect': 'sqlite',
    'dbworkpath': 'SQLite',
    'dbfilename': 'database',
    'dbext': 'sqlite'
}

dialeto = sqliteConfig.get('dialect', 'sqlite')

dbworkpath = sqliteConfig.get('dbworkpath', '')
dbfilename = sqliteConfig.get('dbfilename', '')
dbext = sqliteConfig.get('dbext', '')

# Skip empty pieces: an empty folder would otherwise produce '//file' (an
# absolute path at the filesystem root) and an empty extension a trailing '.'.
nomeBanco = f'{dbfilename}.{dbext}' if dbext else dbfilename
caminhoBanco = '/'.join(parte for parte in (dbworkpath.rstrip('/'), nomeBanco) if parte)

# Empty dburl (no file name configured) yields 'sqlite://', an in-memory database
dburl = f'/{caminhoBanco}' if dbfilename else ''

conexaoSQLite = create_engine(f'{dialeto}://{dburl}', echo=False)
conexaoSQLite

**Principais comandos do SQLite**

```
SELECT name FROM sqlite_master WHERE type='table';

SELECT name FROM sqlite_master WHERE type ='table' AND name NOT LIKE 'sqlite_%';

SELECT sql FROM sqlite_master WHERE type = 'table' AND name = 'table_name';

PRAGMA table_info('table_name')

CREATE INDEX idx_contacts_name ON contacts (first_name, last_name);

CREATE [UNIQUE] INDEX index_name ON table_name ( column_name [, ...] );

DROP INDEX [IF EXISTS] index_name;

SELECT * FROM sqlite_master WHERE type = 'index';

SELECT type, name, tbl_name, sql FROM sqlite_master WHERE type= 'index';

PRAGMA index_list('table_name');

PRAGMA index_info('index_name');

CREATE TABLE table_name(
  chng_id INTEGER PRIMARY KEY,
  acct_no INTEGER REFERENCES account,
  location INTEGER REFERENCES locations,
  amt INTEGER,  -- in cents
  authority TEXT,
  comment TEXT
);
CREATE INDEX index_name ON table_name(acct_no, abs(amt));
```

In [None]:
%%timeit
pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table'", conexaoSQLite)

In [None]:
%%timeit
with conexaoSQLite.connect() as con:
    rs = con.execute(f'SELECT name FROM sqlite_master WHERE type='table';')
    for row in rs:
        print row

In [None]:
%%timeit
con = sqlite3.connect(dburl)
cursor = con.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
print(cursor.fetchall())

## Consulta SQL

## Executar a Consulta

In [None]:
%%timeit
base = pd.read_sql_query(SQL, conexaoOracle)
base.head()

In [None]:
# Print the selected columns of the first row of `base`
camposSelecionados = ['orgao', 'ano', 'mes', 'folha', 'calculo', 'matricula']
print(base.loc[:, camposSelecionados].values[0])

## Salva os Dados Selecionados no Arquivo CSV

In [None]:
%%timeit
nomeArquivo = 'sigrh_schema'
base.to_csv(f'{nomeArquivo}.csv', index=False, encoding='utf-8', sep=';', decimal=',', quoting=csv.QUOTE_NONNUMERIC)

In [None]:
%%timeit
nomeArquivo = 'sigrh_schema'
compression_opts = dict(method='zip', archive_name=f'{nomeArquivo}.csv')
base.to_csv(f'{nomeArquivo}.zip', index=False, encoding='utf-8', sep=';', decimal=',', quoting=csv.QUOTE_NONNUMERIC, compression=compression_opts)

## Salva os Dados Selecionados no Excel

In [None]:
%%timeit
dataReferencia = date.today().strftime('%Y%m%d')
nomePlanilha = 'SCHEMA'
nomeArquivo = f'SIGRH-MCZ - {nomePlanilha} - {dataReferencia}.xlsx'

planilha = pd.ExcelWriter(nomeArquivo, engine='xlsxwriter', datetime_format='dd/mm/yy', date_format='dd/mm/yy')
base.to_excel(planilha, sheet_name=nomePlanilha, startrow=0, index=False)

# Formata Tabela
pagina = planilha.sheets[nomePlanilha]
linhas, colunas = base.shape
column_settings = [{'header': column} for column in base.columns.values.tolist()]
tabelaOpcoes = {
    'name': nomePlanilha,
    'style': 'Table Style Medium 11',
    'autofilter': False,
    'columns': column_settings
}
pagina.add_table(0, 0, linhas, colunas - 1, tabelaOpcoes)

# Formata Impressão
pagina.hide_gridlines(2)
pagina.freeze_panes(1, 0)
pagina.print_area(0,0,linhas, colunas)
pagina.repeat_rows(0,0)
pagina.fit_to_pages(1,0)
pagina.set_paper(9) # A4
pagina.set_portrait() # set_landscape or set_portrait()
#pagina.center_horizontally()
#pagina.center_vertically()
pagina.set_margins(left=0.7, right=0.7, top=1.14, bottom=0.75)

# Formata Cabecalho e Rodape
pagina.set_header('&R' + 'Data: &D')
pagina.set_footer('&R' +  'Página: &P of &N')

# Grava a Planilha
planilha.save()

In [None]:
# Read the exported sheet back and preview its first rows
pd.read_excel(
    io=nomeArquivo,
    sheet_name=nomePlanilha,
    engine='openpyxl',
).head()

## Salva os Dados Selecionados no Google Sheet

In [None]:
# Load the libraries and authenticate against Google Drive
from google.colab import auth
# Must run before the credentials lookup below
auth.authenticate_user()

import gspread
# NOTE(review): oauth2client appears to be deprecated upstream — confirm it is
# still available in the target Colab runtime.
from oauth2client.client import GoogleCredentials

# gspread client authorized with the application-default credentials
gc = gspread.authorize(GoogleCredentials.get_application_default())

In [None]:
%%timeit
dataReferencia = date.today().strftime('%Y%m%d')
nomePlanilha = 'BASE'
nomeArquivo = f'SIGRH-MCZ - {nomePlanilha} - {dataReferencia}'

base = pd.read_sql(SQL, conexaoOracle)

# Criar a Planilha
planilha = gc.create(nomeArquivo)
planilha = gc.open(nomeArquivo)
pagina = municipios_arquivo.add_worksheet(nomePlanilha,rows=len(base),cols=len(base.columns))
pagina.update([base.columns.values.tolist()] + base.values.tolist())

## Salva os Dados Selecionados no SQLite

In [None]:
%%timeit
nomeTabela = 'schema'
base.to_sql(name=nomeTabela, con=conexaoSQLite, if_exists="replace", index=False)

In [None]:
%%timeit
pd.read_sql(f'select * from {nomeTabela}', conexaoSQLite).head()

## Salva os Dados Selecionados em partes no SQLite

In [None]:
%%timeit
camposSelecionados = ['orgao', 'ano', 'mes', 'folha', 'calculo', 'matricula']
tamanhoPartes=10**5
nomeTabela = 'contracheque'
for partesSQL in pd.read_sql_query(SQL, conexaoOracle, chunksize=tamanhoPartes):
    partesSQL.to_sql(name=nomeTabela, con=conexaoSQLite, if_exists="append", index=False)
    print(partesSQL[camposSelecionados].values[0])
    #print(partesSQL.iloc[0, 1])

In [None]:
%%timeit
pd.read_sql(f'select count(*) from {nomeTabela}', conexaoSQLite).head()

In [None]:
# Count rows per orgao for ano 2020. The literal uses single quotes: in SQL,
# double quotes denote an identifier, and SQLite only treats them as a string
# through a legacy fallback.
pd.read_sql(f"select orgao, count(*) from {nomeTabela} where ano = '2020' group by orgao", conexaoSQLite)

In [None]:
%%timeit
with conexaoSQLite.connect() as con:
    rs = con.execute(f'select orgao, count(*) from {nomeTabela} where ano = "2020" group by orgao')
    for row in rs:
        print row

## Dump Todas as Tabelas SQLite

In [None]:
def to_csv(db_path='database.db'):
    """Dump every table of a SQLite database to '<table name>.csv'.

    The CSV files are written to the current working directory, one per
    table, with the DataFrame index stored in a column labelled 'index'.

    Args:
        db_path: Path of the SQLite file to read. Defaults to 'database.db'.
    """
    db = sqlite3.connect(db_path)
    try:
        cursor = db.cursor()
        try:
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            table_names = [row[0] for row in cursor.fetchall()]
        finally:
            cursor.close()

        for table_name in table_names:
            # Quote the identifier so names with spaces, quotes or SQL
            # keywords still produce a valid statement.
            quoted_name = '"' + table_name.replace('"', '""') + '"'
            table = pd.read_sql_query(f'SELECT * FROM {quoted_name}', db)
            table.to_csv(table_name + '.csv', index_label='index')
    finally:
        # Release the connection even when a query or a file write fails
        db.close()

## Mostrar os Nomes de Tabelas e Nomes de Colunas de Todas as Tabelas do SQLite

In [None]:
# Print the table names, then the column names of each table
db_filename = 'database.sqlite'
newline_indent = '\n   '

db = sqlite3.connect(db_filename)
try:
    db.text_factory = str
    cur = db.cursor()

    result = cur.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()
    # Built row by row so a database without tables gives an empty list
    # instead of raising IndexError.
    table_names = sorted(row[0] for row in result)
    print("\ntables are:" + newline_indent + newline_indent.join(table_names))

    for table_name in table_names:
        # Double any single quote so the name stays a valid SQL string literal
        safe_name = table_name.replace("'", "''")
        result = cur.execute("PRAGMA table_info('%s')" % safe_name).fetchall()
        # The column name is the second field of each table_info row
        column_names = [column[1] for column in result]
        print(("\ncolumn names for %s:" % table_name)
              + newline_indent
              + newline_indent.join(column_names))
finally:
    # Close the connection even if one of the queries fails
    db.close()
print("\nexiting.")

In [None]:
# Convert file existing_db.db to SQL dump file dump.sql
import sqlite3
import os

con = sqlite3.connect('existing_db.db')
try:
    # Explicit UTF-8 so the dump does not depend on the platform's default encoding
    with open('dump.sql', 'w', encoding='utf-8') as f:
        for line in con.iterdump():
            f.write('%s\n' % line)
finally:
    # Release the database file once the dump is written (or has failed)
    con.close()