# Libraries

In [1]:
import sqlite3
import pandas as pd

from sqlalchemy import create_engine

# Introdução à linguagem SQL

Os dados foram obtidos no Kaggle, no conjunto de dados [Olist](https://www.kaggle.com/olistbr/brazilian-ecommerce).

## Inserindo os dados nas tabelas

### tabela x

### 1. Connect to dataset

In [2]:
# Engine bound to the local SQLite file; echo=False keeps SQL statement logging off.
db = create_engine(
    'sqlite:///db_olist.sqlite',
    echo=False,
)

# Connection object reused by the cells below.
conn = db.connect()

As três barras em `sqlite:///` indicam que estamos acessando um arquivo local por meio de um caminho relativo. Podemos conferir o arquivo `db_olist.sqlite` criado no diretório local.

### 2. Loading dataset

#### Customer

In [3]:
# Load the customers CSV over HTTPS from GitHub into a dataframe.
df_customer = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/lucasquemelli/ds_ao_dev/main/olist/olist_customers_dataset.csv",
)

In [4]:
# Preview the first five rows (five is also the default of head()).
df_customer.head(n=5)

Unnamed: 0,customer_id,customer_unique_id,customer_zip_code_prefix,customer_city,customer_state
0,06b8999e2fba1a1fbc88172c00ba8bc7,861eff4711a542e4b93843c6dd7febb0,14409,franca,SP
1,18955e83d337fd6b2def6b18a428ac77,290c77bc529b7ac935b93aa66c333dc3,9790,sao bernardo do campo,SP
2,4e7b3e00288586ebd08712fdd0374a03,060e732b5b29e8181a18229c7b0b2b5e,1151,sao paulo,SP
3,b2b6027bc5c5109e529d4dc6358b12c3,259dac757896d24d7702b9acbbff3f3c,8775,mogi das cruzes,SP
4,4f2d8ab171c80ec8364f7c12e35b23ad,345ecd01c38d18a9036ed96c73b8d066,13056,campinas,SP


Precisamos criar a tabela com o mesmo número de colunas e com os mesmos nomes das colunas do dataframe acima.

In [5]:
# Show each column's pandas dtype; the table schema defined below is based on these.
df_customer.dtypes

customer_id                 object
customer_unique_id          object
customer_zip_code_prefix     int64
customer_city               object
customer_state              object
dtype: object

In [6]:
# DDL for the `customer` table. Column names mirror the columns of df_customer,
# and the SQLite types follow its dtypes (object -> TEXT, int64 -> INTEGER).
# The variable name (spelled "costumer") is kept unchanged so that any other
# cell referring to it keeps working.
schema_costumer = """

    CREATE TABLE customer(

    customer_id              TEXT,
    customer_unique_id       TEXT,
    customer_zip_code_prefix INTEGER,
    customer_city            TEXT,
    customer_state           TEXT

    )

"""

# The SQLite file persists between kernel sessions, so a bare CREATE TABLE fails
# with "table customer already exists" on any re-run. Dropping the table first
# makes this cell re-runnable and leaves an empty table, so the append-mode load
# that follows does not pile duplicate rows on top of a previous run.
conn.exec_driver_sql("DROP TABLE IF EXISTS customer")

# exec_driver_sql passes the raw SQL string to the driver; handing a plain string
# to Connection.execute() is deprecated in SQLAlchemy 1.4 and rejected in 2.0.
conn.exec_driver_sql(schema_costumer)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x192e29d2fa0>

Para consultar quais tabelas estão no nosso banco de dados, fazemos:

In [7]:
# List the tables currently registered in SQLite's catalog (sqlite_master).
query = """

    SELECT name FROM sqlite_master
    WHERE type = 'table'

"""

# Run the query on the open connection and show the result as a dataframe.
table = pd.read_sql_query(sql=query, con=conn)
table

Unnamed: 0,name
0,customer


Agora podemos inserir os dados na tabela.

In [9]:
# Insert the dataframe rows into the existing `customer` table.
# if_exists='append' adds to whatever is already stored, so running this cell
# twice without recreating the table duplicates every row.
# index=False leaves the dataframe index out of the inserted columns.
df_customer.to_sql(
    name='customer',
    con=conn,
    if_exists='append',
    index=False,
)

In [10]:
# Read the whole `customer` table back from the database.
query = """

    SELECT * FROM customer

"""

# The result is a dataframe built from the database rows rather than from the CSV.
table = pd.read_sql_query(sql=query, con=conn)
table

Unnamed: 0,customer_id,customer_unique_id,customer_zip_code_prefix,customer_city,customer_state
0,06b8999e2fba1a1fbc88172c00ba8bc7,861eff4711a542e4b93843c6dd7febb0,14409,franca,SP
1,18955e83d337fd6b2def6b18a428ac77,290c77bc529b7ac935b93aa66c333dc3,9790,sao bernardo do campo,SP
2,4e7b3e00288586ebd08712fdd0374a03,060e732b5b29e8181a18229c7b0b2b5e,1151,sao paulo,SP
3,b2b6027bc5c5109e529d4dc6358b12c3,259dac757896d24d7702b9acbbff3f3c,8775,mogi das cruzes,SP
4,4f2d8ab171c80ec8364f7c12e35b23ad,345ecd01c38d18a9036ed96c73b8d066,13056,campinas,SP
...,...,...,...,...,...
99436,17ddf5dd5d51696bb3d7c6291687be6f,1a29b476fee25c95fbafc67c5ac95cf8,3937,sao paulo,SP
99437,e7b71a9017aa05c9a7fd292d714858e8,d52a67c98be1cf6a5c84435bd38d095d,6764,taboao da serra,SP
99438,5e28dfe12db7fb50a4b2f691faecea5e,e9f50caf99f032f0bf3c55141f019d99,60115,fortaleza,CE
99439,56b18e2166679b8a959d72dd06da27f9,73c2643a0a458b49f58cea58833b192e,92120,canoas,RS


Agora podemos visualizar os dados a partir do banco de dados, e não mais a partir do CSV, como antes.

#### Geolocation

In [5]:
# Load the geolocation CSV over HTTPS from GitHub into a dataframe.
df_geolocation = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/lucasquemelli/ds_ao_dev/main/olist/olist_geolocation_dataset.csv",
)

# Preview the first five rows (five is also the default of head()).
df_geolocation.head(n=5)

Unnamed: 0,geolocation_zip_code_prefix,geolocation_lat,geolocation_lng,geolocation_city,geolocation_state
0,1037,-23.545621,-46.639292,sao paulo,SP
1,1046,-23.546081,-46.64482,sao paulo,SP
2,1046,-23.546129,-46.642951,sao paulo,SP
3,1041,-23.544392,-46.639499,sao paulo,SP
4,1035,-23.541578,-46.641607,sao paulo,SP


In [6]:
# Show each column's pandas dtype; the table schema defined below is based on these.
df_geolocation.dtypes

geolocation_zip_code_prefix      int64
geolocation_lat                float64
geolocation_lng                float64
geolocation_city                object
geolocation_state               object
dtype: object

In [7]:
# DDL for the `geolocation` table. Column names mirror the columns of
# df_geolocation, and the SQLite types follow its dtypes
# (int64 -> INTEGER, float64 -> REAL, object -> TEXT).
schema_geolocation = """

    CREATE TABLE geolocation(

    geolocation_zip_code_prefix      INTEGER,
    geolocation_lat                  REAL,
    geolocation_lng                  REAL,
    geolocation_city                 TEXT,
    geolocation_state                TEXT

    )

"""

# The SQLite file persists between kernel sessions, so a bare CREATE TABLE fails
# with "table geolocation already exists" on any re-run. Dropping the table first
# makes this cell re-runnable and leaves an empty table, so the append-mode load
# that follows does not pile duplicate rows on top of a previous run.
conn.exec_driver_sql("DROP TABLE IF EXISTS geolocation")

# exec_driver_sql passes the raw SQL string to the driver; handing a plain string
# to Connection.execute() is deprecated in SQLAlchemy 1.4 and rejected in 2.0.
conn.exec_driver_sql(schema_geolocation)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x1d054dddeb0>

In [9]:
# Insert the dataframe rows into the existing `geolocation` table.
# if_exists='append' adds to whatever is already stored, so running this cell
# twice without recreating the table duplicates every row.
# index=False leaves the dataframe index out of the inserted columns.
df_geolocation.to_sql(
    name='geolocation',
    con=conn,
    if_exists='append',
    index=False,
)

In [10]:
# Read the whole `geolocation` table back from the database.
query = """

    SELECT * FROM geolocation

"""

# The result is a dataframe built from the database rows rather than from the CSV.
table = pd.read_sql_query(sql=query, con=conn)
table

Unnamed: 0,geolocation_zip_code_prefix,geolocation_lat,geolocation_lng,geolocation_city,geolocation_state
0,1037,-23.545621,-46.639292,sao paulo,SP
1,1046,-23.546081,-46.644820,sao paulo,SP
2,1046,-23.546129,-46.642951,sao paulo,SP
3,1041,-23.544392,-46.639499,sao paulo,SP
4,1035,-23.541578,-46.641607,sao paulo,SP
...,...,...,...,...,...
1000158,99950,-28.068639,-52.010705,tapejara,RS
1000159,99900,-27.877125,-52.224882,getulio vargas,RS
1000160,99950,-28.071855,-52.014716,tapejara,RS
1000161,99980,-28.388932,-51.846871,david canabarro,RS


#### Order items

### 3. Check database

In [11]:
# Final check: list every table currently registered in SQLite's catalog.
query = """

    SELECT name FROM sqlite_master
    WHERE type = 'table'

"""

# Run the query on the open connection and show the result as a dataframe.
table = pd.read_sql_query(sql=query, con=conn)
table

Unnamed: 0,name
0,customer
1,geolocation
