# Aula 3: SQL

## Introdução

Nesta aula veremos as principais consultas (queries) em SQL.

## Conexão com o db4free via SQLAlchemy

In [1]:
import yaml
import sqlalchemy
import pandas as pd
from pandasql import sqldf

In [24]:
# Load the database credentials from a local YAML file.
# safe_load only builds plain Python types (dict, list, str, int, ...), which
# is all a credentials mapping needs, and it refuses the object-construction
# tags that the full loader accepts.
with open('credentials.yml', 'r', encoding='utf-8') as file:
    credentials = yaml.safe_load(file)

- A engine deve estar no formato:
- mysql://`user`:`password`@`host`:`port`/`database`

In [25]:
# Build the engine from a URL of the form
# mysql://user:password@host:port/database.
# User and password are percent-encoded so that characters such as "@", ":"
# or "/" inside the credentials cannot be mistaken for URL delimiters.
from urllib.parse import quote_plus

engine = sqlalchemy.create_engine(
    f'mysql://{quote_plus(str(credentials["user"]))}'
    f':{quote_plus(str(credentials["password"]))}'
    f'@{credentials["host"]}:{credentials["port"]}/{credentials["database"]}'
)  # engine for the MySQL server

### UPLOAD TABLE

- lista: `[]`
- tupla: `()`
- dicionário: `{}`

In [2]:
# Column-oriented sample data for the first table (subject ids 1 to 5).
raw_data = {
    "subject_id": ["1", "2", "3", "4", "5"],
    "first_name": ["Alex", "Amy", "Allen", "Alice", "Ayoung"],
    "last_name": ["Anderson", "Ackerman", "Ali", "Aoni", "Atiches"],
    "value": [20, 30, 50, 80, 100],
}

In [8]:
# Turn the dict of columns into the first DataFrame (one column per key).
df_a = pd.DataFrame(data=raw_data)

In [9]:
# Column-oriented sample data for the second table (subject ids 4 to 8).
# Ids 4 and 5 also occur in the first table, so the joins have rows to match.
raw_data = {
    "subject_id": ["4", "5", "6", "7", "8"],
    "first_name": ["Billy", "Brian", "Bran", "Bryce", "Betty"],
    "last_name": ["Bonder", "Black", "Balwner", "Brice", "Btisan"],
    "value": [15, 5, 70, 35, 10],
}

# Build the second DataFrame from the dict above.
df_b = pd.DataFrame(data=raw_data)

In [10]:
# Display df_a, the left-hand table of the joins that follow.
df_a

Unnamed: 0,subject_id,first_name,last_name,value
0,1,Alex,Anderson,20
1,2,Amy,Ackerman,30
2,3,Allen,Ali,50
3,4,Alice,Aoni,80
4,5,Ayoung,Atiches,100


In [11]:
# Display df_b, the right-hand table of the joins that follow.
df_b

Unnamed: 0,subject_id,first_name,last_name,value
0,4,Billy,Bonder,15
1,5,Brian,Black,5
2,6,Bran,Balwner,70
3,7,Bryce,Brice,35
4,8,Betty,Btisan,10


## JOIN

![](https://www.dofactory.com/img/sql/sql-joins.png)

In [14]:
# Join df_a and df_b on subject_id: only ids present in both DataFrames are
# returned. SELECT * brings the columns of both sides, so every column name
# shows up twice in the result.
query = '''
    SELECT *
    FROM df_a AS a    
    JOIN df_b AS b
    ON a.subject_id = b.subject_id
'''

# Run the query with pandasql; the table names refer to the DataFrames
# df_a and df_b.
sqldf(query)

Unnamed: 0,subject_id,first_name,last_name,value,subject_id.1,first_name.1,last_name.1,value.1
0,4,Alice,Aoni,80,4,Billy,Bonder,15
1,5,Ayoung,Atiches,100,5,Brian,Black,5


In [16]:
# Same join, but selecting specific columns: the aliases a and b say which
# table each column comes from (first_name from df_a, last_name from df_b).
query = '''
    SELECT a.first_name, b.last_name
    FROM df_a AS a
    JOIN df_b AS b
    ON a.subject_id = b.subject_id    
'''

# Run the query with pandasql.
sqldf(query)

Unnamed: 0,first_name,last_name
0,Alice,Bonder
1,Ayoung,Black


In [17]:
# Same query with the table aliases declared without the AS keyword
# ("df_a a" instead of "df_a AS a"); the result is unchanged.
query = '''
    SELECT a.first_name, b.last_name
    FROM df_a a
    JOIN df_b b
    ON a.subject_id = b.subject_id    
'''

# Run the query with pandasql.
sqldf(query)

Unnamed: 0,first_name,last_name
0,Alice,Bonder
1,Ayoung,Black


In [18]:
# Same query written with the explicit INNER JOIN keyword; it returns the
# same rows as the plain JOIN used in the previous cells.
query = '''
    SELECT a.first_name, b.last_name
    FROM df_a a
    INNER JOIN df_b b
    ON a.subject_id = b.subject_id    
'''

# Run the query with pandasql.
sqldf(query)

Unnamed: 0,first_name,last_name
0,Alice,Bonder
1,Ayoung,Black


### LEFT JOIN

In [19]:
# LEFT JOIN: keep every row of df_a (left side) and attach the matching df_b
# row when there is one; df_a rows without a match get NULL (shown as NaN)
# in the df_b columns.
query = '''
    SELECT *
    FROM df_a AS a
    LEFT JOIN df_b AS b
    ON a.subject_id = b.subject_id        
'''
# Run the query with pandasql.
sqldf(query)

Unnamed: 0,subject_id,first_name,last_name,value,subject_id.1,first_name.1,last_name.1,value.1
0,1,Alex,Anderson,20,,,,
1,2,Amy,Ackerman,30,,,,
2,3,Allen,Ali,50,,,,
3,4,Alice,Aoni,80,4.0,Billy,Bonder,15.0
4,5,Ayoung,Atiches,100,5.0,Brian,Black,5.0


In [20]:
# Display df_a again, for comparison with the LEFT JOIN result.
df_a

Unnamed: 0,subject_id,first_name,last_name,value
0,1,Alex,Anderson,20
1,2,Amy,Ackerman,30
2,3,Allen,Ali,50
3,4,Alice,Aoni,80
4,5,Ayoung,Atiches,100


### RIGHT JOIN

In [26]:
# RIGHT JOIN: keep every row of df_b (right side) and attach the matching
# df_a row when there is one; df_b rows without a match get NULL (shown as
# NaN) in the df_a columns.
query = '''
    SELECT *
    FROM df_a AS a
    RIGHT JOIN df_b AS b
    ON a.subject_id = b.subject_id
'''
# Unlike the pandasql cells, this query is sent through pandas to the
# database behind `engine`.
# NOTE(review): presumably because the SQLite backend used by pandasql did not
# support RIGHT JOIN at the time - confirm.
# NOTE(review): this requires tables named df_a and df_b to already exist on
# that server; the upload step is not visible in this notebook - confirm.
pd.read_sql_query(query, engine)

Unnamed: 0,index,subject_id,first_name,last_name,value,index.1,subject_id.1,first_name.1,last_name.1,value.1
0,3.0,4.0,Alice,Aoni,80.0,0,4,Billy,Bonder,15
1,4.0,5.0,Ayoung,Atiches,100.0,1,5,Brian,Black,5
2,,,,,,2,6,Bran,Balwner,70
3,,,,,,3,7,Bryce,Brice,35
4,,,,,,4,8,Betty,Btisan,10


## UNION

In [27]:
# Display df_a, the first input of the UNION example.
df_a

Unnamed: 0,subject_id,first_name,last_name,value
0,1,Alex,Anderson,20
1,2,Amy,Ackerman,30
2,3,Allen,Ali,50
3,4,Alice,Aoni,80
4,5,Ayoung,Atiches,100


In [28]:
# Display df_b, the second input of the UNION example.
df_b

Unnamed: 0,subject_id,first_name,last_name,value
0,4,Billy,Bonder,15
1,5,Brian,Black,5
2,6,Bran,Balwner,70
3,7,Bryce,Brice,35
4,8,Betty,Btisan,10


In [33]:
# UNION stacks the rows of both SELECTs into a single result; both sides must
# return the same number of columns. Plain UNION drops fully identical rows
# (UNION ALL would keep them); no row of df_a equals a row of df_b, so all
# ten rows appear.
query = '''
    SELECT *
        FROM df_a
    UNION 
    SELECT *
        FROM df_b
'''

# Run the query with pandasql.
sqldf(query)

Unnamed: 0,subject_id,first_name,last_name,value
0,1,Alex,Anderson,20
1,2,Amy,Ackerman,30
2,3,Allen,Ali,50
3,4,Alice,Aoni,80
4,4,Billy,Bonder,15
5,5,Ayoung,Atiches,100
6,5,Brian,Black,5
7,6,Bran,Balwner,70
8,7,Bryce,Brice,35
9,8,Betty,Btisan,10


## CASE

O comando `CASE` se assemelha a uma condicional `se..., então`; sua sintaxe é:

```sql
CASE
    WHEN condition1 THEN result1
    WHEN condition2 THEN result2
    WHEN conditionN THEN resultN
    ELSE result
END;
```

In [34]:
# Display df_a, the table used in the CASE examples.
df_a

Unnamed: 0,subject_id,first_name,last_name,value
0,1,Alex,Anderson,20
1,2,Amy,Ackerman,30
2,3,Allen,Ali,50
3,4,Alice,Aoni,80
4,5,Ayoung,Atiches,100


In [38]:
# Label each row of df_a with a category derived from `value`.
# WHEN branches are evaluated top to bottom and the first match wins, so the
# stricter threshold (> 50) has to come before the looser one (> 30).
# The labels are written with single quotes: in standard SQL double quotes
# denote identifiers (column names), and a double-quoted label is only
# accepted as a string through an engine-specific fallback.
query = '''
    SELECT *,
        CASE
            WHEN value > 50 THEN 'maior_que_50'
            WHEN value > 30 THEN 'maior_que_30'
            ELSE 'menor_ou_igual_a_30'
        END AS value_category

    FROM df_a
'''
# Run the query with pandasql.
sqldf(query)

Unnamed: 0,subject_id,first_name,last_name,value,value_category
0,1,Alex,Anderson,20,menor_ou_igual_a_30
1,2,Amy,Ackerman,30,menor_ou_igual_a_30
2,3,Allen,Ali,50,maior_que_30
3,4,Alice,Aoni,80,maior_que_50
4,5,Ayoung,Atiches,100,maior_que_50


In [39]:
# A CASE branch can return a column instead of a fixed label: rows with
# value > 50 get first_name in value_category, all other rows get last_name.
query = '''
    SELECT *,
        CASE 
            WHEN value > 50 THEN first_name
            ELSE last_name    
        END AS value_category
    
    FROM df_a
'''
# Run the query with pandasql.
sqldf(query)

Unnamed: 0,subject_id,first_name,last_name,value,value_category
0,1,Alex,Anderson,20,Anderson
1,2,Amy,Ackerman,30,Ackerman
2,3,Allen,Ali,50,Ali
3,4,Alice,Aoni,80,Alice
4,5,Ayoung,Atiches,100,Ayoung


## Pausa - voltamos 20:52