# Part 2.2: Filtering and Sorting Data in SQL

Importing pandas and sqlite3 to run SQL queries in Python:

In [1]:
import pandas as pd
import sqlite3

Connecting to the database and creating a cursor object:

In [2]:
# Open a connection to the SQLite database file 'chinook.db'.
connection = sqlite3.connect('chinook.db')
# Create a cursor on that connection.
# NOTE(review): the cells visible in this notebook run their queries through
# pd.read_sql_query(query, connection); none of them uses `cursor` directly.
cursor = connection.cursor()

## Operations with Text

Concatenating two strings:

In [4]:
# Concatenate two string literals with the || operator and name the
# resulting column hi_there.
query = """

SELECT 'Hi ' || 'there' AS hi_there;
    
"""

pd.read_sql_query(query,connection)

Unnamed: 0,hi_there
0,Hi there


Using aliases:

In [5]:
# Select four employee columns and rename each one with AS:
# employee_id -> id, first_name -> first, last_name -> last,
# phone -> work_phone.
query = """

SELECT
    employee_id AS id,
    first_name AS first,
    last_name AS last,
    phone AS work_phone
FROM
    employee;
    
"""

pd.read_sql_query(query,connection)

Unnamed: 0,id,first,last,work_phone
0,1,Andrew,Adams,+1 (780) 428-9482
1,2,Nancy,Edwards,+1 (403) 262-3443
2,3,Jane,Peacock,+1 (403) 262-3443
3,4,Margaret,Park,+1 (403) 263-4423
4,5,Steve,Johnson,1 (780) 836-9987
5,6,Michael,Mitchell,+1 (403) 246-9887
6,7,Robert,King,+1 (403) 456-9986
7,8,Laura,Callahan,+1 (403) 467-3351


Operations Between Columns and Strings:

In [6]:
# Append the literal ' Chinook' to every employee's first_name using ||.
query = """

SELECT
first_name || ' Chinook' AS first_name_chinook
FROM
employee;
    
"""

pd.read_sql_query(query,connection)

Unnamed: 0,first_name_chinook
0,Andrew Chinook
1,Nancy Chinook
2,Jane Chinook
3,Margaret Chinook
4,Steve Chinook
5,Michael Chinook
6,Robert Chinook
7,Laura Chinook


Operations with Numeric Columns:

In [7]:
# Arithmetic on numeric columns of invoice_line, limited to 5 rows:
#   total_line     = unit_price * quantity
#   unit_price_eur = unit_price * 0.85
# NOTE(review): 0.85 looks like a hard-coded USD -> EUR conversion factor
# (going by the alias) — confirm.
query = """

SELECT
unit_price * quantity AS total_line,
unit_price * 0.85 AS unit_price_eur
FROM
invoice_line
LIMIT
5;

"""

pd.read_sql_query(query,connection)

Unnamed: 0,total_line,unit_price_eur
0,0.99,0.8415
1,0.99,0.8415
2,0.99,0.8415
3,0.99,0.8415
4,0.99,0.8415


## Functions

Rounding:

In [7]:
# Round each invoice total to 0 decimal places with ROUND(total, 0);
# only the first 10 rows returned are shown.
query = """

SELECT
    invoice_id,
    customer_id,
    ROUND(total,0) AS rounded_total
FROM
    invoice
LIMIT
    10;

"""

pd.read_sql_query(query,connection)

Unnamed: 0,invoice_id,customer_id,rounded_total
0,1,18,16.0
1,2,30,10.0
2,3,40,2.0
3,4,18,8.0
4,5,27,17.0
5,6,31,2.0
6,7,49,11.0
7,8,59,10.0
8,9,18,9.0
9,10,31,2.0


Results of Functions as Arguments:

In [8]:
# Use the result of one function as the argument of another:
# the first column rounds unit_price * 0.85 to 2 decimals, the second
# column passes that same rounded value to LENGTH.
query = """

SELECT
    ROUND(unit_price * 0.85,2) AS unit_price_eur,
    LENGTH(ROUND(unit_price * 0.85,2)) AS len_unit_price_eur
FROM
    invoice_line
LIMIT
    5;

"""

pd.read_sql_query(query,connection)

Unnamed: 0,unit_price_eur,len_unit_price_eur
0,0.84,4
1,0.84,4
2,0.84,4
3,0.84,4
4,0.84,4


Integer division and CAST:

In [9]:
# Convert track length from milliseconds to minutes in two ways:
#   integer_minutes - plain division by 60000; the results show whole
#                     numbers only, i.e. the fractional part is dropped
#   float_minutes   - milliseconds is CAST to REAL before dividing, so the
#                     fractional minutes are kept
query = """

SELECT
milliseconds / 60000 AS integer_minutes,
CAST(milliseconds AS REAL) / 60000 AS float_minutes
FROM
track
LIMIT
10;

"""

pd.read_sql_query(query,connection)

Unnamed: 0,integer_minutes,float_minutes
0,5,5.72865
1,5,5.709367
2,3,3.84365
3,4,4.20085
4,6,6.256967
5,3,3.4277
6,3,3.898767
7,3,3.5139
8,3,3.385033
9,4,4.391617


Uppercase and lowercase:

In [10]:
# Return every employee's last_name converted to lowercase with LOWER.
query = """

SELECT
LOWER(last_name) as lowercase_last_name
FROM
employee;

"""

pd.read_sql_query(query,connection)

Unnamed: 0,lowercase_last_name
0,adams
1,edwards
2,peacock
3,park
4,johnson
5,mitchell
6,king
7,callahan


SUBSTRING:

In [11]:
# SUBSTRING(first_name, 1, 3): take 3 characters starting at position 1
# (positions are 1-based), i.e. the first three letters of each first name.
query = """

SELECT
SUBSTRING(first_name,1,3) AS first_three_letters
FROM
employee;

"""

pd.read_sql_query(query,connection)

Unnamed: 0,first_three_letters
0,And
1,Nan
2,Jan
3,Mar
4,Ste
5,Mic
6,Rob
7,Lau


REPLACE:

In [12]:
# Build each employee's full name (first_name, a space, last_name) and
# replace every lowercase 's' in it with '$'. The match is case-sensitive:
# an uppercase 'S' (as in "Steve") is left untouched.
#
# The space separator is written with single quotes. In SQL, double quotes
# delimit identifiers; SQLite only treats " " as a string literal through a
# legacy fallback that can be disabled, so single quotes are the safe form.
query = """

SELECT
REPLACE(first_name || ' ' || last_name,'s','$') AS full_name
FROM
employee;

"""

pd.read_sql_query(query,connection)

Unnamed: 0,full_name
0,Andrew Adam$
1,Nancy Edward$
2,Jane Peacock
3,Margaret Park
4,Steve John$on
5,Michael Mitchell
6,Robert King
7,Laura Callahan


## Expressions

Filtering Results with WHERE:

In [4]:
# Filter rows with WHERE: keep only employees whose city is exactly
# 'Lethbridge', returning their first name, city and title.
query = """

SELECT
    first_name,
    city,
    title
FROM
    employee
WHERE
    city = 'Lethbridge';

"""

pd.read_sql_query(query,connection)

Unnamed: 0,first_name,city,title
0,Robert,Lethbridge,IT Staff
1,Laura,Lethbridge,IT Staff


## Filtering I - Logical Operators

NOT Equal:

In [6]:
# WHERE NOT negates the condition that follows it: return every employee
# whose birthdate does NOT start with the four characters '1973'.
query = """

SELECT
    *
FROM
    employee
WHERE NOT
    -- The four characters from the first character in the birthday value equal 1973
    SUBSTRING(birthdate,1,4) = '1973';

"""

pd.read_sql_query(query,connection)

Unnamed: 0,employee_id,last_name,first_name,title,reports_to,birthdate,hire_date,address,city,state,country,postal_code,phone,fax,email
0,1,Adams,Andrew,General Manager,,1962-02-18 00:00:00,2016-08-14 00:00:00,11120 Jasper Ave NW,Edmonton,AB,Canada,T5K 2N1,+1 (780) 428-9482,+1 (780) 428-3457,andrew@chinookcorp.com
1,2,Edwards,Nancy,Sales Manager,1.0,1958-12-08 00:00:00,2016-05-01 00:00:00,825 8 Ave SW,Calgary,AB,Canada,T2P 2T3,+1 (403) 262-3443,+1 (403) 262-3322,nancy@chinookcorp.com
2,4,Park,Margaret,Sales Support Agent,2.0,1947-09-19 00:00:00,2017-05-03 00:00:00,683 10 Street SW,Calgary,AB,Canada,T2P 5G3,+1 (403) 263-4423,+1 (403) 263-4289,margaret@chinookcorp.com
3,5,Johnson,Steve,Sales Support Agent,2.0,1965-03-03 00:00:00,2017-10-17 00:00:00,7727B 41 Ave,Calgary,AB,Canada,T3B 1Y7,1 (780) 836-9987,1 (780) 836-9543,steve@chinookcorp.com
4,7,King,Robert,IT Staff,6.0,1970-05-29 00:00:00,2017-01-02 00:00:00,590 Columbia Boulevard West,Lethbridge,AB,Canada,T1K 5N8,+1 (403) 456-9986,+1 (403) 456-8485,robert@chinookcorp.com
5,8,Callahan,Laura,IT Staff,6.0,1968-01-09 00:00:00,2017-03-04 00:00:00,923 7 ST NW,Lethbridge,AB,Canada,T1H 1Y8,+1 (403) 467-3351,+1 (403) 467-8772,laura@chinookcorp.com


Multiple Conditions:

In [7]:
# Combine two conditions with AND — both must hold for a row to be kept:
# first_name starts with 'M' and hire_date starts with '2016'.
query = """

SELECT
    *
FROM
    employee
WHERE
    SUBSTRING(first_name,1,1) = 'M'
    AND 
    SUBSTRING(hire_date,1,4) = '2016';

"""

pd.read_sql_query(query,connection)

Unnamed: 0,employee_id,last_name,first_name,title,reports_to,birthdate,hire_date,address,city,state,country,postal_code,phone,fax,email
0,6,Mitchell,Michael,IT Manager,1,1973-07-01 00:00:00,2016-10-17 00:00:00,5827 Bowness Road NW,Calgary,AB,Canada,T3B 0C5,+1 (403) 246-9887,+1 (403) 246-9899,michael@chinookcorp.com


In [9]:
# Mix OR and AND. The parentheses make the OR a single unit, so the filter
# reads: (first_name has 5 or 6 characters) AND hire_date starts with '2017'.
query = """

SELECT
    *
FROM
    employee
WHERE
    (LENGTH(first_name) = 5 OR LENGTH(first_name) = 6)
    AND
    SUBSTRING(hire_date,1,4) = '2017';

"""

pd.read_sql_query(query,connection)

Unnamed: 0,employee_id,last_name,first_name,title,reports_to,birthdate,hire_date,address,city,state,country,postal_code,phone,fax,email
0,5,Johnson,Steve,Sales Support Agent,2,1965-03-03 00:00:00,2017-10-17 00:00:00,7727B 41 Ave,Calgary,AB,Canada,T3B 1Y7,1 (780) 836-9987,1 (780) 836-9543,steve@chinookcorp.com
1,7,King,Robert,IT Staff,6,1970-05-29 00:00:00,2017-01-02 00:00:00,590 Columbia Boulevard West,Lethbridge,AB,Canada,T1K 5N8,+1 (403) 456-9986,+1 (403) 456-8485,robert@chinookcorp.com
2,8,Callahan,Laura,IT Staff,6,1968-01-09 00:00:00,2017-03-04 00:00:00,923 7 ST NW,Lethbridge,AB,Canada,T1H 1Y8,+1 (403) 467-3351,+1 (403) 467-8772,laura@chinookcorp.com


## Filtering II - Complex Expressions

Comparison Operators:

In [10]:
# Use the != comparison operator: keep employees whose hire_date does not
# start with '2016'.
query = """


SELECT employee_id, last_name, first_name, title, reports_to, hire_date
  FROM employee
 WHERE SUBSTRING(hire_date, 1, 4) != '2016';


"""

pd.read_sql_query(query,connection)

Unnamed: 0,employee_id,last_name,first_name,title,reports_to,hire_date
0,3,Peacock,Jane,Sales Support Agent,2,2017-04-01 00:00:00
1,4,Park,Margaret,Sales Support Agent,2,2017-05-03 00:00:00
2,5,Johnson,Steve,Sales Support Agent,2,2017-10-17 00:00:00
3,7,King,Robert,IT Staff,6,2017-01-02 00:00:00
4,8,Callahan,Laura,IT Staff,6,2017-03-04 00:00:00


Between two values:

In [12]:
# BETWEEN keeps values inside a range, both bounds included. The bounds are
# 17 and 19 minutes written in milliseconds (minutes * 60 * 1000).
query = """

SELECT
    *
FROM
    track
WHERE
    milliseconds BETWEEN (17*60*1000) AND (19*60*1000);

"""

pd.read_sql_query(query,connection)

Unnamed: 0,track_id,name,album_id,media_type_id,genre_id,composer,milliseconds,bytes,unit_price
0,1581,Dazed And Confused,127,1,1,Jimmy Page/Led Zeppelin,1116734,36052247,0.99
1,2429,We've Got To Get Together/Jingo,198,1,1,,1070027,34618222,0.99


## Filtering III - Special Comparison Operators

LIKE:

In [13]:
# LIKE with % wildcards: % matches any run of characters (including none),
# so this returns customers whose first_name contains 'belle' anywhere.
query = """

SELECT 
    *
FROM 
    customer
WHERE 
    first_name LIKE '%belle%';

"""

pd.read_sql_query(query,connection)

Unnamed: 0,customer_id,first_name,last_name,company,address,city,state,country,postal_code,phone,fax,email,support_rep_id
0,43,Isabelle,Mercier,,"68, Rue Jouvence",Dijon,,France,21000,+33 03 80 73 66 99,,isabelle_mercier@apple.fr,3


Patterns:

In [15]:
# Find customers whose first name contains the letter a at least three times.
# Each % matches any run of characters (including none). The trailing % is
# required so the third 'a' may appear anywhere; without it the pattern would
# also demand that the name END with 'a'.
query = """

SELECT
    *
FROM
    customer
WHERE
    -- first_name contains at least three occurrences of a or A
    first_name LIKE '%a%a%a%';


"""

pd.read_sql_query(query,connection)

Unnamed: 0,customer_id,first_name,last_name,company,address,city,state,country,postal_code,phone,fax,email,support_rep_id
0,35,Madalena,Sampaio,,"Rua dos Campeões Europeus de Viena, 4350",Porto,,Portugal,,+351 (225) 022-448,,masampaio@sapo.pt,4


IN a List:

In [16]:
# IN tests membership in a list: keep customers whose state equals any of
# the listed two-letter codes.
query = """

SELECT 
    *
FROM 
    customer
WHERE
    state IN ('CT', 'DE', 'FL', 'GA', 'MA', 'MD', 'ME', 'NC', 'NH', 'NJ', 'NY', 'RI', 'SC', 'VA');

"""

pd.read_sql_query(query,connection)

Unnamed: 0,customer_id,first_name,last_name,company,address,city,state,country,postal_code,phone,fax,email,support_rep_id
0,18,Michelle,Brooks,,627 Broadway,New York,NY,USA,10012-2612,+1 (212) 221-3546,+1 (212) 221-4679,michelleb@aol.com,3
1,22,Heather,Leacock,,120 S Orange Ave,Orlando,FL,USA,32801,+1 (407) 999-7788,,hleacock@gmail.com,4
2,23,John,Gordon,,69 Salem Street,Boston,MA,USA,2113,+1 (617) 522-1333,,johngordon22@yahoo.com,4


IS NULL:

In [23]:
# Employees who do not report to employee 1. A comparison such as
# reports_to <> 1 is never true when reports_to is NULL, so the explicit
# IS NULL test is needed to also keep the employee with no manager.
query = """


SELECT 
    employee_id, 
    last_name, 
    first_name, 
    title, 
    reports_to
FROM 
    employee
WHERE 
    reports_to <> 1 OR reports_to IS NULL;


"""

pd.read_sql_query(query,connection)

Unnamed: 0,employee_id,last_name,first_name,title,reports_to
0,1,Adams,Andrew,General Manager,
1,3,Peacock,Jane,Sales Support Agent,2.0
2,4,Park,Margaret,Sales Support Agent,2.0
3,5,Johnson,Steve,Sales Support Agent,2.0
4,7,King,Robert,IT Staff,6.0
5,8,Callahan,Laura,IT Staff,6.0


## Control Flow

If... Then:

In [24]:
# Label each invoice with CASE: 'High' when total is greater than 10,
# otherwise 'Low'.
#
# The threshold is the numeric literal 10, not the string '10'. Comparing a
# number with text only gives the intended answer when SQLite's column type
# affinity happens to convert the text; a numeric literal keeps the
# comparison unambiguous.
query = """

SELECT
    invoice_id, 
    total,
    (CASE
        WHEN (total > 10) THEN 'High'
        ELSE 'Low'
     END)
     AS total_category
FROM
    invoice;

"""

pd.read_sql_query(query,connection)

Unnamed: 0,invoice_id,total,total_category
0,1,15.84,High
1,2,9.90,Low
2,3,1.98,Low
3,4,7.92,Low
4,5,16.83,High
...,...,...,...
609,610,6.93,Low
610,611,1.98,Low
611,612,11.88,High
612,613,8.91,Low


Multiple If... Thens:

In [19]:
# CASE with several WHEN branches, tested top to bottom; the first branch
# that matches supplies the label:
#   total > 10        -> 'High'
#   5 < total <= 10   -> 'Medium'
#   total <= 5        -> 'Low'
# ELSE 'Unknown' is the fallback for a total that satisfies none of the
# comparisons.
query = """

SELECT
    invoice_id, 
    total,
    (CASE
        WHEN (total > 10) THEN 'High'
        WHEN (5 < total AND total <= 10) THEN 'Medium'
        WHEN (total <= 5) THEN 'Low'
        ELSE 'Unknown'
    END) AS total_category
FROM
invoice;

"""

pd.read_sql_query(query,connection)

Unnamed: 0,invoice_id,total,total_category
0,1,15.84,High
1,2,9.90,Medium
2,3,1.98,Low
3,4,7.92,Medium
4,5,16.83,High
...,...,...,...
609,610,6.93,Medium
610,611,1.98,Low
611,612,11.88,High
612,613,8.91,Medium


Or ELSE ...:

In [20]:
# Map each employee's reports_to id to a manager name, with 'Self' for the
# employee whose reports_to is NULL.
# NOTE(review): this CASE has no ELSE branch, so a reports_to value other
# than 1, 2, 6 or NULL would produce NULL in the manager column.
query = """


SELECT first_name, last_name, title, reports_to,
       (CASE
         WHEN reports_to = 1 THEN 'Adams'
         WHEN reports_to = 2 THEN 'Edwards'
         WHEN reports_to = 6 THEN 'Mitchell'
         WHEN reports_to IS NULL THEN 'Self'
        END) AS manager
FROM employee;


"""

pd.read_sql_query(query,connection)

Unnamed: 0,first_name,last_name,title,reports_to,manager
0,Andrew,Adams,General Manager,,Self
1,Nancy,Edwards,Sales Manager,1.0,Adams
2,Jane,Peacock,Sales Support Agent,2.0,Edwards
3,Margaret,Park,Sales Support Agent,2.0,Edwards
4,Steve,Johnson,Sales Support Agent,2.0,Edwards
5,Michael,Mitchell,IT Manager,1.0,Adams
6,Robert,King,IT Staff,6.0,Mitchell
7,Laura,Callahan,IT Staff,6.0,Mitchell


CASE Base Expression:

In [22]:
# CASE with a base expression: reports_to is compared for equality against
# each WHEN value in turn (1, 2, 6). Anything that matches none of them —
# including a NULL reports_to — falls through to ELSE and is labelled 'Self'.
query = """

SELECT 
    first_name, 
    last_name, 
    title, 
    reports_to,
    (CASE reports_to
        WHEN  1 THEN 'Adams'
        WHEN  2 THEN 'Edwards'
        WHEN  6 THEN 'Mitchell'
        ELSE 'Self'
    END) AS manager
FROM 
    employee;

"""

pd.read_sql_query(query,connection)

Unnamed: 0,first_name,last_name,title,reports_to,manager
0,Andrew,Adams,General Manager,,Self
1,Nancy,Edwards,Sales Manager,1.0,Adams
2,Jane,Peacock,Sales Support Agent,2.0,Edwards
3,Margaret,Park,Sales Support Agent,2.0,Edwards
4,Steve,Johnson,Sales Support Agent,2.0,Edwards
5,Michael,Mitchell,IT Manager,1.0,Adams
6,Robert,King,IT Staff,6.0,Mitchell
7,Laura,Callahan,IT Staff,6.0,Mitchell


## Ordering Results

ORDER BY:

In [26]:
# Sort tracks by milliseconds (ascending is the default direction) and keep
# the first 3 rows, i.e. the three shortest tracks.
query = """

SELECT 
    track_id, name, milliseconds
FROM 
    track
ORDER BY 
    milliseconds
LIMIT
    3;
    
"""

pd.read_sql_query(query,connection)

Unnamed: 0,track_id,name,milliseconds
0,2461,É Uma Partida De Futebol,1071
1,168,Now Sports,4884
2,170,A Statistic,6373


In [27]:
# Sort by two keys: reports_to first, then last_name within each reports_to
# value. In the result, the row with no reports_to value comes first.
query = """

SELECT 
    employee_id, first_name, last_name, reports_to
FROM 
    employee
ORDER BY 
    reports_to, last_name;
    
"""

pd.read_sql_query(query,connection)

Unnamed: 0,employee_id,first_name,last_name,reports_to
0,1,Andrew,Adams,
1,2,Nancy,Edwards,1.0
2,6,Michael,Mitchell,1.0
3,5,Steve,Johnson,2.0
4,4,Margaret,Park,2.0
5,3,Jane,Peacock,2.0
6,8,Laura,Callahan,6.0
7,7,Robert,King,6.0


ORDER BY DESC:

In [30]:
# DESC applies only to the key it follows: country is sorted descending,
# while first_name and last_name break ties in the default ascending order.
# Only the first 5 rows are kept.
query = """

SELECT
customer_id, first_name, last_name, country
FROM
customer
ORDER BY
country DESC, first_name, last_name
LIMIT
5;

"""

pd.read_sql_query(query,connection)

Unnamed: 0,customer_id,first_name,last_name,country
0,52,Emma,Jones,United Kingdom
1,53,Phil,Hughes,United Kingdom
2,54,Steve,Murray,United Kingdom
3,20,Dan,Miller,USA
4,16,Frank,Harris,USA


Order by Expressions:

In [34]:
# Custom sort order through a computed column: new_order is 1 for Canada,
# 2 for USA and 3 for every other country. Rows are sorted by new_order,
# then by country name, and the first 10 are kept.
query = """

SELECT
customer_id,first_name,last_name,country,
(CASE country
 WHEN 'Canada' THEN 1
 WHEN 'USA' THEN 2
 ELSE 3
 END) AS new_order
FROM 
customer
ORDER BY 
new_order, country
LIMIT
10;

"""

pd.read_sql_query(query,connection)

Unnamed: 0,customer_id,first_name,last_name,country,new_order
0,3,François,Tremblay,Canada,1
1,14,Mark,Philips,Canada,1
2,15,Jennifer,Peterson,Canada,1
3,29,Robert,Brown,Canada,1
4,30,Edward,Francis,Canada,1
5,31,Martha,Silk,Canada,1
6,32,Aaron,Mitchell,Canada,1
7,33,Ellie,Sullivan,Canada,1
8,16,Frank,Harris,USA,2
9,17,Jack,Smith,USA,2


In [None]:
# Placeholder cell: the query string contains only blank lines.
# NOTE(review): with no SQL statement in it, running this cell as-is is
# expected to fail — fill in a query or remove the cell.
query = """



"""

pd.read_sql_query(query,connection)

In [None]:
# Placeholder cell: the query string contains only blank lines.
# NOTE(review): with no SQL statement in it, running this cell as-is is
# expected to fail — fill in a query or remove the cell.
query = """



"""

pd.read_sql_query(query,connection)