# Importing data with relational databases - SQL
- create SQL queries
- filtering and ordering SQL records
- advanced querying by JOINing database tables

Relational databases
- A SQL table is like a pandas DataFrame
- A table has a key that links to other tables

Popular relational database management systems
- PostgreSQL
- MySQL
- SQLite

In [None]:
# Example: SQLite database and SQLAlchemy

# create a database engine
from sqlalchemy import create_engine, inspect

# use a connection string: type and name of database
engine = create_engine('sqlite:///Northwind.sqlite')

# figure out table names
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API is available in old and new releases alike.
table_names = inspect(engine).get_table_names()
print(table_names)

# connect to engine to query





# Basic SQL query
### Select all columns and rows of a table
```sql
SELECT *
FROM Table_Name
```

# Workflow of SQL querying
- Import packages and functions
- create the DB engine
- connect to the engine
- query the DB
- save query results to a DataFrame
- close the connection

In [None]:
from sqlalchemy import create_engine, text
import pandas as pd

# create the database engine
engine = create_engine('sqlite:///Northwind.sqlite')
# connect to the engine
con = engine.connect()
try:
    # query the database
    # (text() is required: SQLAlchemy 2.0 no longer executes plain strings)
    rs = con.execute(text("SELECT * FROM Orders"))
    # convert to dataframe and set column names in one step, so an empty
    # result yields an empty DataFrame instead of a length-mismatch error
    df = pd.DataFrame(rs.fetchall(), columns=list(rs.keys()))
finally:
    # close the connection, even if the query above raised
    con.close()

print(df.head())

In [None]:
# using context manager so you don't have to manually close the connection
from sqlalchemy import create_engine, text
import pandas as pd

engine = create_engine('sqlite:///Northwind.sqlite')

with engine.connect() as con:
    # select columns
    # (text() is required: SQLAlchemy 2.0 no longer executes plain strings)
    rs = con.execute(text("SELECT OrderID, OrderDate, ShipName FROM Orders"))
    # select 5 rows with size arg; passing the column names to the
    # constructor keeps this working even when no rows come back
    df = pd.DataFrame(rs.fetchmany(size=5), columns=list(rs.keys()))

print(len(df))
print(df.head())

# Filter with WHERE

In [None]:
# example
# (create_engine and pd come from the imports in the earlier cells)

# text() wraps raw SQL; SQLAlchemy 2.0 no longer executes plain strings
from sqlalchemy import text

# Create engine: engine
engine = create_engine('sqlite:///Chinook.sqlite')

# Open engine in context manager
# Perform query and save results to DataFrame: df
with engine.connect() as con:
    rs = con.execute(text("SELECT * FROM Employee WHERE EmployeeId >= 6"))
    # Name the columns at construction time so a filter that matches no
    # rows gives an empty DataFrame rather than a length-mismatch error
    df = pd.DataFrame(rs.fetchall(), columns=list(rs.keys()))

# Print the head of the DataFrame df
print(df.head())

# Order with ORDER BY

In [None]:
# text() wraps raw SQL; SQLAlchemy 2.0 no longer executes plain strings
# (create_engine and pd come from the imports in the earlier cells)
from sqlalchemy import text

# Create engine: engine
engine = create_engine('sqlite:///Chinook.sqlite')

# Open engine in context manager
with engine.connect() as con:
    rs = con.execute(text("SELECT * FROM Employee ORDER BY Birthdate"))

    # Build the DataFrame with its column names set up front, so an empty
    # result does not raise a length-mismatch error
    df = pd.DataFrame(rs.fetchall(), columns=list(rs.keys()))

# Print head of DataFrame
print(df.head())

# Query directly with Pandas
- use 1 line of code instead of 4 lines

In [None]:
# Orders is the table queried from the Northwind database earlier in this
# notebook, but the preceding cells left `engine` pointing at Chinook.
# Rebuild the engine so the cell works when the notebook is run top to bottom.
engine = create_engine('sqlite:///Northwind.sqlite')

# pandas opens the connection, runs the query, builds the DataFrame and
# names its columns in a single call
df = pd.read_sql_query("SELECT * FROM Orders", engine)

In [None]:
# Import packages
from sqlalchemy import create_engine, text
import pandas as pd

# Create engine: engine
engine = create_engine('sqlite:///Chinook.sqlite')

# Execute query and store records in DataFrame: df
df = pd.read_sql_query("SELECT * FROM Album", engine)

# Print head of DataFrame
print(df.head())

# Open engine in context manager
# Perform query and save results to DataFrame: df1
with engine.connect() as con:
    # text() is required: SQLAlchemy 2.0 no longer executes plain strings
    rs = con.execute(text("SELECT * FROM Album"))
    # Set the column names at construction time so an empty result
    # does not raise a length-mismatch error
    df1 = pd.DataFrame(rs.fetchall(), columns=list(rs.keys()))

# Confirm that both methods yield the same result: is df equal to df1?
print(df.equals(df1))

In [None]:
# Example: filtering and sorting in a single statement.
# The SELECT is narrowed by a WHERE clause and then sorted with ORDER BY.

# Bring in the engine factory and pandas
from sqlalchemy import create_engine
import pandas as pd

# Point an engine at the Chinook SQLite file
engine = create_engine('sqlite:///Chinook.sqlite')

# Let pandas run the statement and build the DataFrame in one call
df = pd.read_sql_query(
    sql=(
        "SELECT * FROM Employee "
        "WHERE EmployeeId >= 6 "
        "ORDER BY Birthdate"
    ),
    con=engine,
)

# Show the first rows of the result
print(df.head())

# Advanced query: exploiting table relationships with INNER JOIN
- note: there are other types of JOINs


### INNER JOIN

In [None]:
# example: INNER JOIN
# (engine and pd come from the earlier cells)

# text() wraps raw SQL; SQLAlchemy 2.0 no longer executes plain strings
from sqlalchemy import text

# Open engine in context manager
# Perform query and save results to DataFrame: df
with engine.connect() as con:
    rs = con.execute(text("SELECT Title, Name FROM Album INNER JOIN Artist on Album.ArtistID = Artist.ArtistID"))
    # Name the columns at construction time so a join that matches no
    # rows gives an empty DataFrame rather than a length-mismatch error
    df = pd.DataFrame(rs.fetchall(), columns=list(rs.keys()))

# Print head of DataFrame df
print(df.head())

### Filter INNER JOIN with WHERE clause

In [None]:
# Join PlaylistTrack to Track on TrackId, keep only the rows that pass the
# Milliseconds filter, and load the result straight into DataFrame df
df = pd.read_sql_query(
    sql=(
        "SELECT * FROM PlaylistTrack "
        "INNER JOIN Track on PlaylistTrack.TrackId = Track.TrackId "
        "WHERE Milliseconds < 250000"
    ),
    con=engine,
)

# Show the first rows of the joined result
print(df.head())