In [1]:
# Install the warning filter BEFORE the third-party imports below: a package
# that warns while it is being imported cannot be silenced by a filter that is
# only registered afterwards.
# NOTE(review): the stray `""")` in this cell's saved output looks like the
# tail of such an import-time warning -- confirm on a fresh Restart & Run All.
import warnings
warnings.filterwarnings("ignore")

import sys
import pandas as pd
# NOTE: later cells assign query strings to a variable that is also called
# `sql`, which rebinds this module alias for the rest of the notebook.
from pandas.io import sql
from pandas.io.sql import read_sql
from pandas.io.sql import to_sql

# sqlalchemy
import sqlalchemy
from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData, ForeignKey

# for postgres
import psycopg2

# Marker that every import above completed without raising.
print('OK')

OK


  """)


In [2]:
# Engine for the local "pyanalysis" PostgreSQL database (user postgres, port 5432).
# NOTE(review): create_engine is documented as lazy, so the "OK" below most
# likely only shows that the URL parsed, not that the server is reachable -- confirm.
engine = sqlalchemy.create_engine('postgresql://postgres@localhost:5432/pyanalysis')
print("OK")

OK


In [3]:
# A statement could be executed directly on the engine, e.g.:
#engine.execute("command")

# Here the statement is kept in a variable and handed to pandas so the result
# comes back as a DataFrame; parse_dates makes hire_date a datetime column.

sql = "SELECT * FROM teachers"
teachers = pd.read_sql(sql=sql, con=engine, parse_dates=['hire_date'])

# dtypes is printed as plain text; the frame itself is the cell's display value.
print(teachers.dtypes)

teachers.head(6)


id                     int64
first_name            object
last_name             object
school                object
hire_date     datetime64[ns]
salary               float64
dtype: object


Unnamed: 0,id,first_name,last_name,school,hire_date,salary
0,1,Janet,Smith,F.D. Roosevelt HS,2011-10-30,36200.0
1,2,Lee,Reynolds,F.D. Roosevelt HS,1993-05-22,65000.0
2,3,Samuel,Cole,Myers Middle School,2005-08-01,43500.0
3,4,Samantha,Bush,Myers Middle School,2011-10-30,36200.0
4,5,Betty,Diaz,Myers Middle School,2005-08-30,43500.0
5,6,Kathleen,Roush,F.D. Roosevelt HS,2010-10-22,38500.0


In [4]:
# Pull only three named columns instead of every column.
sql = "SELECT last_name, first_name, salary FROM teachers"

teachers = pd.read_sql(sql=sql, con=engine)

teachers.head(6)

Unnamed: 0,last_name,first_name,salary
0,Smith,Janet,36200.0
1,Reynolds,Lee,65000.0
2,Cole,Samuel,43500.0
3,Bush,Samantha,36200.0
4,Diaz,Betty,43500.0
5,Roush,Kathleen,38500.0


In [5]:
# DISTINCT collapses repeated values: one row per school name.

sql = "SELECT DISTINCT school FROM teachers"

teachers = pd.read_sql(sql=sql, con=engine)

teachers.head(6)

Unnamed: 0,school
0,Myers Middle School
1,F.D. Roosevelt HS


In [6]:
# DISTINCT over two columns: one row per unique (school, salary) pair.

sql = "SELECT DISTINCT school, salary FROM teachers"

teachers = pd.read_sql(sql=sql, con=engine)

teachers.head(6)

Unnamed: 0,school,salary
0,Myers Middle School,36200.0
1,F.D. Roosevelt HS,65000.0
2,Myers Middle School,43500.0
3,F.D. Roosevelt HS,38500.0
4,F.D. Roosevelt HS,36200.0


In [7]:
# ORDER BY sorts on the database side, so the DataFrame arrives already
# ordered (highest salary first).

sql = "SELECT first_name, last_name, salary FROM teachers ORDER BY salary DESC"

teachers = pd.read_sql(sql=sql, con=engine)

teachers.head(6)

Unnamed: 0,first_name,last_name,salary
0,Lee,Reynolds,65000.0
1,Samuel,Cole,43500.0
2,Betty,Diaz,43500.0
3,Kathleen,Roush,38500.0
4,Janet,Smith,36200.0
5,Samantha,Bush,36200.0


In [8]:
# Spreading one SQL statement over several source lines for clarity.
# Adjacent string literals inside the parentheses are joined into a single
# string; each continuation starts with a space so the clauses stay separated.
sql = (
    "SELECT first_name, last_name, salary"
    " FROM teachers"
    " ORDER BY salary DESC"
)

print(sql)


SELECT first_name, last_name, salary FROM teachers ORDER BY salary DESC


In [9]:
# Run the statement assembled in the previous cell (reuses that cell's `sql`).

teachers = pd.read_sql(sql=sql, con=engine)

teachers.head(6)


Unnamed: 0,first_name,last_name,salary
0,Lee,Reynolds,65000.0
1,Samuel,Cole,43500.0
2,Betty,Diaz,43500.0
3,Kathleen,Roush,38500.0
4,Janet,Smith,36200.0
5,Samantha,Bush,36200.0


In [10]:
# Two sort keys: school A-Z first, then newest hire_date first within a school.
sql = (
    "SELECT last_name, school, hire_date"
    " FROM teachers"
    " ORDER BY school ASC, hire_date DESC"
)

teachers = pd.read_sql(sql=sql, con=engine, parse_dates=['hire_date'])

teachers.head(6)

Unnamed: 0,last_name,school,hire_date
0,Smith,F.D. Roosevelt HS,2011-10-30
1,Roush,F.D. Roosevelt HS,2010-10-22
2,Reynolds,F.D. Roosevelt HS,1993-05-22
3,Bush,Myers Middle School,2011-10-30
4,Diaz,Myers Middle School,2005-08-30
5,Cole,Myers Middle School,2005-08-01


In [11]:
# WHERE keeps only the rows whose school matches the given name exactly.
sql = (
    "SELECT last_name, school, hire_date"
    " FROM teachers"
    " WHERE school = 'Myers Middle School'"
)

teachers = pd.read_sql(sql=sql, con=engine, parse_dates=['hire_date'])

teachers.head(6)

Unnamed: 0,last_name,school,hire_date
0,Cole,Myers Middle School,2005-08-01
1,Bush,Myers Middle School,2011-10-30
2,Diaz,Myers Middle School,2005-08-30


In [12]:
# WHERE with other comparison operators.
# Only the BETWEEN query at the bottom is live; the two alternatives above it
# are kept commented out as examples.

# "does not equal" (!=)
#sql = "SELECT school"
#sql += " FROM teachers"
#sql += " WHERE school != 'Myers Middle School'"
#teachers = pd.read_sql(sql, engine)

# hire date earlier than a given day (<)
#sql = "SELECT first_name, last_name, hire_date"
#sql += " FROM teachers"
#sql += " WHERE hire_date < '2006-01-01'"
#teachers = pd.read_sql(sql, engine, parse_dates=['hire_date'])

# salary inside a range (BETWEEN); the 65000.0 row in the output shows the
# upper bound is included
sql = (
    "SELECT first_name, last_name, salary"
    " FROM teachers"
    " WHERE salary BETWEEN 40000 AND 65000"
)
teachers = pd.read_sql(sql=sql, con=engine)

teachers.head(6)

Unnamed: 0,first_name,last_name,salary
0,Lee,Reynolds,65000.0
1,Samuel,Cole,43500.0
2,Betty,Diaz,43500.0


In [13]:
# LIKE and ILIKE

# LIKE is case sensitive.
# When the statement is passed as a plain string, the % wildcard has to be
# written as %% (escaped).
sql = (
    "SELECT first_name"
    " FROM teachers"
    " WHERE first_name LIKE 'Sam%%'"
)
teachers = pd.read_sql(sql=sql, con=engine)

teachers.head(6)

Unnamed: 0,first_name
0,Samuel
1,Samantha


In [14]:
# A different approach: write the statement as a triple-quoted block.

sql = """
    SELECT first_name
    FROM teachers
    WHERE first_name LIKE 'Sam%'
    """
# Wrapping the string in sqlalchemy.text() lets the single % wildcard be used
# as written (compare the %% needed when a plain string is passed).

teachers = pd.read_sql(
    sql=sqlalchemy.text(sql),
    con=engine,
)

teachers.head(6)



Unnamed: 0,first_name
0,Samuel
1,Samantha


In [15]:
# ILIKE ignores case: the lowercase pattern 'sam%' still matches "Samuel"
# and "Samantha".
sql = """
    SELECT first_name
    FROM teachers
    WHERE first_name ILIKE 'sam%'
    """
teachers = pd.read_sql(
    sql=sqlalchemy.text(sql),
    con=engine,
)

teachers.head(6)


Unnamed: 0,first_name
0,Samuel
1,Samantha


In [16]:
# Combining conditions with AND: both must hold for a row to be returned.

sql = """
    SELECT *
    FROM teachers
    WHERE school = 'Myers Middle School'
    AND salary < 40000
    """
# SELECT * includes hire_date, so it is parsed as a date here as well.
teachers = pd.read_sql(
    sql=sqlalchemy.text(sql),
    con=engine,
    parse_dates=['hire_date'],
)
teachers.head(6)


Unnamed: 0,id,first_name,last_name,school,hire_date,salary
0,4,Samantha,Bush,Myers Middle School,2011-10-30,36200.0


In [18]:
# Combining AND with OR: the parentheses group the two salary conditions so
# the school filter applies to both of them.

sql = """
    SELECT *
    FROM teachers
    WHERE school = 'F.D. Roosevelt HS'
      AND (salary < 38000 OR salary > 40000)
    """
# SELECT * includes hire_date, so it is parsed as a date here as well.
teachers = pd.read_sql(
    sql=sqlalchemy.text(sql),
    con=engine,
    parse_dates=['hire_date'],
)
teachers.head(6)


Unnamed: 0,id,first_name,last_name,school,hire_date,salary
0,1,Janet,Smith,F.D. Roosevelt HS,2011-10-30,36200.0
1,2,Lee,Reynolds,F.D. Roosevelt HS,1993-05-22,65000.0


In [19]:
# WHERE and ORDER BY together: schools whose name contains "Roos",
# most recent hire first.
sql = """
    SELECT first_name, last_name, school, hire_date, salary
    FROM teachers
    WHERE school LIKE '%Roos%'
    ORDER BY hire_date DESC
    """
# hire_date is selected, so it is parsed as a date.
teachers = pd.read_sql(
    sql=sqlalchemy.text(sql),
    con=engine,
    parse_dates=['hire_date'],
)
teachers.head(6)


Unnamed: 0,first_name,last_name,school,hire_date,salary
0,Janet,Smith,F.D. Roosevelt HS,2011-10-30,36200.0
1,Kathleen,Roush,F.D. Roosevelt HS,2010-10-22,38500.0
2,Lee,Reynolds,F.D. Roosevelt HS,1993-05-22,65000.0


In [20]:
# Exercise: list teachers grouped by school, with schools A-Z and, inside
# each school, teachers A-Z by last name.

sql = """
    SELECT first_name, last_name, school
    FROM teachers
    ORDER BY school ASC, last_name ASC
    """
teachers = pd.read_sql(
    sql=sqlalchemy.text(sql),
    con=engine,
)

teachers.head(6)


Unnamed: 0,first_name,last_name,school
0,Lee,Reynolds,F.D. Roosevelt HS
1,Kathleen,Roush,F.D. Roosevelt HS
2,Janet,Smith,F.D. Roosevelt HS
3,Samantha,Bush,Myers Middle School
4,Samuel,Cole,Myers Middle School
5,Betty,Diaz,Myers Middle School


In [21]:
# Exercise: teachers whose first name starts with S (matched without regard
# to case via ILIKE) and whose salary is above $40,000.

sql = """
    SELECT first_name, last_name, school, salary
    FROM teachers
    WHERE first_name ILIKE 's%'
    AND salary > 40000
    """
teachers = pd.read_sql(
    sql=sqlalchemy.text(sql),
    con=engine,
)

teachers.head(6)


Unnamed: 0,first_name,last_name,school,salary
0,Samuel,Cole,Myers Middle School,43500.0


In [22]:
# Exercise: rank teachers hired since 2010-01-01, highest paid first.
# "Since" includes the start date itself, so the comparison is >= rather
# than >; with a strict > anyone hired exactly on 2010-01-01 would be dropped.

sql = """
    SELECT first_name, last_name, school, salary, hire_date
    FROM teachers
    WHERE hire_date >= '2010-01-01'
    ORDER BY salary DESC
    """
# hire_date is selected, so it is parsed as a date.
teachers = pd.read_sql(sqlalchemy.text(sql), engine, parse_dates=['hire_date'])

teachers.head(6)


Unnamed: 0,first_name,last_name,school,salary,hire_date
0,Kathleen,Roush,F.D. Roosevelt HS,38500.0,2010-10-22
1,Janet,Smith,F.D. Roosevelt HS,36200.0,2011-10-30
2,Samantha,Bush,Myers Middle School,36200.0,2011-10-30


In [None]:
# end chapter 2