In [1]:
import psycopg2 as pg2
import pandas as pd

In [147]:
# Connect to the `dvdrental` PostgreSQL database and open the cursor that the
# query cells in this notebook execute their statements on.
conn = pg2.connect(
    database='dvdrental',
    user='kevindeeboman',
)
cur = conn.cursor()

In [157]:
def show_res(cur, head=False, n=3):
    """Fetch the pending result set from ``cur`` and display it as a DataFrame.

    Parameters
    ----------
    cur : DB-API cursor
        Cursor on which a row-returning statement has just been executed.
        Its ``fetchall()`` method and ``description`` attribute are read.
    head : bool, default False
        When truthy, only the first ``n`` rows are displayed; otherwise the
        complete result set is displayed.
    n : int, default 3
        Number of rows shown when ``head`` is truthy.

    Returns
    -------
    None
        The frame is rendered through ``display`` and not returned, so a call
        on the last line of a cell does not render the table a second time.

    Notes
    -----
    ``display`` is not imported in this function; it is expected to be
    provided by the notebook session.
    """
    rows = cur.fetchall()
    # Each entry of cur.description describes one result column; item 0 is
    # used as the column name.
    col_names = [desc[0] for desc in cur.description]
    df = pd.DataFrame(data=rows, columns=col_names)
    # Plain truthiness test instead of `head == True`, so any truthy flag
    # selects the preview.
    display(df.head(n) if head else df)

In [158]:
# SELECT: choose which columns of a table a query returns.
# SQL keywords are not case-sensitive; writing them in capital letters is a
# readability convention that this notebook follows.
basic_selects = (
    # `*` stands for every column of the table.
    """
    SELECT * FROM film;
    """,
    # Listing column names returns only those columns.
    """
    SELECT first_name, last_name, email FROM customer;
    """,
)
for sql in basic_selects:
    cur.execute(sql)
    show_res(cur, head=True)

Unnamed: 0,film_id,title,description,release_year,language_id,rental_duration,rental_rate,length,replacement_cost,rating,last_update,special_features,fulltext
0,133,Chamber Italian,A Fateful Reflection of a Moose And a Husband ...,2006,1,7,4.99,117,14.99,NC-17,2013-05-26 14:50:58.951,[Trailers],'chamber':1 'fate':4 'husband':11 'italian':2 ...
1,384,Grosse Wonderful,A Epic Drama of a Cat And a Explorer who must ...,2006,1,5,4.99,49,19.99,R,2013-05-26 14:50:58.951,[Behind the Scenes],'australia':18 'cat':8 'drama':5 'epic':4 'exp...
2,8,Airport Pollock,A Epic Tale of a Moose And a Girl who must Con...,2006,1,6,4.99,54,15.99,R,2013-05-26 14:50:58.951,[Trailers],'airport':1 'ancient':18 'confront':14 'epic':...


Unnamed: 0,first_name,last_name,email
0,Jared,Ely,jared.ely@sakilacustomer.org
1,Mary,Smith,mary.smith@sakilacustomer.org
2,Patricia,Johnson,patricia.johnson@sakilacustomer.org


In [48]:
# DISTINCT: return each unique value of a column only once.
# DISTINCT is a keyword rather than a function, so the parentheses around
# `rating` below only group the expression.
distinct_queries = (
    # The unique ratings themselves.
    """
    SELECT DISTINCT(rating) FROM film;
    """,
    # DISTINCT inside the COUNT aggregate: how many unique ratings exist.
    """
    SELECT COUNT(DISTINCT(rating)) FROM film;
    """,
)
for sql in distinct_queries:
    cur.execute(sql)
    show_res(cur)
# Result: there are 5 unique movie ratings.

Unnamed: 0,rating
0,R
1,PG
2,PG-13
3,NC-17
4,G


Unnamed: 0,count
0,5


In [68]:
# WHERE: keep only the rows that satisfy a condition.
# Conditions can be combined with the logical operators AND, OR and NOT.
# This query selects R-rated films whose rental rate is below 4.99 and whose
# length is not shorter than 175 minutes.
long_cheap_r_films_sql = """
    SELECT title, rating, length, rental_rate FROM film
    WHERE rating = 'R' AND rental_rate < 4.99 AND NOT length < 175
    ;
    """
cur.execute(long_cheap_r_films_sql)
show_res(cur, head=True)

Unnamed: 0,title,rating,length,rental_rate
0,Analyze Hoosiers,R,181,2.99
1,Cause Date,R,179,2.99
2,Double Wrath,R,177,0.99


In [108]:
# ORDER BY: sort the result, per column either ASC (ascending) or DESC
# (descending).
# Rows are sorted by store_id in descending order; rows sharing a store_id
# are then sorted by first_name in ascending order.
# NOTE: ORDER BY is written after the column selection and any WHERE
# filtering.
customers_by_store_sql = """
    SELECT store_id, first_name, last_name FROM customer
    ORDER BY store_id DESC, first_name ASC
    ;
    """
cur.execute(customers_by_store_sql)
show_res(cur, head=True)

Unnamed: 0,store_id,first_name,last_name
0,2,Aaron,Selby
1,2,Adrian,Clary
2,2,Agnes,Bishop


In [110]:
# LIMIT: cap the number of rows a query returns.
# It is written at the end of the query, after ORDER BY, so the cap applies
# to the already sorted rows.
first_ten_customers_sql = """
    SELECT store_id, first_name, last_name FROM customer
    ORDER BY store_id DESC, first_name ASC
    LIMIT 10
    ;
    """
cur.execute(first_ten_customers_sql)
show_res(cur)

Unnamed: 0,store_id,first_name,last_name
0,2,Aaron,Selby
1,2,Adrian,Clary
2,2,Agnes,Bishop
3,2,Alberto,Henning
4,2,Alex,Gresham
5,2,Alexander,Fennell
6,2,Alfred,Casillas
7,2,Alfredo,Mcadams
8,2,Allen,Butterfield
9,2,Allison,Stanley


In [134]:
# ORDER BY combined with LIMIT: "top N" style questions.
# Rows that tie on the sort column come back in no guaranteed order, so which
# tied rows make the cut is not fixed.
top_n_queries = (
    # The 5 most recent payments with a non-zero amount.
    """
    SELECT * FROM payment
    WHERE amount <> 0.00
    ORDER BY payment_date DESC
    LIMIT 5
    ;
    """,
    # The 5 shortest films.
    """
    SELECT title, length FROM film
    ORDER BY length ASC
    LIMIT 5
    ;
    """,
    # How many films have a length of 50 minutes or less?
    """
    SELECT COUNT(*) FROM film
    WHERE length <= 50
    ;
    """,
)
for sql in top_n_queries:
    cur.execute(sql)
    show_res(cur)

Unnamed: 0,payment_id,customer_id,staff_id,rental_id,amount,payment_date
0,31922,279,2,13538,4.99,2007-05-14 13:44:29.996577
1,31917,267,2,12066,7.98,2007-05-14 13:44:29.996577
2,31919,269,1,13025,3.98,2007-05-14 13:44:29.996577
3,31921,274,1,13486,0.99,2007-05-14 13:44:29.996577
4,31923,282,2,15430,0.99,2007-05-14 13:44:29.996577


Unnamed: 0,title,length
0,Labyrinth League,46
1,Alien Center,46
2,Iron Moon,46
3,Kwai Homeward,46
4,Ridgemont Submarine,46


Unnamed: 0,count
0,37


In [137]:
# BETWEEN: range test that includes both endpoints.
#   value BETWEEN low AND high      is the same as  value >= low AND value <= high
#   value NOT BETWEEN low AND high  is the same as  value < low OR value > high
# NOTE(review): payment_date appears to be a timestamp, so the upper bound
# '2007-02-15' would be read as 2007-02-15 00:00:00 and payments made later
# that day would be excluded -- confirm this is the intended range.
early_february_payments_sql = """
    SELECT * FROM payment
    WHERE payment_date BETWEEN '2007-02-01' AND '2007-02-15'
    ;
    """
cur.execute(early_february_payments_sql)
show_res(cur, head=True)

Unnamed: 0,payment_id,customer_id,staff_id,rental_id,amount,payment_date
0,17610,368,1,1186,0.99,2007-02-14 23:25:11.996577
1,17617,370,2,1190,6.99,2007-02-14 23:33:58.996577
2,17743,402,2,1194,4.99,2007-02-14 23:53:34.996577


In [143]:
# IN: test a column against a list of values.
# The same filter could be written as several equality tests joined with OR.
membership_queries = (
    # Films whose category is one of the listed values.
    """
    SELECT * FROM film_list
    WHERE category IN ('Horror', 'Comedy')
    ;
    """,
    # The opposite: films whose category is none of the listed values.
    """
    SELECT * FROM film_list
    WHERE category NOT IN ('Horror', 'Comedy')
    ;
    """,
)
for sql in membership_queries:
    cur.execute(sql)
    show_res(cur, head=True)

Unnamed: 0,fid,title,description,category,price,length,rating,actors
0,308,Ferris Mother,A Touching Display of a Frisbee And a Frisbee ...,Comedy,2.99,142,PG,Sissy Sobieski
1,857,Strictly Scarface,A Touching Reflection of a Crocodile And a Dog...,Comedy,2.99,144,PG-13,"Greg Chaplin, Daryl Crawford, Whoopi Hurt, Ala..."
2,494,Karate Moon,A Astounding Yarn of a Womanizer And a Dog who...,Horror,0.99,120,PG-13,"Johnny Cage, Susan Davis, Julianne Dench, Jane..."


Unnamed: 0,fid,title,description,category,price,length,rating,actors
0,730,Ridgemont Submarine,A Unbelieveable Drama of a Waitress And a Comp...,New,0.99,46,PG-13,"Johnny Lollobrigida, Julianne Dench, Whoopi Hu..."
1,892,Titanic Boondock,A Brilliant Reflection of a Feminist And a Dog...,Animation,4.99,104,R,"Bette Nicholson, Dan Harris, Penelope Cronyn, ..."
2,286,Enough Raging,A Astounding Character Study of a Boat And a S...,Travel,2.99,158,NC-17,"Johnny Lollobrigida, Sandra Peck, Sean William..."


In [164]:
# LIKE (case-sensitive) and ILIKE (case-insensitive): pattern matching with
# the wildcard characters % and _.
#   %  matches any sequence of characters (including none)
#   _  matches exactly one character
# PostgreSQL also offers regular-expression matching (SIMILAR TO and the `~`
# operators) when wildcards are not expressive enough.
pattern_queries = (
    # Names whose second and third characters are "oh": `_` stands for the
    # single first character and `%` for whatever follows the "h".
    (
        """
    SELECT first_name FROM customer
    WHERE first_name ILIKE '_oh%'
    ;
    """,
        False,
    ),
    # Films whose description contains "sql" anywhere, in any letter case.
    (
        """
    SELECT * FROM film_list
    WHERE description ILIKE '%sql%'
    ;
    """,
        True,
    ),
)
for sql, preview_only in pattern_queries:
    cur.execute(sql)
    show_res(cur, head=preview_only)

Unnamed: 0,first_name
0,John
1,Johnny
2,Johnnie


Unnamed: 0,fid,title,description,category,price,length,rating,actors
0,907,Translation Summer,A Touching Reflection of a Man And a Monkey wh...,Drama,0.99,168,PG-13,"Tom Miranda, Laura Brody, Matthew Carrey"
1,494,Karate Moon,A Astounding Yarn of a Womanizer And a Dog who...,Horror,0.99,120,PG-13,"Johnny Cage, Susan Davis, Julianne Dench, Jane..."
2,918,Twisted Pirates,A Touching Display of a Frisbee And a Boat who...,Children,4.99,152,PG,"Natalie Hopkins, Jude Cruise, Adam Grant"
