In [34]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [35]:
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import warnings
warnings.filterwarnings("ignore")

In [36]:
import sys
sys.path.append("../../../") 

from utils.paths import make_dir_line

modality = 'c'
project = 'Intermediate SQL'
data = make_dir_line(modality, project)

raw = data('raw')

In [37]:
import sqlite3

conn = sqlite3.connect(":memory:")  ## aca se indica el nombre de la db.
cur = conn.cursor()

# 4.2.0 Filtering Records

In [38]:
conn.executescript(
    """
DROP TABLE IF EXISTS films;

CREATE TABLE films (
    id            INT,
    title         STRING,
    release_year  INT,
    country       STRING,
    duration      INT,
    language      STRING,
    certification STRING,
    gross         DOUBLE,
    budget        DOUBLE
);

DROP TABLE IF EXISTS people;

CREATE TABLE people (
    id        INT,
    name      STRING,
    birthdate STRING,
    deathdate STRING
);

DROP TABLE IF EXISTS reviews;

CREATE TABLE reviews (
    id             INT,
    film_id        INT,
    num_user       INT,
    num_critic     INT,
    imdb_score     DOUBLE,
    num_votes      DOUBLE,
    facebook_likes DOUBLE
);

DROP TABLE IF EXISTS roles;

CREATE TABLE roles (
    id        INT,
    film_id   INT,
    person_id INT,
    role      STRING
);

"""
)
conn.commit()

In [39]:
df = pd.read_csv(raw / 'films.csv', sep = ',', decimal = '.', header = None, encoding = 'utf-8')
df.columns = ['id','title','release_year','country','duration','language','certification','gross','budget']
films = list(zip(df.id, df.title, df.release_year, df.country, df.duration, df.language, df.certification, df.gross, df.budget))
cur.executemany("INSERT INTO films VALUES  (?,?,?,?,?,?,?,?,?)", films)

<sqlite3.Cursor at 0x7f74ce3bbcc0>

In [40]:
df = pd.read_csv(raw / 'people.csv', sep = ',', decimal = '.', header = None, encoding = 'utf-8')
df.columns = ['id','name','birthdate','deathdate']
people = list(zip(df.id, df.name, df.birthdate, df.deathdate))
cur.executemany("INSERT INTO people VALUES  (?,?,?,?)", people)

<sqlite3.Cursor at 0x7f74ce3bbcc0>

In [41]:
df = pd.read_csv(raw / 'reviews.csv', sep = ',', decimal = '.', header = 0, encoding = 'utf-8')
df.columns = ['id','film_id','num_user','num_critic','imdb_score','num_votes','facebook_likes']
df = df.loc[:,['id','film_id','num_user','num_critic','imdb_score','num_votes','facebook_likes']]
reviews = list(zip(df.id, df.film_id, df.num_user, df.num_critic, df.imdb_score, df.num_votes, df.facebook_likes))
cur.executemany("INSERT INTO reviews VALUES  (?,?,?,?,?,?,?)", reviews)

<sqlite3.Cursor at 0x7f74ce3bbcc0>

In [42]:
df = pd.read_csv(raw / 'roles.csv', sep = ',', decimal = '.', header = None, encoding = 'utf-8')
df.columns = ['id','film_id','person_id','role']
roles = list(zip(df.id, df.film_id, df.person_id, df.role))
cur.executemany("INSERT INTO roles VALUES  (?,?,?,?)", roles)

<sqlite3.Cursor at 0x7f74ce3bbcc0>

## 0.2.2 Filtering results

In [43]:
# -- What does the following query return?
cur.execute("""
            
            SELECT title 
            FROM films 
            WHERE release_year > 2000;
            
            """).fetchall()

[('15 Minutes',),
 ('3000 Miles to Graceland',),
 ('A Beautiful Mind',),
 ("A Knight's Tale",),
 ('A.I. Artificial Intelligence',),
 ('Ali',),
 ('Alias Betty',),
 ("All the Queen's Men",),
 ('Along Came a Spider',),
 ('AmÃ©lie',),
 ("America's Sweethearts",),
 ('American Desi',),
 ('American Outlaws',),
 ('American Pie 2',),
 ('Angel Eyes',),
 ('Antitrust',),
 ('Atlantis: The Lost Empire',),
 ('Ayurveda: Art of Being',),
 ('Baby Boy',),
 ('Bandits',),
 ('Behind Enemy Lines',),
 ('Birthday Girl',),
 ('Black Hawk Down',),
 ('Black Knight',),
 ('Blow',),
 ("Bridget Jones's Diary",),
 ('Brigham City',),
 ('Bubble Boy',),
 ('Buffalo Soldiers',),
 ('Bully',),
 ("Captain Corelli's Mandolin",),
 ('Cats & Dogs',),
 ('Corky Romano',),
 ('Crazy/Beautiful',),
 ('Crocodile Dundee in Los Angeles',),
 ('Dogtown and Z-Boys',),
 ('Domestic Disturbance',),
 ("Don't Say a Word",),
 ('Donnie Darko',),
 ('Double Take',),
 ('Down and Out with the Dolls',),
 ('Down to Earth',),
 ('Dr. Dolittle 2',),
 ('Drive

## 0.2.3 Using WHERE with numbers

In [44]:
# -- Select film_ids and imdb_score with an imdb_score over 7.0
cur.execute("""
            
            SELECT film_id, imdb_score 
            FROM reviews 
            WHERE imdb_score > 7.0;
            
            """).fetchall()

[(3934, 7.0999999),
 (74, 7.5999999),
 (1254, 8.0),
 (4841, 8.10000038),
 (3252, 7.19999981),
 (1181, 7.30000019),
 (3929, 7.0999999),
 (3298, 7.4000001),
 (2744, 7.4000001),
 (4707, 7.4000001),
 (3879, 7.30000019),
 (3854, 7.5999999),
 (1526, 7.5999999),
 (1943, 8.19999981),
 (2331, 7.30000019),
 (2239, 7.4000001),
 (3016, 7.4000001),
 (2471, 7.0999999),
 (1379, 7.30000019),
 (4442, 7.9000001),
 (2118, 7.9000001),
 (4922, 7.30000019),
 (2576, 7.0999999),
 (788, 7.5),
 (578, 7.5),
 (368, 7.0999999),
 (4516, 7.4000001),
 (1354, 7.69999981),
 (269, 8.39999962),
 (613, 7.5999999),
 (3098, 7.5),
 (4314, 8.10000038),
 (4954, 7.5999999),
 (3318, 7.80000019),
 (2179, 8.10000038),
 (2470, 7.4000001),
 (2863, 8.0),
 (4150, 7.69999981),
 (35, 7.5),
 (1320, 7.30000019),
 (4246, 7.5999999),
 (4006, 7.30000019),
 (3955, 7.0999999),
 (600, 7.0999999),
 (2144, 7.5999999),
 (2049, 7.19999981),
 (2444, 7.19999981),
 (1443, 7.30000019),
 (3642, 8.30000019),
 (4542, 7.30000019),
 (1226, 7.30000019),
 (41

In [45]:
# -- Select film_ids and facebook_likes for ten records with less than 1000 likes 
cur.execute("""
            
            SELECT film_id, facebook_likes 
            FROM reviews 
            WHERE facebook_likes < 1000 
            LIMIT 10;
            
            """).fetchall()

[(3405, 0.0),
 (478, 491.0),
 (74, 930.0),
 (740, 0.0),
 (2869, 689.0),
 (1181, 0.0),
 (2020, 0.0),
 (2312, 912.0),
 (1820, 872.0),
 (831, 975.0)]

In [46]:
# -- Count the records with at least 100,000 votes
cur.execute("""
            
            SELECT COUNT (*) AS films_over_100K_votes 
            FROM reviews 
            WHERE num_votes > 100000;
            
            """).fetchall()

[(1211,)]

## 0.2.4 Using WHERE with text

In [47]:
# -- Count the Spanish-language films
cur.execute("""
            
            SELECT COUNT(language) AS count_spanish 
            FROM films 
            WHERE language = 'Spanish';
            
            """).fetchall()

[(40,)]

## 0.2.6 Using AND

In [48]:
# -- Select the title and release_year for all German-language films released before 2000
cur.execute("""
            
            SELECT title, release_year 
            FROM films 
            WHERE language = 'German' AND release_year < 2000;
            
            """).fetchall()

[('Metropolis', 1927),
 ("Pandora's Box", 1929),
 ('The Torture Chamber of Dr. Sadism', 1967),
 ('Das Boot', 1981),
 ('Run Lola Run', 1998),
 ('Aimee & Jaguar', 1999)]

In [49]:
# -- Update the query to see all German-language films released after 2000
cur.execute("""
            
            SELECT title, release_year 
            FROM films 
            WHERE release_year > 2000 AND language = 'German';
            
            """).fetchall()

[('Good Bye Lenin!', 2003),
 ('Downfall', 2004),
 ('Summer Storm', 2004),
 ('The Lives of Others', 2006),
 ('The Baader Meinhof Complex', 2008),
 ('The Wave', 2008),
 ('Cargo', 2009),
 ('Soul Kitchen', 2009),
 ('The White Ribbon', 2009),
 (3, 2010),
 ('Animals United', 2010),
 ('Buen DÃ\xada, RamÃ³n', 2013)]

In [50]:
# -- Select all records for German-language films released after 2000 and before 2010
cur.execute("""
            
            SELECT * 
            FROM films 
            WHERE language = 'German' AND release_year > 2000 AND release_year < 2010;
            
            """).fetchall()

[(1952,
  'Good Bye Lenin!',
  2003,
  'Germany',
  121,
  'German',
  'R',
  4063859.0,
  4800000.0),
 (2130,
  'Downfall',
  2004,
  'Germany',
  178,
  'German',
  'R',
  5501940.0,
  13500000.0),
 (2224,
  'Summer Storm',
  2004,
  'Germany',
  98,
  'German',
  'R',
  95016.0,
  2700000.0),
 (2709,
  'The Lives of Others',
  2006,
  'Germany',
  137,
  'German',
  'R',
  11284657.0,
  2000000.0),
 (3100,
  'The Baader Meinhof Complex',
  2008,
  'Germany',
  184,
  'German',
  'R',
  476270.0,
  20000000.0),
 (3143, 'The Wave', 2008, 'Germany', 107, 'German', None, None, 5000000.0),
 (3220, 'Cargo', 2009, 'Switzerland', 112, 'German', None, None, 4500000.0),
 (3346,
  'Soul Kitchen',
  2009,
  'Germany',
  99,
  'German',
  None,
  274385.0,
  4000000.0),
 (3412,
  'The White Ribbon',
  2009,
  'Germany',
  144,
  'German',
  'R',
  2222647.0,
  12000000.0)]

## 0.2.7 Using OR

In [51]:
# Find the title and year of films from the 1990 or 1999 
cur.execute("""
            
            SELECT title, release_year 
            FROM films 
            WHERE release_year = 1990 OR release_year = 1999;
            
            """).fetchall()

[('Arachnophobia', 1990),
 ('Back to the Future Part III', 1990),
 ("Child's Play 2", 1990),
 ('Dances with Wolves', 1990),
 ('Days of Thunder', 1990),
 ('Dick Tracy', 1990),
 ('Die Hard 2', 1990),
 ('Edward Scissorhands', 1990),
 ('Flatliners', 1990),
 ('Ghost', 1990),
 ('Goodfellas', 1990),
 ('Gremlins 2: The New Batch', 1990),
 ('Home Alone', 1990),
 ('Kindergarten Cop', 1990),
 ("Marilyn Hotchkiss' Ballroom Dancing and Charm School", 1990),
 ('Metropolitan', 1990),
 ('Midnight Cabaret', 1990),
 ("Mo' Better Blues", 1990),
 ('Predator 2', 1990),
 ('Pretty Woman', 1990),
 ('Quigley Down Under', 1990),
 ('Shipwrecked', 1990),
 ('Spaced Invaders', 1990),
 ('The Adventures of Ford Fairlane', 1990),
 ('The Godfather: Part III', 1990),
 ('The Hunt for Red October', 1990),
 ('Total Recall', 1990),
 ('Total Recall', 1990),
 ('Tremors', 1990),
 ('Two Evil Eyes', 1990),
 ('10 Things I Hate About You', 1999),
 ('200 Cigarettes', 1999),
 ('8MM', 1999),
 ('A Dog of Flanders', 1999),
 ('A Room fo

In [52]:
# Filter the records to only include English or Spanish-language films.
cur.execute("""
            
            SELECT title, release_year 
            FROM films 
            WHERE (release_year = 1990 OR release_year = 1999) AND (language = 'English' OR language = 'Spanish');
            
            """).fetchall()

[('Arachnophobia', 1990),
 ('Back to the Future Part III', 1990),
 ("Child's Play 2", 1990),
 ('Dances with Wolves', 1990),
 ('Days of Thunder', 1990),
 ('Dick Tracy', 1990),
 ('Die Hard 2', 1990),
 ('Edward Scissorhands', 1990),
 ('Flatliners', 1990),
 ('Ghost', 1990),
 ('Goodfellas', 1990),
 ('Gremlins 2: The New Batch', 1990),
 ('Home Alone', 1990),
 ('Kindergarten Cop', 1990),
 ("Marilyn Hotchkiss' Ballroom Dancing and Charm School", 1990),
 ('Metropolitan', 1990),
 ('Midnight Cabaret', 1990),
 ("Mo' Better Blues", 1990),
 ('Predator 2', 1990),
 ('Pretty Woman', 1990),
 ('Quigley Down Under', 1990),
 ('Shipwrecked', 1990),
 ('Spaced Invaders', 1990),
 ('The Adventures of Ford Fairlane', 1990),
 ('The Godfather: Part III', 1990),
 ('The Hunt for Red October', 1990),
 ('Total Recall', 1990),
 ('Total Recall', 1990),
 ('Tremors', 1990),
 ('Two Evil Eyes', 1990),
 ('10 Things I Hate About You', 1999),
 ('200 Cigarettes', 1999),
 ('8MM', 1999),
 ('A Dog of Flanders', 1999),
 ('A Room fo

In [53]:
# Restrict the query to only return films worth more than $2,000,000 gross.
cur.execute("""
            
            SELECT title, release_year 
            FROM films 
            WHERE (release_year = 1990 OR release_year = 1999) AND (language = 'English' OR language = 'Spanish') AND gross >= 2000000;
            
            """).fetchall()

[('Arachnophobia', 1990),
 ('Back to the Future Part III', 1990),
 ("Child's Play 2", 1990),
 ('Dances with Wolves', 1990),
 ('Days of Thunder', 1990),
 ('Dick Tracy', 1990),
 ('Die Hard 2', 1990),
 ('Edward Scissorhands', 1990),
 ('Flatliners', 1990),
 ('Ghost', 1990),
 ('Goodfellas', 1990),
 ('Gremlins 2: The New Batch', 1990),
 ('Home Alone', 1990),
 ('Kindergarten Cop', 1990),
 ('Metropolitan', 1990),
 ("Mo' Better Blues", 1990),
 ('Predator 2', 1990),
 ('Pretty Woman', 1990),
 ('Quigley Down Under', 1990),
 ('Spaced Invaders', 1990),
 ('The Adventures of Ford Fairlane', 1990),
 ('The Godfather: Part III', 1990),
 ('The Hunt for Red October', 1990),
 ('Total Recall', 1990),
 ('Total Recall', 1990),
 ('Tremors', 1990),
 ('10 Things I Hate About You', 1999),
 ('200 Cigarettes', 1999),
 ('8MM', 1999),
 ('A Dog of Flanders', 1999),
 ('A Walk on the Moon', 1999),
 ('American Beauty', 1999),
 ('American Pie', 1999),
 ('An Ideal Husband', 1999),
 ('Analyze This', 1999),
 ("Angela's Ashes"

## 0.2.8 Using BETWEEN

In [54]:
# Select the title and release_year for films released between 1990 and 2000
cur.execute("""
            
            SELECT title, release_year 
            FROM films 
            WHERE release_year BETWEEN 1990 AND 2000;
            
            """).fetchall()

[('Arachnophobia', 1990),
 ('Back to the Future Part III', 1990),
 ("Child's Play 2", 1990),
 ('Dances with Wolves', 1990),
 ('Days of Thunder', 1990),
 ('Dick Tracy', 1990),
 ('Die Hard 2', 1990),
 ('Edward Scissorhands', 1990),
 ('Flatliners', 1990),
 ('Ghost', 1990),
 ('Goodfellas', 1990),
 ('Gremlins 2: The New Batch', 1990),
 ('Home Alone', 1990),
 ('Kindergarten Cop', 1990),
 ("Marilyn Hotchkiss' Ballroom Dancing and Charm School", 1990),
 ('Metropolitan', 1990),
 ('Midnight Cabaret', 1990),
 ("Mo' Better Blues", 1990),
 ('Predator 2', 1990),
 ('Pretty Woman', 1990),
 ('Quigley Down Under', 1990),
 ('Shipwrecked', 1990),
 ('Spaced Invaders', 1990),
 ('The Adventures of Ford Fairlane', 1990),
 ('The Godfather: Part III', 1990),
 ('The Hunt for Red October', 1990),
 ('Total Recall', 1990),
 ('Total Recall', 1990),
 ('Tremors', 1990),
 ('Two Evil Eyes', 1990),
 ('Beastmaster 2: Through the Portal of Time', 1991),
 ("Bill & Ted's Bogus Journey", 1991),
 ('Boyz n the Hood', 1991),
 ('

In [55]:
# Build on your previous query to select only films with a budget over $100 million.
cur.execute("""
            
            SELECT title, release_year 
            FROM films 
            WHERE release_year BETWEEN 1990 AND 2000 AND budget > 100000000;
            
            """).fetchall()

[('Terminator 2: Judgment Day', 1991),
 ('True Lies', 1994),
 ('Waterworld', 1995),
 ('Batman & Robin', 1997),
 ("Dante's Peak", 1997),
 ('Princess Mononoke', 1997),
 ('Speed 2: Cruise Control', 1997),
 ('Starship Troopers', 1997),
 ('Titanic', 1997),
 ('Tomorrow Never Dies', 1997),
 ("A Bug's Life", 1998),
 ('Antz', 1998),
 ('Armageddon', 1998),
 ('Les couloirs du temps: Les visiteurs II', 1998),
 ('Lethal Weapon 4', 1998),
 ('Tango', 1998),
 ('Godzilla 2000', 1999),
 ('Star Wars: Episode I - The Phantom Menace', 1999),
 ('Stuart Little', 1999),
 ('The Messenger: The Story of Joan of Arc', 1999),
 ('The World Is Not Enough', 1999),
 ('Wild Wild West', 1999),
 ('Dinosaur', 2000),
 ('Gladiator', 2000),
 ('How the Grinch Stole Christmas', 2000),
 ('Mission: Impossible II', 2000),
 ('The Patriot', 2000),
 ('The Perfect Storm', 2000)]

In [56]:
# Now, restrict the query to only return Spanish-language films.
cur.execute("""
            
            SELECT title, release_year 
            FROM films 
            WHERE release_year BETWEEN 1990 AND 2000 AND budget > 100000000 AND language = 'Spanish';
            
            """).fetchall()

[('Tango', 1998)]

In [57]:
# Now, restrict the query to only return Spanish-language films.
cur.execute("""
            
            SELECT title, release_year 
            FROM films 
            WHERE release_year BETWEEN 1990 AND 2000 AND budget > 100000000 AND (language = 'Spanish' OR language = 'French');
            
            """).fetchall()

[('Les couloirs du temps: Les visiteurs II', 1998), ('Tango', 1998)]

## 0.2.9 LIKE and NOT LIKE

In [58]:
# Select the names that start with B
cur.execute("""
            
            SELECT name 
            FROM people 
            WHERE name LIKE 'B%';
            
            """).fetchall()

[('B.J. Novak',),
 ('Babak Najafi',),
 ('Babar Ahmed',),
 ('Bahare Seddiqi',),
 ('Bai Ling',),
 ('Bailee Madison',),
 ('Balinese Tari Legong Dancers',),
 ('BÃ¡lint PÃ©ntek',),
 ('Baltasar KormÃ¡kur',),
 ('Balthazar Getty',),
 ('Bam Margera',),
 ('Barack Obama',),
 ('Barbara Bach',),
 ('Barbara Billingsley',),
 ('Barbara Carrera',),
 ('BÃ¡rbara Goenaga',),
 ('Barbara Hershey',),
 ('Barbara Kowalcyk',),
 ('BÃ¡rbara Mori',),
 ('Barbet Schroeder',),
 ('Barbie Castro',),
 ('Barbra Streisand',),
 ('Bari Hyman',),
 ('Barnard Hughes',),
 ('Barney Clark',),
 ('Barney Frank',),
 ('Barret Oliver',),
 ('Barrett Esposito',),
 ('Barry Bostwick',),
 ('Barry Cook',),
 ('Barry Corbin',),
 ('Barry Dennen',),
 ('Barry Fitzgerald',),
 ('Barry Levinson',),
 ('Barry Lynch',),
 ('Barry Shabaka Henley',),
 ('Barry Skolnick',),
 ('Barry Sonnenfeld',),
 ('Barry W. Blaustein',),
 ('Barry Watson',),
 ('Bart Freundlich',),
 ('Bart Shatto',),
 ('Bart the Bear',),
 ('Bashar Rahal',),
 ('Basher Savage',),
 ('Basil Ra

In [59]:
# Select the names that have r as the second letter
cur.execute("""
            
            SELECT name 
            FROM people 
            WHERE name LIKE '_r%';
            
            """).fetchall()

[('Ara Celi',),
 ('Aramis Knight',),
 ('Arben Bajraktaraj',),
 ('Arcelia RamÃ\xadrez',),
 ('Archie Kao',),
 ('Archie Panjabi',),
 ('Aretha Franklin',),
 ('Ari Folman',),
 ('Ari Gold',),
 ('Ari Graynor',),
 ('Ari Kirschenbaum',),
 ('Ari Sandel',),
 ('Aria Noelle Curzon',),
 ('Ariana Neal',),
 ('Ariana Richards',),
 ('Ariane Labed',),
 ('Ariane Schluter',),
 ('Arie Posin',),
 ('Ariel Gade',),
 ('Ariel Hsing',),
 ('Ariel Vromen',),
 ('Aries Spears',),
 ('Arik Ascherman',),
 ('Arjun Rampal',),
 ('Arjun Sablok',),
 ('Arliss Howard',),
 ('Arly Jover',),
 ('Armando Riesco',),
 ('Armin Mueller-Stahl',),
 ('Armin Rohde',),
 ('Armin Shimerman',),
 ('Art Carney',),
 ('Art Hindle',),
 ('Art Malik',),
 ('Arthur Agee',),
 ('Arthur Hiller',),
 ("Arthur O'Connell",),
 ('Arturo Castro',),
 ('Artyom Bogucharskiy',),
 ('Aryana Engineer',),
 ('Brad Anderson',),
 ('Brad Bird',),
 ('Brad Copeland',),
 ('Brad Furman',),
 ('Brad Garrett',),
 ('Brad J. Silverman',),
 ('Brad James',),
 ('Brad Lee Wind',),
 ('Br

In [60]:
# Select names that don't start with A
cur.execute("""
            
            SELECT name 
            FROM people WHERE name NOT LIKE 'A%';
            
            """).fetchall()

[('50 Cent',),
 ('Ã\x81lex Angulo',),
 ('Ã\x81lex de la Iglesia',),
 ('Ã\x81ngela Molina',),
 ('B.J. Novak',),
 ('Babak Najafi',),
 ('Babar Ahmed',),
 ('Bahare Seddiqi',),
 ('Bai Ling',),
 ('Bailee Madison',),
 ('Balinese Tari Legong Dancers',),
 ('BÃ¡lint PÃ©ntek',),
 ('Baltasar KormÃ¡kur',),
 ('Balthazar Getty',),
 ('Bam Margera',),
 ('Barack Obama',),
 ('Barbara Bach',),
 ('Barbara Billingsley',),
 ('Barbara Carrera',),
 ('BÃ¡rbara Goenaga',),
 ('Barbara Hershey',),
 ('Barbara Kowalcyk',),
 ('BÃ¡rbara Mori',),
 ('Barbet Schroeder',),
 ('Barbie Castro',),
 ('Barbra Streisand',),
 ('Bari Hyman',),
 ('Barnard Hughes',),
 ('Barney Clark',),
 ('Barney Frank',),
 ('Barret Oliver',),
 ('Barrett Esposito',),
 ('Barry Bostwick',),
 ('Barry Cook',),
 ('Barry Corbin',),
 ('Barry Dennen',),
 ('Barry Fitzgerald',),
 ('Barry Levinson',),
 ('Barry Lynch',),
 ('Barry Shabaka Henley',),
 ('Barry Skolnick',),
 ('Barry Sonnenfeld',),
 ('Barry W. Blaustein',),
 ('Barry Watson',),
 ('Bart Freundlich',),

## 0.2.10 WHERE IN

In [61]:
# -- Find the title and release_year for all films over two hours in length released in 1990 and 2000
cur.execute("""
            
            SELECT title, release_year 
            FROM films 
            WHERE release_year IN (1990, 2000) AND duration > 120;
            
            """).fetchall()

[('Dances with Wolves', 1990),
 ('Die Hard 2', 1990),
 ('Ghost', 1990),
 ('Goodfellas', 1990),
 ("Mo' Better Blues", 1990),
 ('Pretty Woman', 1990),
 ('The Godfather: Part III', 1990),
 ('The Hunt for Red October', 1990),
 ('All the Pretty Horses', 2000),
 ('Almost Famous', 2000),
 ('Bamboozled', 2000),
 ('Cast Away', 2000),
 ('Chocolat', 2000),
 ('Dancer in the Dark', 2000),
 ('Erin Brockovich', 2000),
 ('Finding Forrester', 2000),
 ('Fiza', 2000),
 ('Gladiator', 2000),
 ('Gone in Sixty Seconds', 2000),
 ('Keeping the Faith', 2000),
 ('Love & Basketball', 2000),
 ('Men of Honor', 2000),
 ('Mission: Impossible II', 2000),
 ('Pandaemonium', 2000),
 ('Pay It Forward', 2000),
 ('Pollock', 2000),
 ('Proof of Life', 2000),
 ('Quills', 2000),
 ('Reindeer Games', 2000),
 ('Space Cowboys', 2000),
 ('The 6th Day', 2000),
 ('The Contender', 2000),
 ('The Family Man', 2000),
 ('The House of Mirth', 2000),
 ('The Legend of Bagger Vance', 2000),
 ('The Patriot', 2000),
 ('The Perfect Storm', 2000),

In [62]:
# -- Find the title and language of all films in English, Spanish, and French
cur.execute("""
            
            SELECT title, language 
            FROM films 
            WHERE language IN ('English', 'Spanish', 'French');
            
            """).fetchall()

[('The Broadway Melody', 'English'),
 ("Hell's Angels", 'English'),
 ('A Farewell to Arms', 'English'),
 ('42nd Street', 'English'),
 ('She Done Him Wrong', 'English'),
 ('It Happened One Night', 'English'),
 ('Top Hat', 'English'),
 ('Modern Times', 'English'),
 ('The Charge of the Light Brigade', 'English'),
 ('Snow White and the Seven Dwarfs', 'English'),
 ('The Prisoner of Zenda', 'English'),
 ("Alexander's Ragtime Band", 'English'),
 ("You Can't Take It with You", 'English'),
 ('Gone with the Wind', 'English'),
 ('Mr. Smith Goes to Washington', 'English'),
 ('The Wizard of Oz', 'English'),
 ('Boom Town', 'English'),
 ('Fantasia', 'English'),
 ('Pinocchio', 'English'),
 ('Rebecca', 'English'),
 ('The Blue Bird', 'English'),
 ('How Green Was My Valley', 'English'),
 ('Bambi', 'English'),
 ('Casablanca', 'English'),
 ('A Guy Named Joe', 'English'),
 ('Bathing Beauty', 'English'),
 ('Spellbound', 'English'),
 ('State Fair', 'English'),
 ('The Lost Weekend', 'English'),
 ('The Valley o

In [63]:
# -- Find the title, certification, and language all films certified NC-17 or R that are in English, Italian, or Greek
cur.execute("""
            
            SELECT title, certification, language 
            FROM films 
            WHERE certification IN ('NC-17', 'R') AND language IN ('English', 'Italian', 'Greek');
            
            """).fetchall()

[('Psycho', 'R', 'English'),
 ('A Fistful of Dollars', 'R', 'Italian'),
 ("Rosemary's Baby", 'R', 'English'),
 ('The Wild Bunch', 'R', 'English'),
 ('Catch-22', 'R', 'English'),
 ('Cotton Comes to Harlem', 'R', 'English'),
 ('The Ballad of Cable Hogue', 'R', 'English'),
 ('The Conformist', 'R', 'Italian'),
 ('Woodstock', 'R', 'English'),
 ("Sweet Sweetback's Baadasssss Song", 'R', 'English'),
 ('The French Connection', 'R', 'English'),
 ('The French Connection', 'R', 'English'),
 ('Everything You Always Wanted to Know About Sex * But Were Afraid to Ask',
  'R',
  'English'),
 ('Frenzy', 'R', 'English'),
 ('Pink Flamingos', 'NC-17', 'English'),
 ('The Godfather', 'R', 'English'),
 ('High Plains Drifter', 'R', 'English'),
 ('Mean Streets', 'R', 'English'),
 ('Pat Garrett & Billy the Kid', 'R', 'English'),
 ('The Exorcist', 'R', 'English'),
 ('Blazing Saddles', 'R', 'English'),
 ('The Godfather: Part II', 'R', 'English'),
 ('The Texas Chain Saw Massacre', 'R', 'English'),
 ('The Texas Cha

## 0.2.11 Combining filtering and selecting

In [64]:
# -- Count the unique titles
cur.execute("""
            
            SELECT COUNT(DISTINCT title) AS nineties_english_films_for_teens 
            FROM films 
            WHERE release_year BETWEEN 1990 AND 1999 AND language = 'English' AND certification IN ('G', 'PG', 'PG-13');
            
            """).fetchall()

[(310,)]

## 0.2.14 Practice with NULLs

In [65]:
# -- List all film titles with missing budgets
cur.execute("""
            
            SELECT title AS no_budget_info 
            FROM films 
            WHERE budget IS NULL;
            
            """).fetchall()

[("Pandora's Box",),
 ('The Prisoner of Zenda',),
 ('The Blue Bird',),
 ('Bambi',),
 ('State Fair',),
 ('Open Secret',),
 ('Deadline - U.S.A.',),
 ('Ordet',),
 ("The Party's Over",),
 ('The Torture Chamber of Dr. Sadism',),
 ('Charly',),
 ('Machine Gun McCain',),
 ('The Night Visitor',),
 ('The Bad News Bears',),
 ('The Last Waltz',),
 ('All That Jazz',),
 ('The Muppet Movie',),
 ("Coal Miner's Daughter",),
 ('Galaxina',),
 ('The Four Seasons',),
 ('Butterfly',),
 ('Gabriela',),
 ('Trading Places',),
 ('The Lost Boys',),
 ('Midnight Cabaret',),
 ('Howards End',),
 ("What's Love Got to Do with It",),
 ('The Inkwell',),
 ('Bang',),
 ('The Horseman on the Roof',),
 ('The Jerky Boys',),
 ('The Last Big Thing',),
 ('24 7: Twenty Four Seven',),
 ('Clockwatchers',),
 ('DÃ©jÃ  Vu',),
 ('How to Be a Player',),
 ("I Love You, Don't Touch Me!",),
 ('Love and Death on Long Island',),
 ('Mr. Nice Guy',),
 ('Sparkler',),
 ('The Chambermaid on the Titanic',),
 ('The Edge',),
 ('Dancer, Texas Pop. 81'

In [66]:
# -- Count the number of films we have language data for
cur.execute("""
            
            SELECT COUNT(*) AS count_language_known 
            FROM films 
            WHERE language IS NOT NULL;
            
            """).fetchall()

[(4955,)]

In [67]:
print('Ok_')

Ok_
