In [1]:
import pandas as pd
import psycopg2

def execute_query(sql_query, dbname='sakila', user='postgres', password='postgres', port='5432'):
    """Run a SQL query against PostgreSQL and return the result as a DataFrame.

    A fresh connection is opened for every call and is always closed before
    the function returns, including when the query itself fails.

    Parameters
    ----------
    sql_query : str
        SQL text handed to ``pd.read_sql``.
    dbname, user, password, port : str
        Connection settings forwarded unchanged to ``psycopg2.connect``.
        NOTE(review): the defaults embed a password in the notebook; read
        them from the environment if this is ever used outside a local
        sandbox.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_sql`` returns for ``sql_query``.

    Raises
    ------
    Exception
        Any error raised by ``psycopg2.connect`` or ``pd.read_sql`` is
        propagated to the caller unchanged.
    """
    conn = psycopg2.connect(dbname=dbname, user=user, password=password, port=port)
    try:
        return pd.read_sql(sql_query, conn)
    finally:
        # Close explicitly: a failing query previously skipped the close and
        # leaked the connection. psycopg2's `with conn:` would not help here,
        # since it only ends the transaction and leaves the connection open.
        conn.close()



# Adding and subtracting date and time values

In this exercise, you will calculate the actual number of days rented as well as the true `expected_return_date` by using the `rental_duration` column from the `film` table along with the familiar `rental_date` from the `rental` table.

In [2]:
# Days rented via plain timestamp subtraction: one interval per rental row.
days_rented_sql = """
SELECT f.title, f.rental_duration,
    -- Calculate the number of days rented
    r.return_date - r.rental_date AS days_rented
FROM film AS f
     INNER JOIN inventory AS i ON f.film_id = i.film_id
     INNER JOIN rental AS r ON i.inventory_id = r.inventory_id
ORDER BY f.title;
    """
query_result = execute_query(days_rented_sql)
query_result.head()

Unnamed: 0,title,rental_duration,days_rented
0,ACADEMY DINOSAUR,6,5 days 18:51:00
1,ACADEMY DINOSAUR,6,6 days 04:07:00
2,ACADEMY DINOSAUR,6,4 days 03:25:00
3,ACADEMY DINOSAUR,6,4 days 20:17:00
4,ACADEMY DINOSAUR,6,6 days 19:49:00


In [3]:
# Same calculation as the subtraction version, expressed with AGE(end, start).
days_rented_age_sql = """
SELECT f.title, f.rental_duration,
    -- Calculate the number of days rented
	AGE(r.return_date, r.rental_date) AS days_rented
FROM film AS f
	INNER JOIN inventory AS i ON f.film_id = i.film_id
	INNER JOIN rental AS r ON i.inventory_id = r.inventory_id
ORDER BY f.title;
    """
query_result = execute_query(days_rented_age_sql)
query_result.head()

Unnamed: 0,title,rental_duration,days_rented
0,ACADEMY DINOSAUR,6,5 days 18:51:00
1,ACADEMY DINOSAUR,6,6 days 04:07:00
2,ACADEMY DINOSAUR,6,4 days 03:25:00
3,ACADEMY DINOSAUR,6,4 days 20:17:00
4,ACADEMY DINOSAUR,6,6 days 19:49:00


# INTERVAL arithmetic

If you were running a real DVD Rental store, there would be times when you would need to determine what film titles were currently out for rental with customers. In the previous exercise, we saw that some of the records in the results had a `NULL` value for the return_date. This is because the rental was still outstanding.

In [5]:
# rental_duration is a day count in `film`; multiplying by a one-day INTERVAL
# turns it into an interval. Rentals without a return_date are filtered out.
returned_rentals_sql = """
SELECT
	f.title,
 	-- Convert the rental_duration to an interval
    INTERVAL '1' day * f.rental_duration AS rental_duration,
 	-- Calculate the days rented as we did previously
    r.return_date - r.rental_date AS days_rented
FROM film AS f
    INNER JOIN inventory AS i ON f.film_id = i.film_id
    INNER JOIN rental AS r ON i.inventory_id = r.inventory_id
-- Filter the query to exclude outstanding rentals
WHERE r.return_date IS NOT NULL
ORDER BY f.title;
    """
query_result = execute_query(returned_rentals_sql)
query_result.head()

Unnamed: 0,title,rental_duration,days_rented
0,ACADEMY DINOSAUR,6 days,0 days 23:15:00
1,ACADEMY DINOSAUR,6 days,1 days 03:05:00
2,ACADEMY DINOSAUR,6 days,5 days 23:56:00
3,ACADEMY DINOSAUR,6 days,5 days 01:34:00
4,ACADEMY DINOSAUR,6 days,7 days 03:12:00


# Calculating the expected return date

So now that you've practiced how to add and subtract timestamps and perform relative calculations using intervals, let's use those new skills to calculate the actual expected return date of a specific rental.

In [6]:
# Expected return date = rental_date + (rental_duration days), shown next to
# the actual return_date for comparison.
expected_return_sql = """
SELECT
    f.title,
	r.rental_date,
    f.rental_duration,
    -- Add the rental duration to the rental date
    INTERVAL '1' day * f.rental_duration + r.rental_date AS expected_return_date,
    r.return_date
FROM film AS f
    INNER JOIN inventory AS i ON f.film_id = i.film_id
    INNER JOIN rental AS r ON i.inventory_id = r.inventory_id
ORDER BY f.title;
    """
query_result = execute_query(expected_return_sql)
query_result.head()

Unnamed: 0,title,rental_date,rental_duration,expected_return_date,return_date
0,ACADEMY DINOSAUR,2005-07-31 22:08:29,6,2005-08-06 22:08:29,2005-08-06 16:59:29
1,ACADEMY DINOSAUR,2005-07-30 22:02:34,6,2005-08-05 22:02:34,2005-08-06 02:09:34
2,ACADEMY DINOSAUR,2005-08-18 18:36:16,6,2005-08-24 18:36:16,2005-08-22 22:01:16
3,ACADEMY DINOSAUR,2005-07-27 07:51:11,6,2005-08-02 07:51:11,2005-08-01 04:08:11
4,ACADEMY DINOSAUR,2005-07-29 09:41:38,6,2005-08-04 09:41:38,2005-08-05 05:30:38


# Current timestamp functions

Use the console to explore the `NOW()`, `CURRENT_TIMESTAMP`, `CURRENT_DATE` and `CURRENT_TIME` functions

In [7]:
# NOW(): current timestamp, time zone included.
execute_query("SELECT NOW()")

Unnamed: 0,now
0,2024-02-09 05:18:57.410493+00:00


In [8]:
# CURRENT_TIMESTAMP: current timestamp, time zone included.
execute_query("SELECT CURRENT_TIMESTAMP")

Unnamed: 0,current_timestamp
0,2024-02-09 05:19:12.855320+00:00


In [9]:
# CURRENT_TIME: time of day only, with its UTC offset.
execute_query("SELECT CURRENT_TIME")


Unnamed: 0,current_time
0,11:19:18.626711+06:00


In [10]:
# CURRENT_DATE: calendar date only, no time component.
execute_query("SELECT CURRENT_DATE")

Unnamed: 0,current_date
0,2024-02-09


# Working with the current date and time

Because the Sakila database is a bit dated and most of the date and time values are from 2005 or 2006, you are going to practice using the current date and time in our queries without using Sakila. You will also cast the results to drop the time zone.

In [12]:
# Casting NOW() to `timestamp` drops the time zone from the result.
now_without_tz_sql = "SELECT CAST( NOW() AS timestamp )"
execute_query(now_without_tz_sql)


Unnamed: 0,now
0,2024-02-09 11:22:01.059141


In [13]:
# Two routes to today's date: the CURRENT_DATE keyword and NOW() cast to date.
today_sql = """
SELECT 
	-- Select the current date
	CURRENT_DATE,
    -- CAST the result of the NOW() function to a date
    CAST( NOW() AS date )
    """
query_result = execute_query(today_sql)
query_result.head()

Unnamed: 0,current_date,now
0,2024-02-09,2024-02-09


# Manipulating the current date and time

Most of the time when you work with the current date and time, you will want to transform, manipulate, or perform operations on the value in your queries. In this exercise, you will practice adding an `INTERVAL` to the current timestamp as well as perform some more advanced calculations.

In [14]:
# The `::timestamp` cast drops the time zone from CURRENT_TIMESTAMP.
right_now_sql = "SELECT CURRENT_TIMESTAMP::timestamp AS right_now;"
execute_query(right_now_sql)

Unnamed: 0,right_now
0,2024-02-09 11:24:34.365965


In [15]:
# Note: right_now is cast to a zone-less timestamp, while five_days_from_now
# is built from the uncast CURRENT_TIMESTAMP and therefore keeps its time zone.
five_days_sql = """
SELECT
	CURRENT_TIMESTAMP::timestamp AS right_now,
    INTERVAL '5 days' + CURRENT_TIMESTAMP AS five_days_from_now;
    """
query_result = execute_query(five_days_sql)
query_result.head()

Unnamed: 0,right_now,five_days_from_now
0,2024-02-09 11:24:48.984248,2024-02-14 05:24:48.984248+00:00


In [16]:
# CURRENT_TIMESTAMP(2) limits fractional seconds to two digits; as above,
# only right_now is cast to a zone-less timestamp.
five_days_precision_sql = """
SELECT
	CURRENT_TIMESTAMP(2)::timestamp AS right_now,
    interval '5 days' + CURRENT_TIMESTAMP(2) AS five_days_from_now;
    """
query_result = execute_query(five_days_precision_sql)
query_result.head()

Unnamed: 0,right_now,five_days_from_now
0,2024-02-09 11:25:09.670,2024-02-14 05:25:09.670000+00:00


# Using EXTRACT

You can use `EXTRACT()` and `DATE_PART()` to easily create new fields in your queries by extracting sub-fields from a source timestamp field.

In [17]:
# Day-of-week number for the first 100 rental rows returned.
rental_dow_sql = """
SELECT 
  -- Extract day of week from rental_date
  EXTRACT(DOW FROM rental_date) AS dayofweek 
FROM rental 
LIMIT 100;
    """
query_result = execute_query(rental_dow_sql)
query_result.head()

Unnamed: 0,dayofweek
0,2.0
1,2.0
2,2.0
3,2.0
4,2.0


In [18]:
# Rental counts per day of week; `GROUP BY 1` groups on the first select
# column. No ORDER BY is given, so the row order is not guaranteed.
rentals_per_dow_sql = """
-- Extract day of week from rental_date
SELECT 
  EXTRACT(dow FROM rental_date) AS dayofweek, 
  -- Count the number of rentals
  COUNT(*) as rentals 
FROM rental 
GROUP BY 1;
    """
query_result = execute_query(rentals_per_dow_sql)
query_result.head()

Unnamed: 0,dayofweek,rentals
0,0.0,2320
1,6.0,2311
2,1.0,2247
3,2.0,2463
4,3.0,2231


# Using DATE_TRUNC

The `DATE_TRUNC()` function will truncate timestamp or interval data types to return a timestamp or interval at a specified precision.

In [19]:
# Truncate every rental_date down to the start of its year.
rental_year_sql = """
-- Truncate rental_date by year
SELECT DATE_TRUNC('year', rental_date) AS rental_year
FROM rental;
    """
query_result = execute_query(rental_year_sql)
query_result.head()

Unnamed: 0,rental_year
0,2005-01-01
1,2005-01-01
2,2005-01-01
3,2005-01-01
4,2005-01-01


In [20]:

# Truncate every rental_date down to the first day of its month.
rental_month_sql = """
-- Truncate rental_date by month
SELECT DATE_TRUNC('month', rental_date) AS rental_month
FROM rental;
    """
query_result = execute_query(rental_month_sql)
query_result.head()

Unnamed: 0,rental_month
0,2005-05-01
1,2005-05-01
2,2005-05-01
3,2005-05-01
4,2005-05-01


In [21]:

# Truncate every rental_date down to its calendar day (time part removed).
rental_day_sql = """
-- Truncate rental_date by day of the month 
SELECT DATE_TRUNC('day', rental_date) AS rental_day 
FROM rental;
    """
query_result = execute_query(rental_day_sql)
query_result.head()

Unnamed: 0,rental_day
0,2005-05-24
1,2005-05-24
2,2005-05-24
3,2005-05-24
4,2005-05-24


In [22]:

# Rentals per calendar day: truncate to the day, then count rows per group.
# No ORDER BY is given, so the row order is not guaranteed.
rentals_per_day_sql = """
SELECT 
  DATE_TRUNC('day', rental_date) AS rental_day,
  -- Count total number of rentals 
  COUNT(*) AS rentals 
FROM rental
GROUP BY 1;
    """
query_result = execute_query(rentals_per_day_sql)
query_result.head()

Unnamed: 0,rental_day,rentals
0,2005-05-28,196
1,2005-05-25,137
2,2005-05-29,154
3,2005-08-16,23
4,2005-05-31,163


# Putting it all together

Many of the techniques you've learned in this course will be useful when building queries to extract data for model training. Now let's use some date/time functions to extract and manipulate some DVD rentals data from our fictional DVD rental store.

In [23]:

# Day of week and rental length for rentals that started within the 90 days
# beginning 2005-05-01 (BETWEEN includes both bounds).
rental_window_sql = """
SELECT 
  -- Extract the day of week date part from the rental_date
  EXTRACT(dow FROM rental_date) AS dayofweek,
  AGE(return_date, rental_date) AS rental_days
FROM rental AS r 
WHERE 
  -- Use an INTERVAL for the upper bound of the rental_date 
  rental_date BETWEEN CAST('2005-05-01' AS timestamp)
   AND CAST('2005-05-01' AS timestamp) + INTERVAL '90 day';
    """
query_result = execute_query(rental_window_sql)
query_result.head()

Unnamed: 0,dayofweek,rental_days
0,2.0,1 days 23:11:00
1,2.0,3 days 20:46:00
2,2.0,7 days 23:09:00
3,2.0,9 days 02:39:00
4,2.0,8 days 05:28:00


In [24]:

# Per-rental report for the same 90-day window: customer, film, rental date,
# day of week, rental length, and a past_due flag. past_due is TRUE when the
# whole-day rental length exceeds the film's rental_duration (in days).
# NOTE(review): the inline SQL comment "Calculate number of d" is truncated in
# the original query text; it sits above the rental_duration-to-interval term.
past_due_sql = """
SELECT 
  c.first_name || ' ' || c.last_name AS customer_name,
  f.title,
  r.rental_date,
  -- Extract the day of week date part from the rental_date
  EXTRACT(dow FROM r.rental_date) AS dayofweek,
  AGE(r.return_date, r.rental_date) AS rental_days,
  -- Use DATE_TRUNC to get days from the AGE function
  CASE WHEN DATE_TRUNC('day', AGE(r.return_date, r.rental_date)) > 
  -- Calculate number of d
    f.rental_duration * INTERVAL '1' day 
  THEN TRUE 
  ELSE FALSE END AS past_due 
FROM 
  film AS f 
  INNER JOIN inventory AS i 
  	ON f.film_id = i.film_id 
  INNER JOIN rental AS r 
  	ON i.inventory_id = r.inventory_id 
  INNER JOIN customer AS c 
  	ON c.customer_id = r.customer_id 
WHERE 
  -- Use an INTERVAL for the upper bound of the rental_date 
  r.rental_date BETWEEN CAST('2005-05-01' AS DATE) 
  AND CAST('2005-05-01' AS DATE) + INTERVAL '90 day';
    """
query_result = execute_query(past_due_sql)
query_result.head()

Unnamed: 0,customer_name,title,rental_date,dayofweek,rental_days,past_due
0,CHARLOTTE HUNTER,BLANKET BEVERLY,2005-05-24 22:53:30,2.0,1 days 23:11:00,False
1,TOMMY COLLAZO,FREAKY POCUS,2005-05-24 22:54:33,2.0,3 days 20:46:00,False
2,MANUEL MURRELL,GRADUATE LORD,2005-05-24 23:03:39,2.0,7 days 23:09:00,False
3,ANDREW PURDY,LOVE SUICIDES,2005-05-24 23:04:41,2.0,9 days 02:39:00,True
4,DELORES HANSEN,IDOLS SNATCHERS,2005-05-24 23:05:21,2.0,8 days 05:28:00,True
