<a href="https://colab.research.google.com/github/leopriyam/SQL_Data_Analytics_Project/blob/main/3_Date_Differences.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import sys
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# If running in Google Colab, install PostgreSQL and restore the database
if 'google.colab' in sys.modules:
    # Update package installer
    !sudo apt-get update -qq > /dev/null 2>&1

    # Install PostgreSQL
    !sudo apt-get install postgresql -qq > /dev/null 2>&1

    # Start PostgreSQL service (suppress output)
    !sudo service postgresql start > /dev/null 2>&1

    # Set password for the 'postgres' user to avoid authentication errors (suppress output)
    !sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD 'password';" > /dev/null 2>&1

    # Create the 'colab_db' database (suppress output)
    !sudo -u postgres psql -c "CREATE DATABASE contoso_100k;" > /dev/null 2>&1

    # Download the PostgreSQL .sql dump
    !wget -q -O contoso_100k.sql https://github.com/lukebarousse/Int_SQL_Data_Analytics_Course/releases/download/v.0.0.0/contoso_100k.sql

    # Restore the dump file into the PostgreSQL database (suppress output)
    !sudo -u postgres psql contoso_100k < contoso_100k.sql > /dev/null 2>&1

    # Shift libraries from ipython-sql to jupysql
    !pip uninstall -y ipython-sql > /dev/null 2>&1
    !pip install jupysql > /dev/null 2>&1

# Load the sql extension for SQL magic
%load_ext sql

# Connect to the PostgreSQL database
%sql postgresql://postgres:password@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
%config SqlMagic.named_parameters = "disabled"

# Display pandas number to two decimal places
pd.options.display.float_format = '{:.2f}'.format

In [2]:
%%sql

SELECT INTERVAL '5 years'

Unnamed: 0,interval
0,1825 days


In [3]:
%%sql

SELECT
	CURRENT_DATE,
	orderdate
FROM sales
WHERE
	EXTRACT(YEAR FROM orderdate) >= EXTRACT(YEAR FROM CURRENT_DATE) - 5  -- last 5 years

Unnamed: 0,current_date,orderdate
0,2025-12-27,2020-01-01
1,2025-12-27,2020-01-01
2,2025-12-27,2020-01-01
3,2025-12-27,2020-01-01
4,2025-12-27,2020-01-01
...,...,...
124446,2025-12-27,2024-04-20
124447,2025-12-27,2024-04-20
124448,2025-12-27,2024-04-20
124449,2025-12-27,2024-04-20


In [4]:
%%sql

SELECT
	CURRENT_DATE,
	orderdate
FROM sales
WHERE
	orderdate >= CURRENT_DATE - INTERVAL '5 years' -- Added

Unnamed: 0,current_date,orderdate
0,2025-12-27,2020-12-27
1,2025-12-27,2020-12-28
2,2025-12-27,2020-12-28
3,2025-12-27,2020-12-28
4,2025-12-27,2020-12-28
...,...,...
113283,2025-12-27,2024-04-20
113284,2025-12-27,2024-04-20
113285,2025-12-27,2024-04-20
113286,2025-12-27,2024-04-20


In [5]:
%%sql

SELECT
	CURRENT_DATE,
	s.orderdate,
	p.categoryname,
	SUM(s.quantity * s.netprice * s.exchangerate) AS net_revenue
FROM sales s
	LEFT JOIN product p ON s.productkey = p.productkey
WHERE
	EXTRACT(YEAR FROM s.orderdate) >= EXTRACT(YEAR FROM CURRENT_DATE) - 5  -- last 5 years
GROUP BY
	s.orderdate,
	p.categoryname
ORDER BY
	s.orderdate,
	p.categoryname

Unnamed: 0,current_date,orderdate,categoryname,net_revenue
0,2025-12-27,2020-01-01,Audio,5490.14
1,2025-12-27,2020-01-01,Cameras and camcorders,18880.06
2,2025-12-27,2020-01-01,Cell phones,22593.00
3,2025-12-27,2020-01-01,Computers,78554.54
4,2025-12-27,2020-01-01,Games and Toys,1476.43
...,...,...,...,...
11166,2025-12-27,2024-04-20,Computers,58353.68
11167,2025-12-27,2024-04-20,Games and Toys,1744.30
11168,2025-12-27,2024-04-20,Home Appliances,1562.04
11169,2025-12-27,2024-04-20,"Music, Movies and Audio Books",4949.43


In [6]:
%%sql

SELECT
    orderdate,
    deliverydate,
    AGE(deliverydate, orderdate) AS processing_time
FROM
    sales
ORDER BY RANDOM()
LIMIT 10

Unnamed: 0,orderdate,deliverydate,processing_time
0,2022-09-09,2022-09-12,3 days
1,2023-01-14,2023-01-17,3 days
2,2019-05-11,2019-05-11,0 days
3,2019-05-04,2019-05-04,0 days
4,2019-01-07,2019-01-07,0 days
5,2018-05-12,2018-05-12,0 days
6,2022-06-25,2022-06-28,3 days
7,2018-03-16,2018-03-16,0 days
8,2022-06-09,2022-06-09,0 days
9,2024-02-24,2024-02-25,1 days


In [7]:
%%sql

SELECT
    DATE_PART('year', orderdate) AS order_year,
    AVG(EXTRACT(DAYS FROM AGE(deliverydate, orderdate))) AS avg_processing_time
FROM
    sales
GROUP BY
    order_year;

Unnamed: 0,order_year,avg_processing_time
0,2019.0,0.814679102820342
1,2016.0,1.0828877005347595
2,2017.0,0.8331029398212146
3,2018.0,0.8624168664887119
4,2022.0,1.6233962099189088
5,2015.0,1.0982768691588785
6,2020.0,0.9298837312505548
7,2021.0,1.3570381602223909
8,2023.0,1.752672121971373
9,2024.0,1.66696278748396


In [8]:
%%sql

SELECT
    DATE_PART('year', orderdate) AS order_year,
    ROUND(AVG(EXTRACT(DAYS FROM AGE(deliverydate, orderdate))), 2) AS avg_processing_time
FROM
    sales
WHERE
    orderdate >= CURRENT_DATE - INTERVAL '5 years'
GROUP BY
    order_year;

Unnamed: 0,order_year,avg_processing_time
0,2022.0,1.62
1,2020.0,0.92
2,2021.0,1.36
3,2023.0,1.75
4,2024.0,1.67


In [9]:
%%sql

SELECT
    DATE_PART('year', orderdate) AS order_year,
    ROUND(AVG(EXTRACT(DAYS FROM AGE(deliverydate, orderdate))), 2) AS avg_processing_time,
    CAST(SUM(quantity * netprice * exchangerate) AS INTEGER) AS net_revenue  -- Added
FROM
    sales s
WHERE
    orderdate >= CURRENT_DATE - INTERVAL '5 years'
GROUP BY
    order_year
ORDER BY
    order_year;

Unnamed: 0,order_year,avg_processing_time,net_revenue
0,2020.0,0.92,85767
1,2021.0,1.36,21357977
2,2022.0,1.62,44864557
3,2023.0,1.75,33108566
4,2024.0,1.67,8396527



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.




Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

