<a href="https://colab.research.google.com/github/leopriyam/SQL_Data_Analytics_Project/blob/main/2_Statistical_Aggregations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Notebook setup: imports, an optional Colab-only PostgreSQL bootstrap, and
# SQL-magic / pandas display configuration used by the query cells below.
import sys
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline

# If running in Google Colab, install PostgreSQL and restore the database.
# Every shell command in this branch discards stdout and stderr, so a failed
# step is silent and only surfaces later when the connection or a query fails.
if 'google.colab' in sys.modules:
    # Refresh the apt package index
    !sudo apt-get update -qq > /dev/null 2>&1

    # Install PostgreSQL
    !sudo apt-get install postgresql -qq > /dev/null 2>&1

    # Start PostgreSQL service (suppress output)
    !sudo service postgresql start > /dev/null 2>&1

    # Set password for the 'postgres' user to avoid authentication errors (suppress output).
    # This value must match the password embedded in the connection URL further down.
    !sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD 'password';" > /dev/null 2>&1

    # Create the 'contoso_100k' database (suppress output)
    !sudo -u postgres psql -c "CREATE DATABASE contoso_100k;" > /dev/null 2>&1

    # Download the PostgreSQL .sql dump
    !wget -q -O contoso_100k.sql https://github.com/lukebarousse/Int_SQL_Data_Analytics_Course/releases/download/v.0.0.0/contoso_100k.sql

    # Restore the dump file into the PostgreSQL database (suppress output)
    !sudo -u postgres psql contoso_100k < contoso_100k.sql > /dev/null 2>&1

    # Swap ipython-sql for jupysql before the `sql` extension is loaded below
    !pip uninstall -y ipython-sql > /dev/null 2>&1
    !pip install jupysql > /dev/null 2>&1

# Load the sql extension for SQL magic
%load_ext sql

# Connect to the PostgreSQL database.
# Outside Colab nothing above is installed or created, so this expects a server
# already listening on localhost:5432 with a 'contoso_100k' database and the
# same 'postgres' / 'password' credentials.
%sql postgresql://postgres:password@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
# NOTE(review): presumably so colons in SQL text are not treated as parameter
# placeholders -- confirm against the jupysql documentation.
%config SqlMagic.named_parameters = "disabled"

# Display pandas floats with two decimal places (display only; stored values are unchanged)
pd.options.display.float_format = '{:.2f}'.format

In [3]:
%%sql

SELECT
    -- Average net revenue per sales row (quantity * netprice / exchangerate)
    -- for each product category, with 2022 and 2023 in separate columns.
    prod.categoryname AS category,
    -- FILTER feeds only the rows of the given year to each AVG, which matches
    -- an ELSE-less CASE whose NULLs the aggregate would skip.
    AVG(sale.quantity * sale.netprice / sale.exchangerate)
        FILTER (WHERE sale.orderdate BETWEEN '2022-01-01' AND '2022-12-31') AS avg_net_revenue_2022,
    AVG(sale.quantity * sale.netprice / sale.exchangerate)
        FILTER (WHERE sale.orderdate BETWEEN '2023-01-01' AND '2023-12-31') AS avg_net_revenue_2023
FROM
    sales AS sale
    -- LEFT JOIN keeps sales rows that have no matching product row
    LEFT JOIN product AS prod
        ON sale.productkey = prod.productkey
GROUP BY
    prod.categoryname
ORDER BY
    prod.categoryname;

Unnamed: 0,category,avg_net_revenue_2022,avg_net_revenue_2023
0,Audio,372.01,399.37
1,Cameras and camcorders,1154.55,1151.87
2,Cell phones,688.91,591.13
3,Computers,1512.57,1230.32
4,Games and Toys,77.24,77.71
5,Home Appliances,1649.14,1809.65
6,"Music, Movies and Audio Books",368.32,316.44
7,TV and Video,1466.02,1595.04


In [4]:
%%sql

SELECT
    -- Smallest net revenue of any single sales row
    -- (quantity * netprice / exchangerate) for each product category,
    -- with 2022 and 2023 in separate columns.
    prod.categoryname AS category,
    -- FILTER feeds only the rows of the given year to each MIN
    MIN(sale.quantity * sale.netprice / sale.exchangerate)
        FILTER (WHERE sale.orderdate BETWEEN '2022-01-01' AND '2022-12-31') AS min_net_revenue_2022,
    MIN(sale.quantity * sale.netprice / sale.exchangerate)
        FILTER (WHERE sale.orderdate BETWEEN '2023-01-01' AND '2023-12-31') AS min_net_revenue_2023
FROM
    sales AS sale
    -- LEFT JOIN keeps sales rows that have no matching product row
    LEFT JOIN product AS prod
        ON sale.productkey = prod.productkey
GROUP BY
    prod.categoryname
ORDER BY
    prod.categoryname;

Unnamed: 0,category,min_net_revenue_2022,min_net_revenue_2023
0,Audio,8.64,11.3
1,Cameras and camcorders,4.48,4.65
2,Cell phones,2.05,1.81
3,Computers,0.83,0.6
4,Games and Toys,2.75,2.91
5,Home Appliances,3.45,4.54
6,"Music, Movies and Audio Books",5.86,5.7
7,TV and Video,34.87,42.09


In [6]:
%%sql

SELECT
    -- Largest net revenue of any single sales row
    -- (quantity * netprice / exchangerate) for each product category,
    -- with 2022 and 2023 in separate columns.
    p.categoryname AS category,
    -- FILTER feeds only the rows of the given year to each MAX
    MAX(s.quantity * s.netprice / s.exchangerate)
        FILTER (WHERE s.orderdate BETWEEN '2022-01-01' AND '2022-12-31') AS max_net_revenue_2022,
    -- The alias must name the 2023 window so the two result columns stay
    -- distinct (a repeated alias makes pandas rename the second column)
    MAX(s.quantity * s.netprice / s.exchangerate)
        FILTER (WHERE s.orderdate BETWEEN '2023-01-01' AND '2023-12-31') AS max_net_revenue_2023
FROM
    sales s
    -- LEFT JOIN keeps sales rows that have no matching product row
    LEFT JOIN product p ON s.productkey = p.productkey
GROUP BY
    p.categoryname
ORDER BY
    p.categoryname;

Unnamed: 0,category,max_net_revenue_2022,max_net_revenue_2022.1
0,Audio,2664.0,3707.51
1,Cameras and camcorders,13730.47,13572.0
2,Cell phones,7980.27,7298.09
3,Computers,33361.65,25430.98
4,Games and Toys,5253.16,5233.22
5,Home Appliances,28692.31,29045.42
6,"Music, Movies and Audio Books",3771.97,3171.61
7,TV and Video,27483.99,28993.53


In [7]:
%%sql

WITH net_revenue_2022_2023 AS (
    -- One net revenue value per sales row ordered in 2022 or 2023
    SELECT
        quantity * netprice / exchangerate AS net_revenue
    FROM
        sales
    WHERE
        orderdate BETWEEN '2022-01-01' AND '2023-12-31'
)
SELECT
    -- Continuous (interpolated) 50th percentile across both years combined
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY net_revenue) AS median
FROM
    net_revenue_2022_2023;

Unnamed: 0,median
0,377.73


In [10]:
%%sql

SELECT
    -- Median net revenue per sales row (quantity * netprice / exchangerate)
    -- for each product category, with 2022 and 2023 in separate columns.
    prod.categoryname AS category,
    -- FILTER restricts the rows that reach each ordered-set aggregate, which
    -- matches ordering by an ELSE-less CASE whose NULLs are ignored.
    PERCENTILE_CONT(0.5)
        WITHIN GROUP (ORDER BY sale.quantity * sale.netprice / sale.exchangerate)
        FILTER (WHERE sale.orderdate BETWEEN '2022-01-01' AND '2022-12-31') AS y2022_median_sales,
    PERCENTILE_CONT(0.5)
        WITHIN GROUP (ORDER BY sale.quantity * sale.netprice / sale.exchangerate)
        FILTER (WHERE sale.orderdate BETWEEN '2023-01-01' AND '2023-12-31') AS y2023_median_sales
FROM
    sales AS sale
    -- LEFT JOIN keeps sales rows that have no matching product row
    LEFT JOIN product AS prod
        ON sale.productkey = prod.productkey
GROUP BY
    prod.categoryname
ORDER BY
    prod.categoryname;

Unnamed: 0,category,y2022_median_sales,y2023_median_sales
0,Audio,246.21,260.63
1,Cameras and camcorders,652.79,639.18
2,Cell phones,404.6,350.1
3,Computers,769.36,617.89
4,Games and Toys,32.0,31.98
5,Home Appliances,772.2,798.52
6,"Music, Movies and Audio Books",173.97,152.74
7,TV and Video,697.22,738.0
