<a target="_blank" href="https://colab.research.google.com/github/lukebarousse/Int_SQL_Data_Analytics_Course/blob/main/Resources/Blank_SQL_Notebook.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Blank SQL Notebook

#### Import Libraries & Database

In [28]:
import sys
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# If running in Google Colab, install PostgreSQL and restore the database
if 'google.colab' in sys.modules:
    # Update package installer
    !sudo apt-get update -qq > /dev/null 2>&1

    # Install PostgreSQL
    !sudo apt-get install postgresql -qq > /dev/null 2>&1

    # Start PostgreSQL service (suppress output)
    !sudo service postgresql start > /dev/null 2>&1

    # Set password for the 'postgres' user to avoid authentication errors (suppress output)
    !sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD 'password';" > /dev/null 2>&1

    # Create the 'colab_db' database (suppress output)
    !sudo -u postgres psql -c "CREATE DATABASE contoso_100k;" > /dev/null 2>&1

    # Download the PostgreSQL .sql dump
    !wget -q -O contoso_100k.sql https://github.com/lukebarousse/Int_SQL_Data_Analytics_Course/releases/download/v.0.0.0/contoso_100k.sql

    # Restore the dump file into the PostgreSQL database (suppress output)
    !sudo -u postgres psql contoso_100k < contoso_100k.sql > /dev/null 2>&1

    # Shift libraries from ipython-sql to jupysql
    !pip uninstall -y ipython-sql > /dev/null 2>&1
    !pip install jupysql > /dev/null 2>&1

# Load the sql extension for SQL magic
%load_ext sql

# Connect to the PostgreSQL database
%sql postgresql://postgres:password@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
%config SqlMagic.named_parameters = "disabled"

# Display pandas number to two decimal places
pd.options.display.float_format = '{:.2f}'.format

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [32]:
%%sql
SELECT
orderdate,
quantity,
netprice,
CASE
 WHEN quantity >=2 AND netprice >= 100 THEN 'Multiple High Value Items'
 WHEN quantity >= 100 THEN 'Single High Value Item'
 WHEN netprice >= 2 THEN 'Multiple Standard Item'
 ELSE 'Single Standard Item'
 END AS order_type
  FROM sales s;

Unnamed: 0,orderdate,quantity,netprice,order_type
0,2015-01-01,1,98.97,Multiple Standard Item
1,2015-01-01,1,659.78,Multiple Standard Item
2,2015-01-01,2,54.38,Multiple Standard Item
3,2015-01-01,4,286.69,Multiple High Value Items
4,2015-01-01,7,135.75,Multiple High Value Items
...,...,...,...,...
199868,2024-04-20,7,139.19,Multiple High Value Items
199869,2024-04-20,1,159.99,Multiple Standard Item
199870,2024-04-20,2,53.67,Multiple Standard Item
199871,2024-04-20,5,293.40,Multiple High Value Items


In [38]:
%%sql
WITH PERCENTILES AS (
  SELECT
    PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY (s.quantity * s.netprice * s.exchangerate)) AS net_revenue_25_percentile,
    PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY (s.quantity * s.netprice * s.exchangerate)) AS net_revenue_75_percentile
  FROM sales s
  WHERE orderdate BETWEEN '2022-01-01' AND '2022-12-31'
)
SELECT
p.categoryname AS category,
  CASE
    WHEN (s.quantity * s.netprice * s.exchangerate) < pctl.net_revenue_25_percentile THEN '3-LOW'
    WHEN (s.quantity * s.netprice * s.exchangerate) >= pctl.net_revenue_75_percentile THEN '1-High'
    ELSE '2-Medium'
  END AS revenue_tier,
  SUM(s.quantity * s.netprice * s.exchangerate) AS total_revenue
FROM sales s
LEFT JOIN product p ON s.productkey = p.productkey
CROSS JOIN PERCENTILES pctl
GROUP BY
  p.categoryname,
  revenue_tier
ORDER BY
  p.categoryname,
  revenue_tier;

Unnamed: 0,category,revenue_tier,total_revenue
0,Audio,1-High,1089077.23
1,Audio,2-Medium,3923515.28
2,Audio,3-LOW,300305.59
3,Cameras and camcorders,1-High,14812098.11
4,Cameras and camcorders,2-Medium,3620129.66
5,Cameras and camcorders,3-LOW,88132.89
6,Cell phones,1-High,21097917.93
7,Cell phones,2-Medium,11084005.46
8,Cell phones,3-LOW,442342.32
9,Computers,1-High,78673147.18


In [42]:
%%sql
SELECT
CASE
WHEN c.age < 25 THEN '<25'
WHEN c.age BETWEEN 25 AND 44 THEN '25-44'
ELSE '45+'
END AS age_group,
SUM(quantity*netprice*exchangerate) AS total_purchases
FROM sales s
INNER JOIN customer c ON c.customerkey = s.customerkey
GROUP BY age_group;

Unnamed: 0,age_group,total_purchases
0,<25,18868637.76
1,25-44,62515352.46
2,45+,125023548.36


In [45]:
%%sql
SELECT
SUM(CASE WHEN c.age < 25 THEN (s.quantity *s.netprice * s.exchangerate) ELSE 0 END) AS total_purchases_under_25,
SUM(CASE WHEN c.age BETWEEN 25 AND 44 THEN (s.quantity * s.netprice * s.exchangerate)ELSE 0 END) AS total_purchases_25_44,
SUM(CASE WHEN c.age >= 45 THEN (s.quantity *s.netprice * s.exchangerate)ELSE 0 END) AS total_purchases_45_plus
FROM sales s
INNER JOIN customer c ON c.customerkey = s.customerkey;

Unnamed: 0,total_purchases_under_25,total_purchases_25_44,total_purchases_45_plus
0,18868637.76,62515352.46,125023548.36


In [50]:
%%sql
SELECT
customerkey,
SUM(quantity * netprice * exchangerate ) AS total_spending,
CASE
WHEN SUM(quantity * netprice * exchangerate ) < 500 THEN 'low spender'
WHEN SUM(quantity*netprice*exchangerate) BETWEEN 500 AND 2000 THEN 'Medium spender'
ELSE 'High spender'
END AS spending_category
FROM sales s
WHERE orderdate BETWEEN '2023-01-01' AND '2023-12-31'
GROUP BY customerkey;


Unnamed: 0,customerkey,total_spending,spending_category
0,418360,68.01,low spender
1,1009137,2909.31,High spender
2,1128199,602.40,Medium spender
3,1217159,1329.53,Medium spender
4,1572543,160.46,low spender
...,...,...,...
13741,1201475,3095.41,High spender
13742,335998,4396.29,High spender
13743,1573639,2610.78,High spender
13744,1520240,115.69,low spender


In [56]:
%%sql
SELECT
  CASE
  WHEN weight IS NULL or weightunit IS NULL THEN '1-No weight specified'
  WHEN weightunit = 'LBS' AND weight < 5 THEN '2 - Very Light (< 5 lbs)'
  WHEN weightunit = 'LBS' AND weight BETWEEN 5 AND 25 THEN '3 - Light (5-25 lbs)'
  WHEN weightunit = 'LBS' AND weight BETWEEN 25 AND 100 THEN '4 - Medium (25-100 lbs)'
  WHEN weightunit = 'LBS' AND weight > 100 THEN '5 - Heavy (> 100 lbs)'
  WHEN weightunit = 'OZ' AND weight < 5 THEN '6 - Light Ounces (< 5 oz)'
  WHEN weightunit = 'OZ' AND weight >= 5 THEN '7 - Heavy Ounces (≥ 5 oz)'
  WHEN weightunit = 'G' THEN '8 - Metric Weights'
  ELSE '9 - Other Weight Categories'
  END AS weight_category,
  COUNT(productkey) AS product_count
FROM product
GROUP BY weight_category
ORDER BY weight_category;

Unnamed: 0,weight_category,product_count
0,1-No weight specified,284
1,9 - Other Weight Categories,2233


In [54]:
%%sql
SELECT
    CASE
        WHEN weight IS NULL OR weightunit IS NULL THEN '1 - No Weight Specified'
        WHEN weightunit = 'pounds' AND weight < 5 THEN '2 - Very Light (< 5 lbs)'
        WHEN weightunit = 'pounds' AND weight BETWEEN 5 AND 25 THEN '3 - Light (5-25 lbs)'
        WHEN weightunit = 'pounds' AND weight BETWEEN 25 AND 100 THEN '4 - Medium (25-100 lbs)'
        WHEN weightunit = 'pounds' AND weight > 100 THEN '5 - Heavy (> 100 lbs)'
        WHEN weightunit = 'ounces' AND weight < 5 THEN '6 - Light Ounces (< 5 oz)'
        WHEN weightunit = 'ounces' AND weight >= 5 THEN '7 - Heavy Ounces (≥ 5 oz)'
        WHEN weightunit = 'grams' THEN '8 - Metric Weights'
        ELSE '9 - Other Weight Categories'
    END AS weight_category,
    COUNT(productkey) AS product_count
FROM product
GROUP BY weight_category
ORDER BY weight_category;

Unnamed: 0,weight_category,product_count
0,1 - No Weight Specified,284
1,2 - Very Light (< 5 lbs),568
2,3 - Light (5-25 lbs),751
3,4 - Medium (25-100 lbs),391
4,5 - Heavy (> 100 lbs),112
5,6 - Light Ounces (< 5 oz),225
6,7 - Heavy Ounces (≥ 5 oz),176
7,8 - Metric Weights,10


In [60]:
%%sql
WITH store_revenue AS(
  SELECT
  st.storekey,
  st.squaremeters,
  SUM(s.quantity*s.netprice*s.exchangerate) AS total_net_revenue
FROM sales s
LEFT JOIN store st ON s.storekey = st.storekey
WHERE orderdate BETWEEN '2023-01-01' AND '2023-12-31'
GROUP BY st.storekey,
st.squaremeters
)
SELECT *
FROM store_revenue
LIMIT 10;

Unnamed: 0,storekey,squaremeters,total_net_revenue
0,10,595.0,152603.07
1,35,3000.0,332051.57
2,40,2000.0,255178.79
3,50,2000.0,304304.82
4,60,2000.0,246270.31
5,74,3500.0,603045.14
6,80,2105.0,495257.8
7,90,1500.0,413747.7
8,100,1210.0,396388.3
9,120,350.0,36233.44


In [None]:
%%sql


In [64]:
%%sql
WITH store_revenue AS (
    SELECT
        st.storekey,
        st.squaremeters,
        SUM(s.quantity * s.netprice * s.exchangerate) AS revenue
    FROM sales s
    JOIN store st ON s.storekey = st.storekey
    WHERE s.orderdate BETWEEN '2023-01-01' AND '2023-12-31'
    GROUP BY st.storekey, st.squaremeters
)
SELECT
    CASE
        WHEN squaremeters < 1000 AND revenue < 100000 THEN '1 - Small Store - Low Revenue'
        WHEN squaremeters < 1000 AND revenue >= 100000 THEN '2 - Small Store - High Revenue'
        WHEN squaremeters BETWEEN 1000 AND 2000 AND revenue < 300000 THEN '3 - Medium Store - Low Revenue'
        WHEN squaremeters BETWEEN 1000 AND 2000 AND revenue >= 300000 THEN '4 - Medium Store - High Revenue'
        WHEN squaremeters > 2000 AND revenue < 500000 THEN '5 - Large Store - Low Revenue'
        WHEN squaremeters > 2000 AND revenue >= 500000 THEN '6 - Large Store - High Revenue'
        WHEN squaremeters IS NULL THEN '7 - Online Store'
    END AS store_category,
    SUM(revenue) AS total_net_revenue,
    (SUM(revenue) / (SELECT SUM(revenue) FROM store_revenue) * 100.0) AS percentage_contribution
FROM store_revenue
GROUP BY store_category
ORDER BY store_category;


Unnamed: 0,store_category,total_net_revenue,percentage_contribution
0,1 - Small Store - Low Revenue,387769.84,1.17
1,2 - Small Store - High Revenue,922092.43,2.79
2,3 - Medium Store - Low Revenue,5148573.92,15.55
3,4 - Medium Store - High Revenue,4318808.5,13.04
4,5 - Large Store - Low Revenue,1638484.83,4.95
5,6 - Large Store - High Revenue,603045.14,1.82
6,7 - Online Store,20089790.85,60.68
