<a href="https://colab.research.google.com/github/sandu17767/project_sql_data_jobs/blob/main/Resources/Blank_SQL_Notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a target="_blank" href="https://colab.research.google.com/github/lukebarousse/Int_SQL_Data_Analytics_Course/blob/main/Resources/Blank_SQL_Notebook.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Blank SQL Notebook

#### Import Libraries & Database

In [2]:
# Notebook setup: imports, optional Google Colab provisioning of a local
# PostgreSQL server holding the contoso_100k database, and SQL magic configuration.
import sys
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Only when running in Google Colab: install PostgreSQL, create the database,
# and restore it from the downloaded dump. Every shell command below discards
# stdout and stderr, so a failing step will not print anything here.
if 'google.colab' in sys.modules:
    # Refresh the apt package index
    !sudo apt-get update -qq > /dev/null 2>&1

    # Install PostgreSQL
    !sudo apt-get install postgresql -qq > /dev/null 2>&1

    # Start the PostgreSQL service
    !sudo service postgresql start > /dev/null 2>&1

    # Set the password of the 'postgres' user; it must match the password used
    # in the connection string passed to %sql further down
    !sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD 'password';" > /dev/null 2>&1

    # Create the 'contoso_100k' database
    !sudo -u postgres psql -c "CREATE DATABASE contoso_100k;" > /dev/null 2>&1

    # Download the PostgreSQL .sql dump into the working directory
    !wget -q -O contoso_100k.sql https://github.com/lukebarousse/Int_SQL_Data_Analytics_Course/releases/download/v.0.0.0/contoso_100k.sql

    # Restore the dump file into the 'contoso_100k' database
    !sudo -u postgres psql contoso_100k < contoso_100k.sql > /dev/null 2>&1

    # Swap ipython-sql for jupysql (uninstall the former, install the latter)
    !pip uninstall -y ipython-sql > /dev/null 2>&1
    !pip install jupysql > /dev/null 2>&1

# Load the sql extension so that %sql / %%sql magics are available
%load_ext sql

# Connect to the local PostgreSQL database (user 'postgres', database 'contoso_100k')
%sql postgresql://postgres:password@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
# NOTE(review): presumably so that colons inside SQL text are not treated as
# parameter markers - confirm against the JupySQL documentation
%config SqlMagic.named_parameters = "disabled"

# Display pandas floats with two decimal places
pd.options.display.float_format = '{:.2f}'.format

In [11]:
%%sql
SELECT
    -- One row per order date in 2023 with the number of distinct customers
    -- that appear in sales on that date.
    orderdate,
    COUNT(DISTINCT customerkey) AS total_customers
FROM
    sales
WHERE
    -- Inclusive on both ends, same range as BETWEEN
    orderdate >= '2023-01-01'
    AND orderdate <= '2023-12-31'
GROUP BY
    orderdate
ORDER BY
    orderdate ASC;



Unnamed: 0,orderdate,total_customers
0,2023-01-01,12
1,2023-01-02,49
2,2023-01-03,64
3,2023-01-04,78
4,2023-01-05,87
...,...,...
359,2023-12-27,73
360,2023-12-28,75
361,2023-12-29,55
362,2023-12-30,91


In [19]:
%%sql
SELECT
    -- One row per order date in 2023: all distinct customers, plus the
    -- distinct customers on rows where continent equals Europe.
    s.orderdate,
    COUNT(DISTINCT s.customerkey) AS total_customers,
    COUNT(DISTINCT
        -- No ELSE branch: non-European rows yield NULL and are not counted.
        -- NOTE(review): continent is unqualified - presumably a customer column, confirm
        CASE
            WHEN continent = 'Europe' THEN s.customerkey
        END
    ) AS eu_customers
FROM
    sales AS s
    LEFT JOIN customer AS c
        ON c.customerkey = s.customerkey
WHERE
    s.orderdate >= '2023-01-01'
    AND s.orderdate <= '2023-12-31'
GROUP BY
    s.orderdate
ORDER BY
    s.orderdate ASC;

Unnamed: 0,orderdate,total_customers,eu_customers
0,2023-01-01,12,6
1,2023-01-02,49,15
2,2023-01-03,64,17
3,2023-01-04,78,28
4,2023-01-05,87,22
...,...,...,...
359,2023-12-27,73,26
360,2023-12-28,75,24
361,2023-12-29,55,19
362,2023-12-30,91,25


In [28]:
%%sql
SELECT
    -- Net revenue (quantity * netprice * exchangerate) per product category:
    -- the total over all sales rows, plus the 2022 and 2023 portions.
    p.categoryname,
    SUM(s.quantity * s.netprice * s.exchangerate) AS total_net_revenue,
    SUM(
        CASE
            WHEN s.orderdate BETWEEN '2022-01-01' AND '2022-12-31'
                THEN s.quantity * s.netprice * s.exchangerate
            ELSE 0
        END
    ) AS net_revenue_2022,
    -- Each year needs its own alias. Reusing net_revenue_2022 here produced
    -- two result columns with the same name, shown as net_revenue_2022.1.
    SUM(
        CASE
            WHEN s.orderdate BETWEEN '2023-01-01' AND '2023-12-31'
                THEN s.quantity * s.netprice * s.exchangerate
            ELSE 0
        END
    ) AS net_revenue_2023
FROM
    sales AS s
    LEFT JOIN product AS p
        ON p.productkey = s.productkey
GROUP BY
    p.categoryname
ORDER BY
    p.categoryname;


Unnamed: 0,categoryname,total_net_revenue,net_revenue_2022,net_revenue_2022.1
0,Audio,5312898.1,766938.21,688690.18
1,Cameras and camcorders,18520360.66,2382532.56,1983546.29
2,Cell phones,32624265.72,8119665.07,6002147.63
3,Computers,90619022.05,17862213.49,11650867.21
4,Games and Toys,1668574.13,316127.3,270374.96
5,Home Appliances,26607245.54,6612446.68,5919992.87
6,"Music, Movies and Audio Books",10588311.0,2989297.28,2180768.13
7,TV and Video,20466861.38,5815336.61,4412178.23


In [3]:
%%sql
SELECT
    -- Distinct 2023 customers per store code, split by customer gender.
    -- Only the first 10 store codes in ascending order are returned.
    st.storecode,
    -- FILTER restricts the rows fed to the aggregate, which matches counting
    -- a CASE expression that yields NULL for non-matching rows.
    COUNT(DISTINCT c.customerkey) FILTER (WHERE c.gender = 'male') AS male_customers,
    COUNT(DISTINCT c.customerkey) FILTER (WHERE c.gender = 'female') AS female_customers
FROM
    sales AS s
    LEFT JOIN store AS st
        ON s.storekey = st.storekey
    LEFT JOIN customer AS c
        ON s.customerkey = c.customerkey
WHERE
    s.orderdate >= '2023-01-01'
    AND s.orderdate <= '2023-12-31'
GROUP BY
    st.storecode
ORDER BY
    st.storecode ASC
LIMIT 10;

Unnamed: 0,storecode,male_customers,female_customers
0,-1,4318,4469
1,1,27,28
2,3,45,45
3,4,39,31
4,5,64,58
5,6,37,46
6,7,101,92
7,8,92,83
8,9,65,86
9,10,74,86


In [4]:
%%sql
SELECT
p.color,
SUM(CASE WHEN c.age BETWEEN 18 AND 25 THEN s.quantity * s.netprice * s.exchangerate ELSE 0 END ) AS revenue_18_25,
SUM(CASE WHEN c.age BETWEEN 26 AND 35 THEN s.quantity * s.netprice * s.exchangerate ELSE 0 END) AS revenue_26_35,
SUM(CASE WHEN c.age BETWEEN 36 AND 45 THEN s.quantity *s.netprice * s.exchangerate ELSE 0 END ) AS revenue_36_45,
SUM(CASE WHEN c.age > 45 THEN s.quantity * s.netprice * s.exchangerate ELSE 0 END ) AS revenue_45_plus
FROM sales s
LEFT JOIN product p ON s.productkey = p.productkey
LEFT JOIN customer c ON c.customerkey = s.customerkey
WHERE orderdate BETWEEN '2023-01-01'  AND '2023-12-31'
GROUP BY p.color
ORDER BY p.color;


Unnamed: 0,color,revenue_18_25,revenue_26_35,revenue_36_45,revenue_45_plus
0,Azure,3090.69,1774.84,4038.86,18983.02
1,Black,943552.37,1238734.41,1341317.75,5358620.36
2,blue,1787.89,120.78,120.49,2332.4
3,Blue,106355.33,218804.28,288108.29,1052231.18
4,Brown,173724.2,328400.92,263582.64,1236801.78
5,Gold,103767.39,160052.97,208220.43,741488.38
6,Green,76812.16,210850.64,98165.38,464677.93
7,Grey,248999.05,340149.05,369611.65,1459721.76
8,Orange,26824.11,26681.98,22936.18,234684.26
9,Pink,59398.18,100738.15,114540.22,386449.24


In [9]:
%%sql
SELECT
    -- Number of 2023 sales rows per product subcategory, split by the price
    -- band of the product. COUNT skips the NULL produced when a row is outside
    -- the band. Only the first 10 subcategories by name are returned.
    p.subcategoryname,
    COUNT(
        CASE
            WHEN p.price < 1000 THEN s.orderkey
        END
    ) AS orders_products_0_1000,
    COUNT(
        CASE
            WHEN p.price >= 1000 AND p.price <= 2000 THEN s.orderkey
        END
    ) AS orders_products_1000_2000,
    COUNT(
        CASE
            WHEN p.price > 2000 THEN s.orderkey
        END
    ) AS orders_products_2000_plus
FROM
    sales AS s
    LEFT JOIN product AS p
        ON s.productkey = p.productkey
WHERE
    s.orderdate >= '2023-01-01'
    AND s.orderdate <= '2023-12-31'
GROUP BY
    p.subcategoryname
ORDER BY
    p.subcategoryname ASC
LIMIT 10;

Unnamed: 0,subcategoryname,orders_products_0_1000,orders_products_1000_2000,orders_products_2000_plus
0,Air Conditioners,359,0,0
1,Bluetooth Headphones,843,0,0
2,Boxed Games,2044,0,0
3,Camcorders,250,77,0
4,Cameras & Camcorders Accessories,361,0,0
5,Car Video,549,0,0
6,Cell phones Accessories,1591,0,0
7,Coffee Machines,281,41,0
8,Computers Accessories,1265,0,0
9,Desktops,2613,0,0


In [5]:
%%sql
SELECT
st.countryname,
SUM(CASE WHEN st.squaremeters < 1000 THEN s.quantity * s.netprice * s.exchangerate ELSE 0 END) AS revenue_small_store,
SUM(CASE WHEN st.squaremeters BETWEEN 1000 AND 2000 THEN s.quantity * s.netprice * s.exchangerate ELSE 0 END ) AS revenue_medium_store,
SUM(CASE WHEN st.squaremeters > 2000 THEN s.quantity *s.netprice * s.exchangerate ELSE 0 END) AS revenue_large_store
FROM sales s
LEFT JOIN store st ON st.storekey = s.storekey
WHERE orderdate BETWEEN '2023-01-01' AND '2023-12-31'
AND st.countryname <> 'online'
GROUP BY st.countryname
ORDER BY st.countryname;

Unnamed: 0,countryname,revenue_small_store,revenue_medium_store,revenue_large_store
0,Australia,152603.07,805753.93,332051.57
1,Canada,0.0,810136.0,1098302.94
2,France,387769.84,0.0,0.0
3,Germany,329324.58,1025798.82,192337.5
4,Italy,0.0,205734.46,142339.35
5,Netherlands,157601.88,448792.47,0.0
6,Online,0.0,0.0,0.0
7,United Kingdom,0.0,642214.71,229729.64
8,United States,282562.9,5528952.03,246768.97
