# Table Overview

In [1]:
import sys
import matplotlib.pyplot as plt
%matplotlib inline

# If running in Google Colab, install PostgreSQL and restore the database
if 'google.colab' in sys.modules:
    # Install PostgreSQL
    !sudo apt-get install postgresql -qq > /dev/null 2>&1

    # Start PostgreSQL service (suppress output)
    !sudo service postgresql start > /dev/null 2>&1

    # Set password for the 'postgres' user to avoid authentication errors (suppress output)
    !sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD 'password';" > /dev/null 2>&1

    # Create the 'colab_db' database (suppress output)
    !sudo -u postgres psql -c "CREATE DATABASE contoso_100k;" > /dev/null 2>&1

    # Download the PostgreSQL .sql dump
    !wget -q -O contoso_100k.sql https://github.com/lukebarousse/Int_SQL_Data_Analytics_Course/releases/download/v.0.0.0/contoso_100k.sql

    # Restore the dump file into the PostgreSQL database (suppress output)
    !sudo -u postgres psql contoso_100k < contoso_100k.sql > /dev/null 2>&1

    # Shift libraries from ipython-sql to jupysql
    !pip uninstall -y ipython-sql > /dev/null 2>&1
    !pip install jupysql > /dev/null 2>&1

# Load the ipython-sql extension for SQL magic
%load_ext sql

# Connect to the PostgreSQL database
%sql postgresql://postgres:password@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

## Dataset Overview

Goal:
- Familiar with dataset
- Looking at the main tables
- Explain why we use exchangerate

## Tables

There are X tables in total but we'll be mainly working with the following:

1. Sales
2. Custoemrs
2. Product

Below is the EDR Diagram

**Insert EDR Diagram 📊**

### Sales

Overview of the sales table and the columns we'll use the most.

Net Revenue
Definition: The total revenue after accounting for discounts, promotions, and adjustments.
Formula: NetPrice * Quantity
Why: The NetPrice reflects the actual price paid by customers, including any discounts applied.

Comparison to Gross Revenue
Gross Revenue: Based on the full price without discounts (UnitPrice * Quantity).
Net Revenue: Adjusted for discounts (NetPrice * Quantity).


In [None]:
%%sql

SELECT
    s.orderkey,
    s.orderdate,
    s.customerkey,
    s.storekey,
    s.productkey,
    s.quantity,
    s.netprice,
    s.currencycode,
    s.exchangerate
FROM
    sales s
ORDER BY
    s.orderkey

Unnamed: 0,orderkey,orderdate,customerkey,storekey,productkey,quantity,unitprice,currencycode,exchangerate
0,1000,2015-01-01,947009,400,48,1,112.4625,GBP,0.64155
1,1000,2015-01-01,947009,400,460,1,749.7500,GBP,0.64155
2,1001,2015-01-01,1772036,430,1730,2,54.3760,USD,1.00000
3,1002,2015-01-01,1518349,660,955,4,315.0400,USD,1.00000
4,1002,2015-01-01,1518349,660,62,7,135.7500,USD,1.00000
...,...,...,...,...,...,...,...,...,...
199868,3398034,2024-04-20,664396,999999,1651,7,159.9900,EUR,0.93870
199869,3398034,2024-04-20,664396,999999,1646,1,159.9900,EUR,0.93870
199870,3398035,2024-04-20,267690,999999,1575,2,60.9900,CAD,1.37670
199871,3398035,2024-04-20,267690,999999,415,5,326.0000,CAD,1.37670


Add the calculation for the `net_revenue`.

We're multiplying `quantity * netprice * exchangerate`.

The `exchangerate` must be multiplied because not every sale is in USD currency, you can see which currency it is in the `currencycode` column.

In [None]:
%%sql

SELECT
    s.orderkey,
    s.orderdate,
    s.customerkey,
    s.storekey,
    s.productkey,
    s.quantity,
    s.unitprice,
    s.currencycode,
    s.exchangerate,
    s.quantity * s.netprice * s.exchangerate AS net_revenue -- Added
FROM
    sales s
ORDER BY
    s.orderkey

Unnamed: 0,orderkey,orderdate,customerkey,storekey,productkey,quantity,unitprice,currencycode,exchangerate,total_sale_amount
0,1000,2015-01-01,947009,400,48,1,112.4625,GBP,0.64155,72.150317
1,1000,2015-01-01,947009,400,460,1,749.7500,GBP,0.64155,481.002112
2,1001,2015-01-01,1772036,430,1730,2,54.3760,USD,1.00000,108.752000
3,1002,2015-01-01,1518349,660,955,4,315.0400,USD,1.00000,1260.160000
4,1002,2015-01-01,1518349,660,62,7,135.7500,USD,1.00000,950.250000
...,...,...,...,...,...,...,...,...,...,...
199868,3398034,2024-04-20,664396,999999,1651,7,159.9900,EUR,0.93870,1051.278291
199869,3398034,2024-04-20,664396,999999,1646,1,159.9900,EUR,0.93870,150.182613
199870,3398035,2024-04-20,267690,999999,1575,2,60.9900,CAD,1.37670,167.929866
199871,3398035,2024-04-20,267690,999999,415,5,326.0000,CAD,1.37670,2244.021000


### Customers

In [4]:
%%sql

SELECT
    s.orderkey,
    s.orderdate,
    s.customerkey,
    s.storekey,
    s.productkey,
    s.quantity,
    s.unitprice,
    s.quantity * s.unitprice * s.exchangerate AS total_sale_amount
FROM
    sales s
ORDER BY
    s.orderkey

Unnamed: 0,orderkey,orderdate,customerkey,storekey,productkey,quantity,unitprice,total_sale_amount
0,1000,2015-01-01,947009,400,48,1,112.4625,72.150317
1,1000,2015-01-01,947009,400,460,1,749.7500,481.002112
2,1001,2015-01-01,1772036,430,1730,2,54.3760,108.752000
3,1002,2015-01-01,1518349,660,955,4,315.0400,1260.160000
4,1002,2015-01-01,1518349,660,62,7,135.7500,950.250000
...,...,...,...,...,...,...,...,...
199868,3398034,2024-04-20,664396,999999,1651,7,159.9900,1051.278291
199869,3398034,2024-04-20,664396,999999,1646,1,159.9900,150.182613
199870,3398035,2024-04-20,267690,999999,1575,2,60.9900,167.929866
199871,3398035,2024-04-20,267690,999999,415,5,326.0000,2244.021000


### Product

In [5]:
%%sql

SELECT
    s.orderkey,
    s.orderdate,
    s.customerkey,
    s.storekey,
    s.productkey,
    s.quantity,
    s.unitprice,
    s.quantity * s.unitprice * s.exchangerate AS total_sale_amount
FROM
    sales s
ORDER BY
    s.orderkey

Unnamed: 0,orderkey,orderdate,customerkey,storekey,productkey,quantity,unitprice,total_sale_amount
0,1000,2015-01-01,947009,400,48,1,112.4625,72.150317
1,1000,2015-01-01,947009,400,460,1,749.7500,481.002112
2,1001,2015-01-01,1772036,430,1730,2,54.3760,108.752000
3,1002,2015-01-01,1518349,660,955,4,315.0400,1260.160000
4,1002,2015-01-01,1518349,660,62,7,135.7500,950.250000
...,...,...,...,...,...,...,...,...
199868,3398034,2024-04-20,664396,999999,1651,7,159.9900,1051.278291
199869,3398034,2024-04-20,664396,999999,1646,1,159.9900,150.182613
199870,3398035,2024-04-20,267690,999999,1575,2,60.9900,167.929866
199871,3398035,2024-04-20,267690,999999,415,5,326.0000,2244.021000
