<a target="_blank" href="https://colab.research.google.com/github/lukebarousse/Int_SQL_Data_Analytics_Course/blob/main/Resources/Blank_SQL_Notebook.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Blank SQL Notebook

#### Import Libraries & Database

In [2]:
import sys
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# If running in Google Colab, install PostgreSQL and restore the database
if 'google.colab' in sys.modules:
    # Update package installer
    !sudo apt-get update -qq > /dev/null 2>&1

    # Install PostgreSQL
    !sudo apt-get install postgresql -qq > /dev/null 2>&1

    # Start PostgreSQL service (suppress output)
    !sudo service postgresql start > /dev/null 2>&1

    # Set password for the 'postgres' user to avoid authentication errors (suppress output)
    !sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD 'password';" > /dev/null 2>&1

    # Create the 'colab_db' database (suppress output)
    !sudo -u postgres psql -c "CREATE DATABASE contoso_100k;" > /dev/null 2>&1

    # Download the PostgreSQL .sql dump
    !wget -q -O contoso_100k.sql https://github.com/lukebarousse/Int_SQL_Data_Analytics_Course/releases/download/v.0.0.0/contoso_100k.sql

    # Restore the dump file into the PostgreSQL database (suppress output)
    !sudo -u postgres psql contoso_100k < contoso_100k.sql > /dev/null 2>&1

    # Shift libraries from ipython-sql to jupysql
    !pip uninstall -y ipython-sql > /dev/null 2>&1
    !pip install jupysql > /dev/null 2>&1

# Load the sql extension for SQL magic
%load_ext sql

# Connect to the PostgreSQL database
%sql postgresql://postgres:password@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
%config SqlMagic.named_parameters = "disabled"

# Display pandas number to two decimal places
pd.options.display.float_format = '{:.2f}'.format

In [10]:
%%sql

WITH yearly_cohort AS (
    SELECT
        customerkey,
        EXTRACT(YEAR FROM MIN(orderdate)) AS cohort_year,
        SUM(quantity * netprice * exchangerate) AS total_customer_net_revenue
    FROM sales
    GROUP BY
        customerkey
),

cohort_summary AS (
    SELECT
        cohort_year,
        customerkey,
       total_customer_net_revenue AS customer_ltv,
        AVG(total_customer_net_revenue) OVER (PARTITION BY cohort_year) AS avg_cohort_ltv-- Added
    FROM yearly_cohort
),

cohort_final AS (
    SELECT DISTINCT
        cohort_year,
        avg_cohort_ltv
    FROM cohort_summary
    ORDER BY
        cohort_year
)

SELECT
    *,
    LAG(avg_cohort_ltv) OVER (ORDER BY cohort_year) AS prev_cohort_ltv,
    100 * (avg_cohort_ltv - LAG(avg_cohort_ltv) OVER (ORDER BY cohort_year)) /
        LAG(avg_cohort_ltv) OVER (ORDER BY cohort_year) AS ltv_change
FROM cohort_final

Unnamed: 0,cohort_year,avg_cohort_ltv,prev_cohort_ltv,ltv_change
0,2015,5271.59,,
1,2016,5404.92,5271.59,2.53
2,2017,5403.08,5404.92,-0.03
3,2018,4896.64,5403.08,-9.37
4,2019,4731.95,4896.64,-3.36
5,2020,3933.32,4731.95,-16.88
6,2021,3943.33,3933.32,0.25
7,2022,3315.52,3943.33,-15.92
8,2023,2543.18,3315.52,-23.29
9,2024,2037.55,2543.18,-19.88


In [3]:
%%sql
WITH weekly_orders AS(
  SELECT
  DATE_TRUNC('week' , orderdate) AS week,
  COUNT(DISTINCT orderkey) AS total_orders
  FROM sales
  WHERE EXTRACT(YEAR FROM orderdate) = 2023
  GROUP BY week
  ORDER BY week
)
SELECT *
FROM weekly_orders;

Unnamed: 0,week,total_orders
0,2022-12-26 00:00:00+00:00,12
1,2023-01-02 00:00:00+00:00,444
2,2023-01-09 00:00:00+00:00,363
3,2023-01-16 00:00:00+00:00,343
4,2023-01-23 00:00:00+00:00,348
5,2023-01-30 00:00:00+00:00,346
6,2023-02-06 00:00:00+00:00,352
7,2023-02-13 00:00:00+00:00,607
8,2023-02-20 00:00:00+00:00,642
9,2023-02-27 00:00:00+00:00,373


In [9]:
%%sql
WITH monthly_revenue AS(
  SELECT
    TO_CHAR(s.orderdate , 'YYYY-MM') AS order_month,
    SUM(s.quantity * s.netprice * s.exchangerate) AS total_customer_net_revenue
  FROM sales s
  WHERE EXTRACT(YEAR FROM s.orderdate) = 2023
  GROUP BY order_month
  ORDER BY order_month
)
SELECT
  *,
  FIRST_VALUE(total_customer_net_revenue) OVER (ORDER BY order_month) AS first_month_revenue,
  LAST_VALUE(total_customer_net_revenue) OVER (ORDER BY order_month ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) AS last_month_revenue,
  NTH_VALUE(total_customer_net_revenue , 3) OVER (ORDER BY order_month) AS third_month_revenue
FROM monthly_revenue;

Unnamed: 0,order_month,total_customer_net_revenue,first_month_revenue,last_month_revenue,third_month_revenue
0,2023-01,3664431.34,3664431.34,2928550.93,
1,2023-02,4465204.57,3664431.34,2928550.93,
2,2023-03,2244316.52,3664431.34,2928550.93,2244316.52
3,2023-04,1162796.16,3664431.34,2928550.93,2244316.52
4,2023-05,2943005.99,3664431.34,2928550.93,2244316.52
5,2023-06,2864500.03,3664431.34,2928550.93,2244316.52
6,2023-07,2337639.34,3664431.34,2928550.93,2244316.52
7,2023-08,2623919.79,3664431.34,2928550.93,2244316.52
8,2023-09,2622774.85,3664431.34,2928550.93,2244316.52
9,2023-10,2551322.61,3664431.34,2928550.93,2244316.52
