# Day 2 SQL Outline 

  - 1.0 Import CSV Files into Pandas Data Frame
  - 1.1 Create a SQLite Database and Import DataFrames
  - 2.0 Load SQL Extension and Connect to Database
  - 2.1 View Tables 
  - 3.0 Query Data Using SQL in JupyterLab 
  - 3.1 Running SQL Queries 
  - 4.0 Test Data Integrity 
  - 4.1 Data Integrity Checks
  - 4.1.1. Count Rows in a Table
  - 4.1.2. Check for Null Values
  - 4.1.3. Validate Foreign Keys
  - 4.2. Advanced Data Integrity Checks 
  - 4.2.1. Check for Duplicate Primary Keys
  - 4.2.2. Ensure Referential Integrity
  - 4.2.3. Validate Data Types and Ranges
  - 5.0 Explore Data and Generating Insights
  - 5.1. Top-Selling Products
  - 5.1.1. Sales by Region
  - 5.1.2. Inventory Levels for Reordering
  - 5.1.3. Supplier Performance Metrics
  - 5.1.4. Monthly Sales by Product Category
  - 5.1.5. Customer Segment Insights




## 1.0 Import CSV Files into Pandas Data Frame

In [1]:
import pandas as pd
from sqlalchemy import create_engine

In [3]:
# Load CSV Files
csv_files = {
    'calendar.csv': 'calendar',
    'suppliers.csv': 'suppliers',
    'customers.csv': 'customers',
    'products.csv': 'products',
    'bom.csv': 'bom',
    'warehouses.csv': 'warehouses',
    'stores.csv': 'stores',
    'manufacturing.csv': 'manufacturing',
    'inventory_records.csv': 'inventory_records',
    'production_schedule.csv': 'production_schedule',
    'sales_orders.csv': 'sales_orders',
    'order_line_items.csv': 'order_line_items',
    'backorders.csv': 'backorders',
    'purchase_orders.csv': 'purchase_orders',
    'purchase_order_line_items.csv': 'purchase_order_line_items',
    'material_requirements.csv': 'material_requirements',
    'forecast.csv': 'forecast'
}

# Read CSV files into DataFrames
dataframes = {}
for file, table_name in csv_files.items():
    df = pd.read_csv(file)
    dataframes[table_name] = df


### 1.1 Create a SQLite Database and Import DataFrames

In [4]:
# Create a SQLite engine
engine = create_engine('sqlite:///supply_chain.db')
# Write DataFrames to SQLite database
for table_name, df in dataframes.items():
 df.to_sql(table_name, engine, if_exists='replace', index=False)

## 2.0 Load SQL Extension and Connect to Database

In [5]:
%load_ext sql
# Connect to the SQLite database
%sql sqlite:///supply_chain.db

### 2.1 View Tables

In [6]:
%%sql
SELECT name FROM sqlite_master WHERE type='table';

 * sqlite:///supply_chain.db
Done.


name
calendar
suppliers
customers
products
bom
warehouses
stores
manufacturing
inventory_records
production_schedule


## 3.0 Query Data Using SQL in JupyterLab

### 3.1 Running SQL Queries

In [7]:
%%sql
SELECT * FROM customers LIMIT 10;

 * sqlite:///supply_chain.db
Done.


customer_id,customer_name,customer_type,contact_name,phone_number,email,billing_address,shipping_address,city,state_province,postal_code,country,payment_terms,credit_limit,account_manager,customer_segment,preferred_customer,date_created
1,Customer_1,Wholesale,Contact_1,+1-555-1194,contact1@customer.com,370 Market St,803 Market St,New York,State,87777,Canada,Net 60,12049,Manager_5,Professional,1,2022-01-27
2,Customer_2,Retail,Contact_2,+1-555-1274,contact2@customer.com,271 Market St,968 Market St,Vancouver,State,66861,Canada,Prepaid,0,Manager_3,Professional,1,2022-06-06
3,Customer_3,Retail,Contact_3,+1-555-9673,contact3@customer.com,841 Market St,708 Market St,Los Angeles,State,29473,USA,Prepaid,0,Manager_3,Casual Shopper,0,2022-08-27
4,Customer_4,Retail,Contact_4,+1-555-5501,contact4@customer.com,802 Market St,992 Market St,New York,State,92178,Canada,Prepaid,0,Manager_4,Casual Shopper,1,2022-06-05
5,Customer_5,Retail,Contact_5,+1-555-9777,contact5@customer.com,450 Market St,189 Market St,Los Angeles,State,62952,USA,Prepaid,0,Manager_2,Casual Shopper,0,2022-10-18
6,Customer_6,E-commerce,Contact_6,+1-555-5697,contact6@customer.com,564 Market St,441 Market St,Los Angeles,State,42112,Canada,Prepaid,0,Manager_2,Professional,0,2022-05-16
7,Customer_7,E-commerce,Contact_7,+1-555-1280,contact7@customer.com,710 Market St,578 Market St,Los Angeles,State,14455,USA,Prepaid,0,Manager_1,Outdoor Enthusiast,1,2022-05-24
8,Customer_8,E-commerce,Contact_8,+1-555-9228,contact8@customer.com,704 Market St,147 Market St,Toronto,State,25571,USA,Prepaid,0,Manager_5,Casual Shopper,1,2022-04-16
9,Customer_9,Retail,Contact_9,+1-555-8054,contact9@customer.com,170 Market St,205 Market St,Los Angeles,State,41569,Canada,Prepaid,0,Manager_3,Professional,1,2022-06-07
10,Customer_10,Retail,Contact_10,+1-555-3937,contact10@customer.com,335 Market St,633 Market St,Toronto,State,61989,Canada,Prepaid,0,Manager_4,Casual Shopper,1,2022-05-26


## 4.0 Data Integrity Checks

### 4.1.1. Count Rows in a Table

In [8]:
%%sql
SELECT COUNT(*) FROM customers;

 * sqlite:///supply_chain.db
Done.


COUNT(*)
2000


### 4.1.2. Check for Null Values

In [9]:
%%sql
SELECT * FROM products WHERE product_id IS NULL;

 * sqlite:///supply_chain.db
Done.


product_id,product_name,category,sub_category,brand,description,sku,upc,price,cost_price,size,color,material,weight,dimensions,season,gender,launch_date,discontinue_date,tax_class,status


### 4.1.3. Validate Foreign Keys

In [10]:
%%sql
SELECT s.order_id
FROM sales_orders s
LEFT JOIN customers c ON s.customer_id = c.customer_id
WHERE c.customer_id IS NULL;

 * sqlite:///supply_chain.db
Done.


order_id


## 4.2.0. Advanced Data Integrity Checks

### 4.2.1. Check for Duplicate Primary Keys

In [11]:
%%sql
SELECT product_id, COUNT(*) as count
FROM products
GROUP BY product_id
HAVING count > 1;

 * sqlite:///supply_chain.db
Done.


product_id,count


### 4.2.2. Ensure Referential Integrity

In [12]:
%%sql
SELECT oli.order_line_id
FROM order_line_items oli
LEFT JOIN products p ON oli.product_id = p.product_id
WHERE p.product_id IS NULL;

 * sqlite:///supply_chain.db
Done.


order_line_id


### 4.2.3. Validate Data Types and Ranges

In [13]:
%%sql
SELECT * FROM products WHERE price < 0

 * sqlite:///supply_chain.db
Done.


product_id,product_name,category,sub_category,brand,description,sku,upc,price,cost_price,size,color,material,weight,dimensions,season,gender,launch_date,discontinue_date,tax_class,status


## 5.0 Explore Data and Generating Insights

### 5.1. Top-Selling Products

In [14]:
%%sql
SELECT p.product_name, SUM(oli.quantity_ordered) as total_sold
FROM order_line_items oli
JOIN products p ON oli.product_id = p.product_id
GROUP BY p.product_name
ORDER BY total_sold DESC
LIMIT 10;

 * sqlite:///supply_chain.db
Done.


product_name,total_sold
Shirts 47,48
Backpacks 282,47
Backpacks 45,45
Sleeping Bags 81,44
Sleeping Bags 86,43
Pants 32,42
Tents 177,41
Tents 14,41
Sleeping Bags 219,41
Shirts 284,41


### 5.1.1. Sales by City and Customer Segment

In [34]:
%%sql
SELECT
    c.city AS region,
    SUM(so.total_order_value) AS total_sales
FROM
    sales_orders so
    JOIN customers c ON so.customer_id = c.customer_id
GROUP BY
    c.city
ORDER BY
    total_sales DESC;


 * sqlite:///supply_chain.db
Done.


region,total_sales
Toronto,885247.865
Vancouver,849254.825
Los Angeles,832234.545
Chicago,790753.664
New York,789462.58


In [17]:
%%sql
SELECT
    c.customer_segment,
    SUM(so.total_order_value) AS total_sales
FROM
    sales_orders so
    JOIN customers c ON so.customer_id = c.customer_id
GROUP BY
    c.customer_segment
ORDER BY
    total_sales DESC;


 * sqlite:///supply_chain.db
Done.


customer_segment,total_sales
Outdoor Enthusiast,1518502.241
Casual Shopper,1361783.292
Professional,1266667.946


### 5.1.2. Inventory Levels for Reordering

In [19]:
%%sql
SELECT product_id, quantity_on_hand, reorder_point
FROM inventory_records
WHERE quantity_on_hand <= reorder_point;

 * sqlite:///supply_chain.db
Done.


product_id,quantity_on_hand,reorder_point
1,-10,21
1,0,46
1,9,36
1,-23,24
1,11,38
1,31,38
1,16,33
1,25,42
1,27,39
1,20,24


In [33]:
%%sql
SELECT
    location_type,
    location_id,
    COUNT(DISTINCT product_id) AS products_past_reorder_point
FROM
    inventory_records
WHERE
    quantity_on_hand <= reorder_point
GROUP BY
    location_type,
    location_id
ORDER BY
    location_type ASC,
    location_id ASC;


 * sqlite:///supply_chain.db
Done.


location_type,location_id,products_past_reorder_point
Store,1,135
Store,2,146
Store,3,125
Store,4,136
Store,5,148
Store,6,135
Store,7,145
Store,8,133
Store,9,134
Store,10,132


### 5.1.3. Supplier Performance Metrics

In [20]:
%%sql
SELECT supplier_name, AVG(on_time_delivery_rate) as avg_delivery_rate
FROM suppliers
GROUP BY supplier_name
ORDER BY avg_delivery_rate DESC;

 * sqlite:///supply_chain.db
Done.


supplier_name,avg_delivery_rate
Supplier_28,0.99
Supplier_1,0.99
Supplier_15,0.98
Supplier_17,0.97
Supplier_18,0.96
Supplier_29,0.95
Supplier_13,0.94
Supplier_30,0.92
Supplier_12,0.91
Supplier_23,0.9


### 5.1.4. Monthly Sales by Product Category

In [24]:
%%sql

SELECT
    p.category AS product_category,
    strftime('%m', so.order_date) AS month,
    ROUND(SUM(oli.line_total), 2) AS net_revenue
FROM
    sales_orders so
    JOIN order_line_items oli ON so.order_id = oli.order_id
    JOIN products p ON oli.product_id = p.product_id
    JOIN customers c ON so.customer_id = c.customer_id
WHERE
    c.customer_type = 'Wholesale'
GROUP BY
    p.category,
    month
ORDER BY
    p.category ASC,
    month ASC,
    net_revenue DESC;


 * sqlite:///supply_chain.db
Done.


product_category,month,net_revenue
Apparel,1,44295.35
Apparel,2,36792.8
Apparel,3,38067.07
Apparel,4,64617.55
Apparel,5,35484.03
Apparel,6,46495.88
Apparel,7,23406.15
Apparel,8,61071.46
Apparel,9,25176.89
Apparel,10,48485.61


In [25]:
%%sql
-- Rolling Monthly Sales Growth by Product Category

WITH MonthlySales AS (
    SELECT
        p.category AS product_category,
        strftime('%m', so.order_date) AS order_month,
        SUM(oli.line_total) AS monthly_total_sales
    FROM
        sales_orders so
        JOIN order_line_items oli ON so.order_id = oli.order_id
        JOIN products p ON oli.product_id = p.product_id
    GROUP BY
        p.category,
        order_month
),
MonthlyGrowth AS (
    SELECT
        product_category,
        order_month,
        monthly_total_sales,
        LAG(monthly_total_sales) OVER (
            PARTITION BY product_category
            ORDER BY CAST(order_month AS INTEGER)
        ) AS prev_month_sales
    FROM
        MonthlySales
)
SELECT
    mg.product_category,
    mg.order_month,
    mg.monthly_total_sales,
    CASE
        WHEN mg.prev_month_sales IS NOT NULL AND mg.prev_month_sales != 0 THEN
            ROUND(((mg.monthly_total_sales - mg.prev_month_sales) / mg.prev_month_sales) * 100.0, 2)
        ELSE
            0.0
    END AS monthly_sales_growth_percentage
FROM
    MonthlyGrowth mg
ORDER BY
    mg.product_category ASC,
    mg.order_month ASC;


 * sqlite:///supply_chain.db
Done.


product_category,order_month,monthly_total_sales,monthly_sales_growth_percentage
Apparel,1,95598.83,0.0
Apparel,2,58678.25,-38.62
Apparel,3,57194.58,-2.53
Apparel,4,120887.71,111.36
Apparel,5,84708.94,-29.93
Apparel,6,177464.34,109.5
Apparel,7,145525.21,-18.0
Apparel,8,211761.69,45.52
Apparel,9,129995.17,-38.61
Apparel,10,139096.17,7.0


### 5.1.5. Customer Segment Insights

In [26]:
%%sql
-- Cross-Selling Opportunities

SELECT
    c.customer_segment,
    p.category AS product_category,
    SUM(oli.line_total) AS total_sales
FROM
    sales_orders so
    JOIN customers c ON so.customer_id = c.customer_id
    JOIN order_line_items oli ON so.order_id = oli.order_id
    JOIN products p ON oli.product_id = p.product_id
GROUP BY
    c.customer_segment,
    p.category
ORDER BY
    c.customer_segment ASC,
    total_sales DESC;


 * sqlite:///supply_chain.db
Done.


customer_segment,product_category,total_sales
Casual Shopper,Equipment,435006.7
Casual Shopper,Apparel,419670.92
Outdoor Enthusiast,Apparel,564373.14
Outdoor Enthusiast,Equipment,434585.37
Professional,Apparel,417015.63
Professional,Equipment,363807.13


In [29]:
%%sql
-- Average Order Value by Product Category and Payment Method for Retail Clients

WITH OrderTotals AS (
    SELECT
        so.order_id,
        p.category AS product_category,
        c.payment_terms AS payment_method,
        SUM(oli.line_total) AS order_total
    FROM
        sales_orders so
        JOIN order_line_items oli ON so.order_id = oli.order_id
        JOIN products p ON oli.product_id = p.product_id
        JOIN customers c ON so.customer_id = c.customer_id
    WHERE
        c.customer_type = 'Retail'
    GROUP BY
        so.order_id,
        p.category,
        c.payment_terms
)
SELECT
    product_category,
    payment_method,
    ROUND(AVG(order_total), 2) AS average_order_value
FROM
    OrderTotals
GROUP BY
    product_category,
    payment_method
ORDER BY
    product_category ASC,
    average_order_value DESC;


 * sqlite:///supply_chain.db
Done.


product_category,payment_method,average_order_value
Apparel,Prepaid,10.02
Equipment,Prepaid,9.06


In [31]:
%%sql
-- Payment Preferences by Client Type and Payment Method

SELECT
    c.customer_type,
    c.payment_terms AS payment_method,
    COUNT(DISTINCT so.order_id) AS order_count,
    SUM(so.total_order_value) AS total_sales
FROM
    sales_orders so
    JOIN customers c ON so.customer_id = c.customer_id
GROUP BY
    c.customer_type,
    c.payment_terms
ORDER BY
    c.customer_type ASC,
    total_sales DESC;



 * sqlite:///supply_chain.db
Done.


customer_type,payment_method,order_count,total_sales
E-commerce,Prepaid,20084,1307871.378
Retail,Prepaid,58835,1505191.049
Wholesale,Net 60,21081,1333891.052
