In [1]:
import sqlite3
%load_ext sql

In [2]:
%sql postgresql://postgres:password@localhost:5432/sql_challenge

## Data cleaning

In [4]:
%%sql
DROP TABLE IF EXISTS clean_pizza_recipes;

SELECT pizza_id, UNNEST(STRING_TO_ARRAY(toppings, ',')) AS toppings
INTO clean_pizza_recipes
FROM pizza_recipes;

SELECT *
FROM clean_pizza_recipes;

 * postgresql://postgres:***@localhost:5432/sql_challenge
Done.
14 rows affected.
14 rows affected.


pizza_id,toppings
1,1
1,2
1,3
1,4
1,5
1,6
1,8
1,10
2,4
2,6


In [5]:
%%sql

DROP TABLE IF EXISTS clean_customer_orders;

SELECT order_id, customer_id, pizza_id, 
  CASE 
    WHEN exclusions LIKE 'null' OR exclusions LIKE '' THEN NULL
    ELSE exclusions
    END AS exclusions,
  CASE 
    WHEN extras LIKE 'null' OR extras LIKE '' THEN NULL
    ELSE extras 
    END AS extras, 
  order_time
INTO clean_customer_orders
FROM customer_orders;

SELECT *
FROM clean_customer_orders;

 * postgresql://postgres:***@localhost:5432/sql_challenge
Done.
14 rows affected.
14 rows affected.


order_id,customer_id,pizza_id,exclusions,extras,order_time
1,101,1,,,2020-01-01 18:05:02
2,101,1,,,2020-01-01 19:00:52
3,102,1,,,2020-01-02 23:51:23
3,102,2,,,2020-01-02 23:51:23
4,103,1,4,,2020-01-04 13:23:46
4,103,1,4,,2020-01-04 13:23:46
4,103,2,4,,2020-01-04 13:23:46
5,104,1,,1,2020-01-08 21:00:29
6,101,2,,,2020-01-08 21:03:13
7,105,2,,1,2020-01-08 21:20:29


In [6]:
%%sql

DROP TABLE IF EXISTS clean_runner_orders;

SELECT order_id, runner_id,
  CASE 
    WHEN pickup_time LIKE 'null' OR pickup_time LIKE '' THEN NULL
    ELSE pickup_time 
    END AS pickup_time,
  CASE 
    WHEN distance LIKE 'null' OR distance LIKE '' THEN NULL
    WHEN distance LIKE '%km' THEN TRIM('km' from distance) 
    ELSE distance END AS distance,
  CASE 
    WHEN duration LIKE 'null' OR duration LIKE '' THEN NULL 
    WHEN duration LIKE '%mins' THEN TRIM('mins' from duration) 
    WHEN duration LIKE '%minute' THEN TRIM('minute' from duration)        
    WHEN duration LIKE '%minutes' THEN TRIM('minutes' from duration)       
    ELSE duration END AS duration,
  CASE 
    WHEN cancellation LIKE 'null' OR cancellation LIKE '' THEN NULL
    ELSE cancellation END AS cancellation
INTO clean_runner_orders
FROM runner_orders;

ALTER TABLE clean_runner_orders
ALTER COLUMN pickup_time TYPE TIMESTAMP USING pickup_time::timestamp,
ALTER COLUMN distance TYPE FLOAT USING distance::double precision,
ALTER COLUMN duration TYPE INT USING duration::integer;

SELECT *
FROM clean_runner_orders;

 * postgresql://postgres:***@localhost:5432/sql_challenge
Done.
10 rows affected.
Done.
10 rows affected.


order_id,runner_id,pickup_time,distance,duration,cancellation
1,1,2020-01-01 18:15:34,20.0,32.0,
2,1,2020-01-01 19:10:54,20.0,27.0,
3,1,2020-01-03 00:12:37,13.4,20.0,
4,2,2020-01-04 13:53:03,23.4,40.0,
5,3,2020-01-08 21:10:57,10.0,15.0,
6,3,,,,Restaurant Cancellation
7,2,2020-01-08 21:30:45,25.0,25.0,
8,2,2020-01-10 00:15:02,23.4,15.0,
9,2,,,,Customer Cancellation
10,1,2020-01-11 18:50:20,10.0,10.0,


## Pizza Metrics

1. How many pizzas were ordered?

In [7]:
%%sql
SELECT COUNT(*)
FROM clean_customer_orders;

 * postgresql://postgres:***@localhost:5432/sql_challenge
1 rows affected.


count
14


14 Pizzas were ordered

2. How many unique customer orders were made?

In [8]:
%%sql
SELECT COUNT(DISTINCT order_id)
FROM clean_customer_orders;

 * postgresql://postgres:***@localhost:5432/sql_challenge
1 rows affected.


count
10


10 unique customer orders were made

3. How many successful orders were delivered by each runner?

In [37]:
%%sql
SELECT runner_id, COUNT(*)
FROM clean_runner_orders
WHERE pickup_time IS NOT NULL
GROUP BY runner_id;

 * postgresql://postgres:***@localhost:5432/sql_challenge
3 rows affected.


runner_id,count
3,1
2,3
1,4


Runner 3 had 1 successful order, runner 2 had 3 successful order, runner 1 had 4 successful order

4. How many of each type of pizza was delivered?

In [49]:
%%sql
SELECT co.pizza_id, pn.pizza_name, COUNT(*)
FROM clean_customer_orders co
    JOIN clean_runner_orders ro 
    ON co.order_id = ro.order_id
    JOIN pizza_names pn
    ON co.pizza_id = pn.pizza_id
WHERE ro.pickup_time IS NOT NULL
GROUP BY 1, 2;

 * postgresql://postgres:***@localhost:5432/sql_challenge
2 rows affected.


pizza_id,pizza_name,count
1,Meatlovers,9
2,Vegetarian,3


9 meatlovers and 3 vegetarian pizzas were delivered

5. How many Vegetarian and Meatlovers were ordered by each customer?

In [53]:
%%sql
SELECT co.customer_id, pn.pizza_name, COUNT(*)
FROM clean_customer_orders co
    JOIN pizza_names pn
    ON co.pizza_id = pn.pizza_id
GROUP BY 1, 2
ORDER BY 1;

 * postgresql://postgres:***@localhost:5432/sql_challenge
8 rows affected.


customer_id,pizza_name,count
101,Meatlovers,2
101,Vegetarian,1
102,Meatlovers,2
102,Vegetarian,1
103,Meatlovers,3
103,Vegetarian,1
104,Meatlovers,3
105,Vegetarian,1


Customer 101 ordered 2 meatlovers and 1 vegetarian, customer 102 ordered 2 meatlovers and 1 vegetarian, customer 103 ordered 3 meatlovers and 1 vegetarial, customer 104 ordered 3 meatlovers, customer 105 ordered 1 vegetarian

6. What was the maximum number of pizzas delivered in a single order?

In [37]:
%%sql
SELECT MAX(count) FROM(
    SELECT co.order_id, COUNT(*)
    FROM clean_customer_orders co
        JOIN clean_runner_orders ro 
        ON co.order_id = ro.order_id
    WHERE ro.pickup_time IS NOT NULL
    GROUP BY 1) AS orders

 * postgresql://postgres:***@localhost:5432/sql_challenge
1 rows affected.


max
3


In [43]:
%%sql
SELECT COUNT(*)
    FROM clean_customer_orders co
        JOIN clean_runner_orders ro 
        ON co.order_id = ro.order_id
    WHERE ro.pickup_time IS NOT NULL
    GROUP BY co.order_id
    ORDER BY 1 DESC
    LIMIT 1

 * postgresql://postgres:***@localhost:5432/sql_challenge
1 rows affected.


count
3


The highest pizza delivered in a single order was 3

7. For each customer, how many delivered pizzas had at least 1 change and how many had no changes?

In [70]:
%%sql
SELECT customer_id,
    SUM(CASE WHEN exclusions IS NOT NULL OR extras IS NOT NULL THEN 1
    ELSE 0 END) AS change
    FROM clean_customer_orders
    -- INCLUDE THE IS NOT NULL STATEMENT
GROUP BY 1
ORDER BY 1;

 * postgresql://postgres:***@localhost:5432/sql_challenge
5 rows affected.


customer_id,change
101,0
102,0
103,4
104,2
105,1


Customer 101 and 102 had 0 change while customer 103 , 104 and 105 had 4,2 and 1 change respectively

8. How many pizzas were delivered that had both exclusions and extras?

In [75]:
%%sql
SELECT 
    SUM(CASE WHEN exclusions IS NOT NULL AND extras IS NOT NULL THEN 1
    ELSE 0 END) AS number
FROM clean_customer_orders co
    JOIN clean_runner_orders ro 
    ON co.order_id = ro.order_id
    WHERE ro.pickup_time IS NOT NULL;

 * postgresql://postgres:***@localhost:5432/sql_challenge
1 rows affected.


number
1


In [44]:
%%sql
SELECT COUNT(*)
FROM clean_customer_orders co
    JOIN clean_runner_orders ro 
    ON co.order_id = ro.order_id
    WHERE ro.pickup_time IS NOT NULL 
        AND exclusions IS NOT NULL 
        AND extras IS NOT NULL;

 * postgresql://postgres:***@localhost:5432/sql_challenge
1 rows affected.


count
1


only 1 pizza was delivered with both exclusions and extras 

9. What was the total volume of pizzas ordered for each hour of the day?

In [85]:
%%sql
SELECT DATE_PART('hour', order_time)::INTEGER AS hour, COUNT(*)
FROM clean_customer_orders
GROUP BY 1
ORDER BY 1;

 * postgresql://postgres:***@localhost:5432/sql_challenge
6 rows affected.


hour,count
11,1
13,3
18,3
19,1
21,3
23,3


1pm, 6pm and 9pm and 11pm had the highest count of pizza at 3, 11am and 7pm had the lowest count at 1

10. What was the volume of orders for each day of the week?

In [89]:
%%sql
SELECT to_char(order_time, 'Day') AS day, COUNT(*)
FROM clean_customer_orders
GROUP BY 1;

 * postgresql://postgres:***@localhost:5432/sql_challenge
4 rows affected.


day,count
Saturday,5
Thursday,3
Friday,1
Wednesday,5


Saturday and Wednesday got the highest with 5 orders, Thursday has 3 orders and Friday has only 1 order.

## Runner and Customer Experience

1. How many runners signed up for each 1 week period? (i.e. week starts 2021-01-01)

In [53]:
%%sql
SELECT DATE_PART('week', registration_date + interval '3 days')::INTEGER AS week, COUNT(*)
FROM runners
GROUP BY 1
ORDER BY 1;

 * postgresql://postgres:***@localhost:5432/sql_challenge
3 rows affected.


week,count
1,2
2,1
3,1


2. What was the average time in minutes it took for each runner to arrive at the Pizza Runner HQ to pickup the order?

In [24]:
%%sql
SELECT DATE_PART('minutes', AVG(minutes))::integer 
FROM (
    SELECT (pickup_time - order_time) AS minutes
    FROM clean_customer_orders AS co
        JOIN clean_runner_orders AS ro
        ON co.order_id = ro.order_id
        WHERE ro.pickup_time IS NOT NULL) AS arrival_time

 * postgresql://postgres:***@localhost:5432/sql_challenge
1 rows affected.


date_part
18.0


There is an average of 18 minutes between order time and pickup time

3. Is there any relationship between the number of pizzas and how long the order takes to prepare?

In [30]:
%%sql
SELECT count, DATE_PART('minutes', AVG(minutes))::integer 
FROM(
    SELECT co.order_id, count(*), (pickup_time - order_time) AS minutes
    FROM clean_customer_orders AS co
        JOIN clean_runner_orders AS ro
        ON co.order_id = ro.order_id
    WHERE ro.pickup_time IS NOT NULL
    GROUP BY 1,3) AS cpm
GROUP BY 1

 * postgresql://postgres:***@localhost:5432/sql_challenge
3 rows affected.


count,date_part
3,29
2,18
1,12


More pizzas take more time to prepare on average

4. What was the average distance traveled for each customer?

In [39]:
%%sql
SELECT co.customer_id, ROUND(AVG(ro.distance)::numeric, 2)
FROM clean_customer_orders AS co
    JOIN clean_runner_orders AS ro
    ON co.order_id = ro.order_id
WHERE ro.pickup_time IS NOT NULL
GROUP BY 1
ORDER BY 1;

 * postgresql://postgres:***@localhost:5432/sql_challenge
5 rows affected.


customer_id,round
101,20.0
102,16.73
103,23.4
104,10.0
105,25.0


Customer 101, 102, 103, 104, 105 have a travel distance of 20km, 16.73km, 23.40km, 10.0km, 25.0km respectively 

5. What was the difference between the longest and shortest delivery times for all orders?

In [43]:
%%sql
SELECT MAX(duration) - MIN(duration) AS difference
FROM clean_runner_orders

 * postgresql://postgres:***@localhost:5432/sql_challenge
1 rows affected.


difference
30


There is a difference of 30 minutes between the longest and shortest delivery times

6. What was the average speed for each runner for each delivery and do you notice any trend for these values?

In [54]:
%%sql
SELECT order_id, runner_id, ROUND(AVG(distance/duration*60)::numeric, 2) AS speed
FROM clean_runner_orders
WHERE pickup_time IS NOT NULL
GROUP BY 1, 2
ORDER BY 2

 * postgresql://postgres:***@localhost:5432/sql_challenge
8 rows affected.


order_id,runner_id,speed
10,1,60.0
2,1,44.44
1,1,37.5
3,1,40.2
7,2,60.0
4,2,35.1
8,2,93.6
5,3,40.0


Runner 2 is generally faster than runner 1 and 3

7. What is the successful delivery percentage for each runner?

In [62]:
%%sql
SELECT runner_id,
    (100 * SUM(CASE 
        WHEN cancellation IS NULL THEN 1
        ELSE 0 END)/ COUNT(*)) AS success
FROM clean_runner_orders
GROUP BY 1


 * postgresql://postgres:***@localhost:5432/sql_challenge
3 rows affected.


runner_id,success
3,50
2,75
1,100


Runner 1 had 100% success, runner 2 had 75% and runner 3 had 50%

## Ingredient Optimisation

1. What are the standard ingredients for each pizza?

In [8]:
%%sql
SELECT pizza_name, topping_name
FROM clean_pizza_recipes pr
JOIN pizza_names pn
ON pr.pizza_id = pn.pizza_id
JOIN pizza_toppings pt
ON pr.toppings::integer = pt.topping_id


 * postgresql://postgres:***@localhost:5432/sql_challenge
14 rows affected.


pizza_name,topping_name
Meatlovers,BBQ Sauce
Meatlovers,Pepperoni
Meatlovers,Cheese
Meatlovers,Salami
Meatlovers,Chicken
Meatlovers,Bacon
Meatlovers,Mushrooms
Meatlovers,Beef
Vegetarian,Tomato Sauce
Vegetarian,Cheese


In [34]:
%%sql
SELECT 
CASE WHEN pizza_name = 'Meatlovers' THEN topping_name END AS meatlovers,
CASE WHEN pizza_name = 'Vegetarian' THEN topping_name END AS vegetarian
FROM (
SELECT pizza_name, topping_name
FROM clean_pizza_recipes pr
JOIN pizza_names pn
ON pr.pizza_id = pn.pizza_id
JOIN pizza_toppings pt
ON pr.toppings::integer = pt.topping_id) AS foo


 * postgresql://postgres:***@localhost:5432/sql_challenge
14 rows affected.


meatlovers,vegetarian
BBQ Sauce,
Pepperoni,
Cheese,
Salami,
Chicken,
Bacon,
Mushrooms,
Beef,
,Tomato Sauce
,Cheese


In [None]:
%%sql

In [None]:
%%sql