In [1]:
import sqlite3
from sqlite3 import Error
import pandas as pd
import numpy as np

# Database

## Create a new database

In [2]:
# Database Path object for easier calling
db_path = r"db\Pizza_Runner.db"

In [3]:
# Create a new database file

def create_database(db_file):
    """ create a database connection to a SQLite database """
    conn = None
    # connect to database file
    try:
        conn = sqlite3.connect(db_file)
        print(f'sqlite3 version: {sqlite3.version}')
        print('Created Database')
    # print error if connection can not be established
    except Error as error:
        print(error)
        print('Could not create Database')
    # close connection
    finally:
        if conn:
            conn.close()
    

# run the above function (save database file to path)
if __name__ == '__main__':
    create_database(db_path)

sqlite3 version: 2.6.0
Created Database


## Create Tables

In [4]:
# function for establish connection to SQLite3 for editing database

def establish_connection(db_file):
    """ 
    creates a database connection to a SQLite database file
    
    :returns: Connection object or None
    """
    conn = None
    # connect to database file
    try:
        conn = sqlite3.connect(db_file)
        print(f'sqlite3 version: {sqlite3.version}')
        print('Connected to Database path')
    # print error if connection can not be established
    except Error as error:
        print(error)
        print('Could not create Database')
    
    return conn

In [5]:
conn = establish_connection(db_path)

sqlite3 version: 2.6.0
Connected to Database path


In [6]:
# function for creating table in database

def create_table(conn, create_table_sql):
    """ create a table from the create_table_sql statement
    :param conn: Connection object
    :param create_table_sql: a CREATE TABLE statement
    :return:
    """
    print('Creating tables')
    try:
        # make object cursor from connect
        c = conn.cursor()
        # from cursor use execute to create table
        c.execute(create_table_sql)
    except Error as error:
        print(error)

In [7]:
# Create database schema

def main():
    

    
    sql_create_table_runners = """ CREATE TABLE IF NOT EXISTS runners(
                                                                       runner_id INTEGER PRIMARY KEY NOT NULL,
                                                                       registration_date DATE,
                                                                       FOREIGN KEY (runner_id) REFERENCES runner_orders (runner_id)
                                                                     );"""
    
    sql_create_table_customer_orders = """ CREATE TABLE IF NOT EXISTS customer_orders(
                                                                                       order_id INTEGER PRIMARY KEY NOT NULL,
                                                                                       customer_id INTEGER,
                                                                                       pizza_id INTEGER,
                                                                                       exclusions VARCHAR(8),
                                                                                       extras VARCHAR(8),
                                                                                       order_time TIMESTAMP
                                                                                     );"""
    
    sql_create_table_runner_orders = """ CREATE TABLE IF NOT EXISTS runner_orders(
                                                                                   order_id INTEGER PRIMARY KEY NOT NULL,
                                                                                   runner_id INTEGER,
                                                                                   pickup_time VARCHAR(19),
                                                                                   distance VARCHAR(7),
                                                                                   duration VARCHAR(10),
                                                                                   cancellation VARCHAR(23),
                                                                                   FOREIGN KEY (runner_id) REFERENCES runner (runner_id)
                                                                                   FOREIGN KEY (order_id) REFERENCES customer_orders (order_id)
                                                                                 );"""
    
    sql_create_table_pizza_names = """ CREATE TABLE IF NOT EXISTS pizza_names(
                                                                               pizza_id INTEGER NOT NULL,
                                                                               pizza_name TEXT,
                                                                               FOREIGN KEY (pizza_id) REFERENCES customer_orders (pizza_id)
                                                                             );"""
    
    sql_create_table_pizza_toppings = """ CREATE TABLE IF NOT EXISTS pizza_toppings(
                                                                                     topping_id INTEGER PRIMARY KEY NOT NULL,
                                                                                     topping_name TEXT
                                                                                   );"""
    
    sql_create_table_pizza_recipes = """ CREATE TABLE IF NOT EXISTS pizza_recipes(
                                                                                   pizza_id INTEGER PRIMARY KEY NOT NULL,
                                                                                   topping_name TEXT,
                                                                                   FOREIGN KEY (pizza_id) REFERENCES customer_orders (pizza_id)
                                                                                 );"""
    

    # create a database connection
    

    # create tables
    if conn is not None:
        # create sales table
        create_table(conn, sql_create_table_runners)
        create_table(conn, sql_create_table_customer_orders)
        create_table(conn, sql_create_table_runner_orders)
        create_table(conn, sql_create_table_pizza_names)
        create_table(conn, sql_create_table_pizza_toppings)
        create_table(conn, sql_create_table_pizza_recipes)
        print("Created tables")
    else:
        print("Error! cannot create the database connection.")
        
    

    
if __name__ == '__main__':
    main()

Creating tables
Creating tables
Creating tables
Creating tables
Creating tables
Creating tables
Created tables


## Insert data into Tables

In [8]:
conn = establish_connection(db_path)

sqlite3 version: 2.6.0
Connected to Database path


In [9]:
runners_values =  [
                   (1, '2021-01-01'),
                   (2, '2021-01-03'),
                   (3, '2021-01-08'),
                   (4, '2021-01-15')
                  ]
runners_table = pd.DataFrame(data=runners_values,columns=["runner_id", "registration_date"])
display(runners_table)
runners_table.to_sql('runners',sqlite3.connect(db_path), if_exists='replace')

Unnamed: 0,runner_id,registration_date
0,1,2021-01-01
1,2,2021-01-03
2,3,2021-01-08
3,4,2021-01-15


In [10]:
customer_orders_values =  [
                           ('1', '101', '1', '', '', '2020-01-01 18:05:02'),
                           ('2', '101', '1', '', '', '2020-01-01 19:00:52'),
                           ('3', '102', '1', '', '', '2020-01-02 23:51:23'),
                           ('3', '102', '2', '', np.nan, '2020-01-02 23:51:23'),
                           ('4', '103', '1', '4', '', '2020-01-04 13:23:46'),
                           ('4', '103', '1', '4', '', '2020-01-04 13:23:46'),
                           ('4', '103', '2', '4', '', '2020-01-04 13:23:46'),
                           ('5', '104', '1', 'null', '1', '2020-01-08 21:00:29'),
                           ('6', '101', '2', 'null', 'null', '2020-01-08 21:03:13'),
                           ('7', '105', '2', 'null', '1', '2020-01-08 21:20:29'),
                           ('8', '102', '1', 'null', 'null', '2020-01-09 23:54:33'),
                           ('9', '103', '1', '4', '1, 5', '2020-01-10 11:22:59'),
                           ('10', '104', '1', 'null', 'null', '2020-01-11 18:34:49'),
                           ('10', '104', '1', '2, 6', '1, 4', '2020-01-11 18:34:49')
                          ]

customer_orders_table = pd.DataFrame(data=customer_orders_values,columns=['order_id','customer_id','pizza_id','exclusions','extras','order_time'])
display(customer_orders_table)
customer_orders_table.to_sql('customer_orders',sqlite3.connect(db_path), if_exists='replace')

Unnamed: 0,order_id,customer_id,pizza_id,exclusions,extras,order_time
0,1,101,1,,,2020-01-01 18:05:02
1,2,101,1,,,2020-01-01 19:00:52
2,3,102,1,,,2020-01-02 23:51:23
3,3,102,2,,,2020-01-02 23:51:23
4,4,103,1,4,,2020-01-04 13:23:46
5,4,103,1,4,,2020-01-04 13:23:46
6,4,103,2,4,,2020-01-04 13:23:46
7,5,104,1,,1,2020-01-08 21:00:29
8,6,101,2,,,2020-01-08 21:03:13
9,7,105,2,,1,2020-01-08 21:20:29


In [11]:
runner_orders_values = [ 
                          ('1', '1', '2020-01-01 18:15:34', '20km', '32 minutes', ''),
                          ('2', '1', '2020-01-01 19:10:54', '20km', '27 minutes', ''),
                          ('3', '1', '2020-01-03 00:12:37', '13.4km', '20 mins', np.nan),
                          ('4', '2', '2020-01-04 13:53:03', '23.4', '40', np.nan),
                          ('5', '3', '2020-01-08 21:10:57', '10', '15', np.nan),
                          ('6', '3', 'null', 'null', 'null', 'Restaurant Cancellation'),
                          ('7', '2', '2020-01-08 21:30:45', '25km', '25mins', 'null'),
                          ('8', '2', '2020-01-10 00:15:02', '23.4 km', '15 minute', 'null'),
                          ('9', '2', 'null', 'null', 'null', 'Customer Cancellation'),
                          ('10', '1', '2020-01-11 18:50:20', '10km', '10minutes', 'null')
                        ]

runner_orders_table = pd.DataFrame(data=runner_orders_values,columns=['order_id','runner_id','pickup_time','distance','duration','cancellation'])
display(runner_orders_table)
runner_orders_table.to_sql('runner_orders',sqlite3.connect(db_path), if_exists='replace')

Unnamed: 0,order_id,runner_id,pickup_time,distance,duration,cancellation
0,1,1,2020-01-01 18:15:34,20km,32 minutes,
1,2,1,2020-01-01 19:10:54,20km,27 minutes,
2,3,1,2020-01-03 00:12:37,13.4km,20 mins,
3,4,2,2020-01-04 13:53:03,23.4,40,
4,5,3,2020-01-08 21:10:57,10,15,
5,6,3,,,,Restaurant Cancellation
6,7,2,2020-01-08 21:30:45,25km,25mins,
7,8,2,2020-01-10 00:15:02,23.4 km,15 minute,
8,9,2,,,,Customer Cancellation
9,10,1,2020-01-11 18:50:20,10km,10minutes,


In [12]:
pizza_names_values =  [
                        (1, 'Meatlovers'),
                        (2, 'Vegetarian')
                      ]
pizza_names_table = pd.DataFrame(data=pizza_names_values,columns=['pizza_id','pizza_name'])
display(pizza_names_table)
pizza_names_table.to_sql('pizza_names',sqlite3.connect(db_path), if_exists='replace')

Unnamed: 0,pizza_id,pizza_name
0,1,Meatlovers
1,2,Vegetarian


In [13]:
pizza_recipes_values =  [
                          (1, '1, 2, 3, 4, 5, 6, 8, 10'),
                          (2, '4, 6, 7, 9, 11, 12')
                        ]
pizza_recipes_table = pd.DataFrame(data=pizza_recipes_values,columns=['pizza_id','toppings'])
display(pizza_recipes_table)
pizza_recipes_table.to_sql('pizza_recipes',sqlite3.connect(db_path), if_exists='replace')

Unnamed: 0,pizza_id,toppings
0,1,"1, 2, 3, 4, 5, 6, 8, 10"
1,2,"4, 6, 7, 9, 11, 12"


In [14]:
pizza_toppings_values =  [
                          (1, 'Bacon'),
                          (2, 'BBQ Sauce'),
                          (3, 'Beef'),
                          (4, 'Cheese'),
                          (5, 'Chicken'),
                          (6, 'Mushrooms'),
                          (7, 'Onions'),
                          (8, 'Pepperoni'),
                          (9, 'Peppers'),
                          (10, 'Salami'),
                          (11, 'Tomatoes'),
                          (12, 'Tomato Sauce')
                        ]
pizza_toppings_table = pd.DataFrame(data=pizza_toppings_values,columns=['topping_id','topping_name'])
display(pizza_toppings_table)
pizza_toppings_table.to_sql('pizza_toppings',sqlite3.connect(db_path), if_exists='replace')

Unnamed: 0,topping_id,topping_name
0,1,Bacon
1,2,BBQ Sauce
2,3,Beef
3,4,Cheese
4,5,Chicken
5,6,Mushrooms
6,7,Onions
7,8,Pepperoni
8,9,Peppers
9,10,Salami


## Close connection

In [15]:
# close connection to database

def close_connection(db_file):
    try:
        conn = sqlite3.connect(db_file)
        conn.close()
        print('Connection successfully closed')
    except Error as error:
        print(error)
        print('Could not close connection')
# run the above function (save database file to path)
if __name__ == '__main__':
    close_connection(db_path)

Connection successfully closed


# Querying data with conditions and save to Pandas Dataframe

In [16]:
conn = sqlite3.connect(db_path)

## Pizza Metrics

### Display of tables

In [17]:
display("Runners Table",pd.read_sql_query("""SELECT * from runners;""", conn))


display("Customer Orders Table",pd.read_sql_query("""SELECT * from customer_orders;""", conn))


display("Runners Orders Table",pd.read_sql_query("""SELECT * from runner_orders;""", conn))


display("Pizza Names Table",pd.read_sql_query("""SELECT * from pizza_names;""", conn))


display("Pizza Recipes Table",pd.read_sql_query("""SELECT * from pizza_recipes;""", conn))


display("Pizza Toppings Table",pd.read_sql_query("""SELECT * from pizza_toppings;""", conn))

'Runners Table'

Unnamed: 0,index,runner_id,registration_date
0,0,1,2021-01-01
1,1,2,2021-01-03
2,2,3,2021-01-08
3,3,4,2021-01-15


'Customer Orders Table'

Unnamed: 0,index,order_id,customer_id,pizza_id,exclusions,extras,order_time
0,0,1,101,1,,,2020-01-01 18:05:02
1,1,2,101,1,,,2020-01-01 19:00:52
2,2,3,102,1,,,2020-01-02 23:51:23
3,3,3,102,2,,,2020-01-02 23:51:23
4,4,4,103,1,4,,2020-01-04 13:23:46
5,5,4,103,1,4,,2020-01-04 13:23:46
6,6,4,103,2,4,,2020-01-04 13:23:46
7,7,5,104,1,,1,2020-01-08 21:00:29
8,8,6,101,2,,,2020-01-08 21:03:13
9,9,7,105,2,,1,2020-01-08 21:20:29


'Runners Orders Table'

Unnamed: 0,index,order_id,runner_id,pickup_time,distance,duration,cancellation
0,0,1,1,2020-01-01 18:15:34,20km,32 minutes,
1,1,2,1,2020-01-01 19:10:54,20km,27 minutes,
2,2,3,1,2020-01-03 00:12:37,13.4km,20 mins,
3,3,4,2,2020-01-04 13:53:03,23.4,40,
4,4,5,3,2020-01-08 21:10:57,10,15,
5,5,6,3,,,,Restaurant Cancellation
6,6,7,2,2020-01-08 21:30:45,25km,25mins,
7,7,8,2,2020-01-10 00:15:02,23.4 km,15 minute,
8,8,9,2,,,,Customer Cancellation
9,9,10,1,2020-01-11 18:50:20,10km,10minutes,


'Pizza Names Table'

Unnamed: 0,index,pizza_id,pizza_name
0,0,1,Meatlovers
1,1,2,Vegetarian


'Pizza Recipes Table'

Unnamed: 0,index,pizza_id,toppings
0,0,1,"1, 2, 3, 4, 5, 6, 8, 10"
1,1,2,"4, 6, 7, 9, 11, 12"


'Pizza Toppings Table'

Unnamed: 0,index,topping_id,topping_name
0,0,1,Bacon
1,1,2,BBQ Sauce
2,2,3,Beef
3,3,4,Cheese
4,4,5,Chicken
5,5,6,Mushrooms
6,6,7,Onions
7,7,8,Pepperoni
8,8,9,Peppers
9,9,10,Salami


### Case Study Questions

1) How many pizzas were ordered?

In [18]:
total_pizzas_ordered =  pd.read_sql_query("""
                                             select count(order_id) total_pizzas_ordered
                                             from customer_orders
                                          """,conn)
total_pizzas_ordered

Unnamed: 0,total_pizzas_ordered
0,14


2) How many unique customer orders were made?

In [21]:
total_unique_customer_orders =  pd.read_sql_query("""
                                                     select count(distinct(order_id)) total_unique_customer_orders
                                                     from customer_orders
                                                  """,conn)
total_unique_customer_orders

Unnamed: 0,total_unique_customer_orders
0,10


3) How many successful orders were delivered by each runner?

4) How many of each type of pizza was delivered?

5) How many Vegetarian and Meatlovers were ordered by each customer?

6) What was the maximum number of pizzas delivered in a single order?

7) For each customer, how many delivered pizzas had at least 1 change and how many had no changes?

8) How many pizzas were delivered that had both exclusions and extras?

9) What was the total volume of pizzas ordered for each hour of the day?

10) What was the volume of orders for each day of the week?