# Using Table and Data 

### Importing Libraries

In [1]:
import sqlite3

# create connection  and cursor object
connection = sqlite3.connect("pizza_runner_database")
cursor = connection.cursor()

# see the tables in database
query ="select name from sqlite_master  where type='table';"
cursor.execute(query)
result = cursor.fetchall()
print('Tables: {}'.format(result))

Tables: [('runners',), ('customer_orders',), ('runner_orders',), ('pizza_names',), ('pizza_recipes',), ('pizza_toppings',)]


### Data Cleaning and Transformation

In this we will directly create view of corrected data table. Overall process is discussed in clean_data.ipynb file

In [2]:
#for customer_orders table
cursor.execute("DROP View IF EXISTS c_view;")
query= """
create view c_view as 

select order_id, customer_id, pizza_id,
case
    when exclusions = 'null' or TRIM(exclusions) ="" then NULL
    else exclusions
end as exclusions,
case
    when extras = 'null' or TRIM(extras) ="" then NULL
    else extras
end as extras,
order_time

from customer_orders;


"""
cursor.execute(query)


# for runner_orders table
cursor.execute("DROP View IF EXISTS r_view;")
query= """
create view r_view as 

select order_id, runner_id,
case
    when pickup_time  = 'null'  then NULL
    else datetime(pickup_time)
end as pickup_time,
CASE
     WHEN distance = 'null' or TRIM(distance)= "" THEN NULL
     WHEN distance LIKE '%km' THEN cast(REPLACE(distance, 'km', '') as real)
     ELSE cast(distance as real) 
   END AS distance,
CASE
     WHEN duration = 'null' or TRIM(duration)="" THEN NULL
     WHEN duration LIKE '%mins' THEN cast(REPLACE(duration, 'mins', '') as integer)
     WHEN duration LIKE '%minute' THEN cast(REPLACE(duration, 'minute', '') as integer)
     WHEN duration LIKE '%minutes' THEN cast(REPLACE(duration, 'minutes', '') as integer)
     ELSE cast(duration as integer)
   END AS duration,
case
    when cancellation = 'null' or TRIM(cancellation) =""then NULL
    else cancellation
end as cancellation

from runner_orders;


"""
cursor.execute(query)

<sqlite3.Cursor at 0x2134956a3c0>

Calculate the Running Total of Distances:
Calculate the cumulative distance covered by runners for each order

In [6]:
query= """
select order_id,
    runner_id,
    distance,
    sum(distance) over(order by order_id
    ) as cumulative_distance
from r_view
where distance is not null;

"""
cursor.execute(query)
result = cursor.fetchall()
for record in result:
    print(record)

(1, 1, 20.0, 20.0)
(2, 1, 20.0, 40.0)
(3, 1, 13.4, 53.4)
(4, 2, 23.4, 76.8)
(5, 3, 10.0, 86.8)
(7, 2, 25.0, 111.8)
(8, 2, 23.4, 135.2)
(10, 1, 10.0, 145.2)


Rank Orders by Distance:
Rank the orders based on the distance covered.

In [9]:
query= """
select order_id,
distance,
rank() over(order by distance desc) as order_rank
from r_view
where distance is not null

"""
cursor.execute(query)
result = cursor.fetchall()
for record in result:
    print(record)

(7, 25.0, 1)
(4, 23.4, 2)
(8, 23.4, 2)
(1, 20.0, 4)
(2, 20.0, 4)
(3, 13.4, 6)
(5, 10.0, 7)
(10, 10.0, 7)


Calculate the Average Distance Per Runner:
Calculate the average distance covered by each runner using a window function.

In [13]:
query= """
select runner_id,
avg(distance) over(partition by runner_id) as avg_distance
from r_view

"""
cursor.execute(query)
result = cursor.fetchall()
for record in result:
    print(record)

(1, 15.85)
(1, 15.85)
(1, 15.85)
(1, 15.85)
(2, 23.933333333333334)
(2, 23.933333333333334)
(2, 23.933333333333334)
(2, 23.933333333333334)
(3, 10.0)
(3, 10.0)


Calculate the time difference between consecutive orders

In [27]:
query= """
select a.order_id, 
case
    when round((julianday(a.order_time)-julianday(a.diff_time))*24*60) is null then 0
    else round((julianday(a.order_time)-julianday(a.diff_time))*24*60)
end as time_diff_in_minutes
from (
select order_id, order_time,
lag(order_time) over(order by order_time) as diff_time
from c_view
group by order_id,order_time) as a


"""
cursor.execute(query)
result = cursor.fetchall()
for record in result:
    print(record)

(1, 0)
(2, 56.0)
(3, 1731.0)
(4, 2252.0)
(5, 6217.0)
(6, 3.0)
(7, 17.0)
(8, 1594.0)
(9, 688.0)
(10, 1872.0)


Calculate the Rolling Average Distance Over 2 Orders:
Calculate the rolling average distance covered over the last two orders.

In [33]:
query= """
select order_id, distance,
avg(distance) over(order by order_id 
                    rows between 1 preceding and current row
                    ) as avg_distance
from r_view


"""
cursor.execute(query)
result = cursor.fetchall()
for record in result:
    print(record)

(1, 20.0, 20.0)
(2, 20.0, 20.0)
(3, 13.4, 16.7)
(4, 23.4, 18.4)
(5, 10.0, 16.7)
(6, None, 10.0)
(7, 25.0, 25.0)
(8, 23.4, 24.2)
(9, None, 23.4)
(10, 10.0, 10.0)


Determine the Fastest Pickup Times for Each Customer

In [42]:
query= """
select c.customer_id,
    min(r.pickup_time) over(partition by c.customer_id) as min_pickup_time

from c_view c inner join r_view r on
    c.order_id = r.order_id
where r.pickup_time is not null
group by c.customer_id

"""
cursor.execute(query)
result = cursor.fetchall()
for record in result:
    print(record)

(101, '2020-01-01 18:15:34')
(102, '2020-01-03 00:12:37')
(103, '2020-01-04 13:53:03')
(104, '2020-01-08 21:10:57')
(105, '2020-01-08 21:30:45')


In [40]:
# lets solve this using row_number function

query= """
select a.*
from (select c.customer_id,c.order_id,
    r.pickup_time,
    row_number() over(partition by c.customer_id order by r.pickup_time) as row_num

from c_view c inner join r_view r on
    c.order_id = r.order_id
where r.pickup_time is not null) as a
where a.row_num =1

"""
cursor.execute(query)
result = cursor.fetchall()
for record in result:
    print(record)

(101, 1, '2020-01-01 18:15:34', 1)
(102, 3, '2020-01-03 00:12:37', 1)
(103, 4, '2020-01-04 13:53:03', 1)
(104, 5, '2020-01-08 21:10:57', 1)
(105, 7, '2020-01-08 21:30:45', 1)


In [43]:
if connection:
    connection.close()