In [113]:
import numpy as np
import pandas as pd

In [114]:
# Load the five source tables; the target names are unpacked in the same
# order as the file names listed below.
customer_orders, runner_orders, pizza_names, pizza_recipes, pizza_toppings = map(
    pd.read_csv,
    [
        'customer_orders.csv',
        'runner_orders.csv',
        'pizza_names.csv',
        'pizza_recipes.csv',
        'pizza_toppings.csv',
    ],
)

In [115]:
# Strip the unit suffixes from "distance" and "duration" so that both columns
# can be stored as floats.

# 'km' is a plain substring, so it is removed literally.
runner_orders['distance'] = (
    runner_orders['distance']
    .str.replace('km', '', regex=False)
    .astype(float)
)

# The duration suffix is an alternation pattern, so it has to be applied as a
# regular expression. regex=True is passed explicitly: relying on the default
# raises a FutureWarning on pandas 1.x, and on pandas 2.x (default
# regex=False) the pattern would be searched for literally, never match, and
# leave the unit text in place for astype(float) to fail on.
runner_orders['duration'] = (
    runner_orders['duration']
    .str.replace('mins|minutes|minute', '', regex=True)
    .astype(float)
)

  runner_orders['duration'] = runner_orders['duration'].str.replace('mins|minutes|minute', '').astype(float)


In [116]:
# Unify the data type of the primary-key and foreign-key columns: every key
# column listed below is cast to int on its own table.
key_columns_by_table = (
    (customer_orders, ('order_id', 'pizza_id')),
    (pizza_names, ('pizza_id',)),
    (pizza_toppings, ('topping_id',)),
)
for table, key_columns in key_columns_by_table:
    for key_column in key_columns:
        table[key_column] = table[key_column].astype(int)

# Part I

Q1. How many pizzas were ordered?

In [117]:
# Number of non-null order_id entries; this assumes customer_orders holds one
# row per ordered pizza.
customer_orders['order_id'].notna().sum()

14

Q2. How many unique customer orders was made?

In [118]:
# Number of distinct non-null order_id values.
customer_orders['order_id'].drop_duplicates().count()

10

Q3. How many successful orders was delivered by each runner?


In [119]:
# An order counts as delivered when its "cancellation" field is empty.
is_delivered = runner_orders['cancellation'].isna()

# Tally delivered orders per runner, most deliveries first, and label the
# two resulting columns.
successful_ordered = (
    runner_orders.loc[is_delivered, 'runner_id']
    .value_counts()
    .rename_axis('runner_id')
    .reset_index(name='order_delivered')
)

# show the result
successful_ordered

Unnamed: 0,runner_id,order_delivered
0,1,4
1,2,3
2,3,1


Q4. How many of each type of pizza was delivered?

In [120]:
# Runner records whose order was not cancelled.
delivered_runners = runner_orders[runner_orders['cancellation'].isna()]

# Inner-join the ordered pizzas with the delivered runner records (this drops
# pizzas from cancelled orders), then look up each pizza's name.
pizza_delivered = pd.merge(
    pd.merge(customer_orders, delivered_runners, on='order_id', how='inner'),
    pizza_names,
    on='pizza_id',
    how='left',
)

# Number of delivered pizzas per pizza name, most frequent first.
pizza_delivered_counts = (
    pizza_delivered['pizza_name']
    .value_counts()
    .rename_axis('pizza_name')
    .reset_index(name='pizza_count')
)

# show the result
pizza_delivered_counts

Unnamed: 0,pizza_name,pizza_count
0,Meatlovers,9
1,Vegetarian,3


Q5. How many Vegetarian and Meatlovers were ordered by each customer?

In [121]:
# Count how many pizzas of each type every customer ordered. No cancellation
# filter is applied here, so every row of customer_orders is counted.
pizza_count_by_customer = (
    customer_orders.merge(pizza_names, on='pizza_id', how='left')
    .groupby(['customer_id', 'pizza_name'])
    .size()
    .reset_index(name='order_made')
    # Sort once, on both keys. The previous version sorted twice on
    # 'customer_id' alone; that key is not unique and the default sort
    # algorithm is not stable, so the pizza_name order within a customer
    # was not guaranteed.
    .sort_values(['customer_id', 'pizza_name'])
)

# show the result (already sorted above)
pizza_count_by_customer

Unnamed: 0,customer_id,pizza_name,order_made
0,101,Meatlovers,2
1,101,Vegetarian,1
2,102,Meatlovers,2
3,102,Vegetarian,1
4,103,Meatlovers,3
5,103,Vegetarian,1
6,104,Meatlovers,3
7,105,Vegetarian,1


Q6. What was the maximum number of pizzas delivered in a single orders?

In [122]:
# Delivered pizzas per order, largest count first.
pizzas_per_order = pizza_delivered['order_id'].value_counts(ascending=False)

# Keep only the top entry and expose it as an (order_id, pizzas) table.
max_pizzas_delivered = (
    pizzas_per_order.head(1)
    .rename_axis('order_id')
    .reset_index(name='pizzas')
)

# show the result
max_pizzas_delivered


Unnamed: 0,order_id,pizzas
0,4,3


Q7. For each customer, how many delivered pizzas had at least 1 change and how many had no changes?

In [123]:
# Inner-join the ordered pizzas with the non-cancelled runner records so that
# only delivered pizzas remain.
not_cancelled = runner_orders['cancellation'].isna()
merged_data = pd.merge(
    customer_orders,
    runner_orders[not_cancelled],
    on='order_id',
    how='inner',
)

# A pizza has a change when "exclusions" or "extras" is filled in; it has no
# change exactly when both are empty, i.e. the complement of the first flag.
has_change = merged_data[['exclusions', 'extras']].notna().any(axis=1)
merged_data['at_least_1_change'] = has_change.astype(int)
merged_data['no_change'] = (~has_change).astype(int)

# Sum both indicator columns per customer.
result = (
    merged_data
    .groupby('customer_id', as_index=False)[['at_least_1_change', 'no_change']]
    .sum()
    .sort_values('customer_id')
)

# show the result
result

Unnamed: 0,customer_id,at_least_1_change,no_change
0,101,0,2
1,102,0,3
2,103,3,0
3,104,2,1
4,105,1,0


Q8. How many pizzas were delivered that had both exclusions and extras?

In [124]:
# Flag the delivered pizzas for which "exclusions" and "extras" are both
# filled in.
merged_data['change_extras_and_exclusion'] = (
    merged_data[['exclusions', 'extras']].notna().all(axis=1).astype(int)
)

# The sum of the 0/1 flag is the number of such pizzas.
print(merged_data['change_extras_and_exclusion'].sum())

1


Q9. What was the total volume of pizzas ordered for each hour of the day?

In [129]:
# Parse the order_time column as datetimes (day/month/year hour:minute:second).
order_timestamps = pd.to_datetime(customer_orders['order_time'], format='%d/%m/%Y %H:%M:%S')
customer_orders['order_time'] = order_timestamps

# Hour component of each order timestamp.
customer_orders['hour_of_the_day'] = order_timestamps.dt.hour

# Number of rows in customer_orders for each hour.
pizzas_per_hour = customer_orders.groupby('hour_of_the_day').size()
result = pizzas_per_hour.reset_index(name='pizza_ordered_count')

# print the result ordered by hour
print(result.sort_values(by='hour_of_the_day'))

   hour_of_the_day  pizza_ordered_count
0               11                    1
1               13                    3
2               18                    3
3               19                    1
4               21                    3
5               23                    3


Q10. What was the volume of orders for each day of the week?

In [130]:
# Calendar order of the weekdays, used to sort the result from Monday to Sunday.
d_o_w_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Weekday name of each order timestamp.
customer_orders['day_of_week'] = customer_orders['order_time'].dt.day_name()

# Number of rows in customer_orders per weekday. The weekday column is turned
# into an ordered categorical so that sorting follows d_o_w_order rather than
# alphabetical order.
result = (
    customer_orders.groupby('day_of_week')
    .size()
    .reset_index(name='pizza_order_count')
    .assign(
        day_of_week=lambda counts: pd.Categorical(
            counts['day_of_week'], categories=d_o_w_order, ordered=True
        )
    )
)

# print the result in weekday order
print(result.sort_values(by='day_of_week'))

  day_of_week  pizza_order_count
3   Wednesday                  5
2    Thursday                  3
0      Friday                  1
1    Saturday                  5


# Part II