In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Read the three CSV inputs (paths are relative to the working directory)
# into DataFrames, in the order the names appear on the left-hand side.
fulfillment_center_data, meal_info, order_info = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./fulfilment_center_info.csv",
        "./meal_info.csv",
        "./train.csv",
    )
)

In [None]:
# Join every order row with the row of `meal_info` sharing its `meal_id`,
# then keep only the columns listed here.
order_meal_columns = [
    'meal_id', 'category', 'cuisine', 'week', 'checkout_price',
    'emailer_for_promotion', 'homepage_featured', 'num_orders',
]
meals_and_orders = pd.merge(order_info, meal_info, on="meal_id").loc[:, order_meal_columns]

# Collapse to one row per meal: first cuisine/category seen, mean checkout
# price and total number of orders; rows are sorted by ascending order total.
meals_and_orders_total = (
    meals_and_orders
    .groupby("meal_id")
    .agg(
        cuisine=('cuisine', 'first'),
        category=('category', "first"),
        checkout_price=('checkout_price', 'mean'),
        num_orders=('num_orders', 'sum'),
    )
    .sort_values(by="num_orders")
)

# Total orders handled by each fulfilment center.
orders_per_center = order_info.groupby("center_id", as_index=False)["num_orders"].sum()

# One row per center (that has at least one order): the center's attributes
# plus its order total. Aggregating the orders BEFORE the merge matters:
# merging the centers onto every order row and then summing `op_area` would
# count each center's area once per order record, inflating the area in
# proportion to order volume and manufacturing a spurious correlation.
centers_with_orders = fulfillment_center_data.merge(orders_per_center, on="center_id")

# Total operating area and total orders per region, ascending by orders.
grouped_fullfilment_center_by_region = (
    centers_with_orders[['num_orders', 'center_id', 'op_area', 'region_code']]
    .groupby("region_code")
    .agg({
        'op_area': 'sum',
        'num_orders': 'sum'
    })
    .sort_values(by="num_orders")
)

# Total operating area and total orders per city, ascending by orders.
grouped_fullfilment_center_by_city = (
    centers_with_orders[['num_orders', 'center_id', 'op_area', 'city_code']]
    .groupby("city_code")
    .agg({
        'op_area': 'sum',
        'num_orders': 'sum'
    })
    .sort_values(by="num_orders")
)


In [None]:
# Three side-by-side views of order volume, built from the per-meal totals.
fig, axis = plt.subplots(1, 3, figsize=(15, 5))

# `meals_and_orders_total` has one row per meal, so cuisine/category labels
# repeat. Passing it straight to `barh` stacks several bars on the same
# category position (only the longest stays visible) instead of showing the
# total, so sum the orders per label first.
orders_by_cuisine = meals_and_orders_total.groupby("cuisine")["num_orders"].sum().sort_values()
axis[0].barh(orders_by_cuisine.index, orders_by_cuisine.values)
axis[0].set_title("Numbers of orders by type of cuisine")
axis[0].set_xlabel("Number of orders")

orders_by_category = meals_and_orders_total.groupby("category")["num_orders"].sum().sort_values()
axis[1].barh(orders_by_category.index, orders_by_category.values)
axis[1].set_title("Numbers of orders by food category")
axis[1].set_xlabel("Number of orders")

# Checkout price is continuous: one point per meal (mean price vs. total
# orders) stays readable, whereas fixed-width bars on a price axis collapse
# into overlapping slivers.
axis[2].scatter(meals_and_orders_total['checkout_price'], meals_and_orders_total['num_orders'])
axis[2].set_title("Numbers of orders by checkout price")
axis[2].set_xlabel("Mean checkout price")
axis[2].set_ylabel("Number of orders")

fig.tight_layout()
# `Figure.show()` warns under non-GUI (inline notebook) backends; `plt.show()`
# is the supported way to render the figure from a cell.
plt.show()

In [None]:
# Operating area against order volume, aggregated per region (left) and per
# city (right). `plot` is the Figure, `sections` the pair of Axes.
plot, sections = plt.subplots(1, 2, figsize=(15, 5))

sections[0].scatter(grouped_fullfilment_center_by_region["num_orders"], grouped_fullfilment_center_by_region["op_area"])
sections[0].set_title("Correlation between operation area and number of orders in region")
sections[0].set_xlabel("Number of orders")
sections[0].set_ylabel("Operation area")

sections[1].scatter(grouped_fullfilment_center_by_city["num_orders"], grouped_fullfilment_center_by_city["op_area"])
sections[1].set_title("Correlation between operation area and number of orders in city")
sections[1].set_xlabel("Number of orders")
sections[1].set_ylabel("Operation area")

plot.tight_layout()
# `Figure.show()` warns under non-GUI (inline notebook) backends; `plt.show()`
# is the supported way to render the figure from a cell.
plt.show()

In [None]:
# Order totals for every (meal, emailer flag, homepage flag) combination.
orders_by_flag_combo = (
    order_info
    .groupby(['meal_id', 'emailer_for_promotion', 'homepage_featured'], as_index=False)['num_orders']
    .sum()
    .sort_values(by='meal_id')
)

# A combination counts as promoted when either flag is set (element-wise OR).
is_promoted_series = (
    orders_by_flag_combo['emailer_for_promotion'] | orders_by_flag_combo['homepage_featured']
)

# Re-aggregate to one row per (meal, promoted / not promoted).
order_data_by_promotion = (
    orders_by_flag_combo
    .assign(is_promoted=is_promoted_series)
    .groupby(['meal_id', 'is_promoted'], as_index=False)['num_orders']
    .sum()
    .sort_values(by='meal_id')
)

promoted = order_data_by_promotion.loc[order_data_by_promotion['is_promoted'].eq(1)]
non_promoted = order_data_by_promotion.loc[order_data_by_promotion['is_promoted'].eq(0)]

# One line per group: total orders against meal id.
for subset, legend_label in ((promoted, "promoted"), (non_promoted, "non promoted")):
    plt.plot(subset['meal_id'], subset['num_orders'], label=legend_label)

plt.legend()

plt.xlabel("Meal Id")
plt.ylabel("Number of orders")

plt.title("Difference between promoted and non promoted orders")

plt.tight_layout()
plt.show()