In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found in it.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Food demand forecasting


#### In this Jupyter notebook we will perform some EDA on the orders, combined with the data about the fulfilment centers and the meals


In [None]:
# Train dataset.

train_data = pd.read_csv("/kaggle/input/food-demand-forecasting/train.csv")
# The banner names the table that was just loaded (it used to say "Meal dataset").
print("###### Train dataset has {} rows and {} columns #######".format(train_data.shape[0], train_data.shape[1]))
train_data.head()

In [None]:
# Load the meal information table and report its size.

meal_info_data = pd.read_csv("/kaggle/input/food-demand-forecasting/meal_info.csv")
# DataFrame.shape is (rows, columns), so it can be unpacked straight into the banner.
print("###### Meal dataset has {} rows and {} columns #######".format(*meal_info_data.shape))
meal_info_data.head()

In [None]:
# Fulfilment center dataset: load the table and report its size.

fulfilment_center_info_data = pd.read_csv("/kaggle/input/food-demand-forecasting/fulfilment_center_info.csv")
# DataFrame.shape is (rows, columns), so it can be unpacked straight into the banner.
print("###### Center info has {} rows and {} columns #######".format(*fulfilment_center_info_data.shape))
fulfilment_center_info_data.head()

In [None]:
# Attach the meal attributes, then the fulfilment-center attributes, to every order row.
# validate="many_to_one" makes pandas raise a MergeError if a lookup table repeats a key,
# instead of silently duplicating order rows and inflating every total computed later.
first_join = train_data.merge(
    meal_info_data, on="meal_id", how="inner", validate="many_to_one"
)
full_train_data = first_join.merge(
    fulfilment_center_info_data, on="center_id", how="inner", validate="many_to_one"
)
full_train_data.head()

In [None]:
# Count the missing cells per column, then report whether the merged frame has any at all.
null_values_array = full_train_data.isnull().sum().values
if null_values_array.sum() == 0:
    print("##### No null values #######")
else:
    print("####### You have null values in your dataset ########")
# Show the column dtypes as the cell output.
full_train_data.dtypes

## 1) Data visualization


#### The data visualization starts with a first section of bar plots built from group-bys, in order to explore the number of orders by other columns (region, cuisine, category, ...)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline


# Use the FiveThirtyEight matplotlib style sheet for the figures drawn after this point.
plt.style.use('fivethirtyeight')

# Keyword arguments shared by the pandas bar plots in this section.
plot_kwargs = dict(figsize=(20, 5), fontsize=13)

# Sum num_orders per category, largest total first, and draw the totals as bars.
ax = (
    full_train_data
    .groupby(["category"])["num_orders"]
    .sum()
    .sort_values(ascending=False)
    .plot.bar(**plot_kwargs)
)
ax.set(ylabel="Number of orders", title="Orders by Categories in Millions by category")
plt.xticks(rotation=25);

In [None]:
# Sum num_orders per cuisine, largest total first, and draw the totals as bars.
ax = (
    full_train_data
    .groupby(["cuisine"])["num_orders"]
    .sum()
    .sort_values(ascending=False)
    .plot.bar(**plot_kwargs)
)
ax.set(ylabel="Number of orders", title="Orders by Cuisine in Millions")
plt.xticks(rotation=25);

In [None]:
# Sum num_orders per region code, largest total first, and draw the totals as bars.
ax = (
    full_train_data
    .groupby(["region_code"])["num_orders"]
    .sum()
    .sort_values(ascending=False)
    .plot.bar(**plot_kwargs)
)
ax.set(ylabel="Number of order", title="Number of orders by region code in Millions")
plt.xticks(rotation=25);

In [None]:
# Sum num_orders per city code, largest total first, and draw the totals as bars.
full_train_data.groupby(["city_code"])["num_orders"].sum().sort_values(ascending=False).plot.bar(**plot_kwargs)
plt.ylabel("Number of order")
# This figure is grouped by city_code; the title previously said "region code"
# (copied from the region plot) and mislabelled the chart.
plt.title("Number of orders by city code in Millions")
plt.xticks(rotation=25);

In [None]:
# Sum num_orders per center type, largest total first, and draw the totals as bars.
ax = (
    full_train_data
    .groupby(["center_type"])["num_orders"]
    .sum()
    .sort_values(ascending=False)
    .plot.bar(**plot_kwargs)
)
ax.set(ylabel="Number of orders", title="Orders by center type in Millions")
plt.xticks(rotation=25);

### Okay, now let's plot the number of orders by week for each center


#### Next, we'll draw line plots of the number of orders by week

In [None]:
import seaborn as sns


# Set the default figure size to 15x8 through seaborn's rc settings.
sns.set(rc={"figure.figsize": (15, 8)})

# num_orders against week, one line per meal category.
# NOTE(review): the frame has many rows per (week, category); seaborn presumably
# aggregates them (mean by default) rather than summing, so this may show an average
# order size per week, not the weekly total — confirm that is what is intended.
sns.lineplot(
    data=full_train_data,
    x="week",
    y="num_orders",
    hue="category",
)

### We can see that there are some interesting peaks within the Rice and Sandwich categories

In [None]:
# Preview only the first rows instead of echoing the whole merged frame.
full_train_data.head()

In [None]:
# num_orders against week, one line per cuisine.
sns.lineplot(
    data=full_train_data,
    x="week",
    y="num_orders",
    hue="cuisine",
)

In [None]:
# num_orders against week, one line per center type.
sns.lineplot(
    data=full_train_data,
    x="week",
    y="num_orders",
    hue="center_type",
)

In [None]:
# num_orders against week, one line per region code.
sns.lineplot(
    data=full_train_data,
    x="week",
    y="num_orders",
    hue="region_code",
)

In [None]:
# num_orders against week, one line per city code.
sns.lineplot(
    data=full_train_data,
    x="week",
    y="num_orders",
    hue="city_code",
)

In [None]:
# Joint KDE plot of base_price against checkout_price, coloured by cuisine.
sns.jointplot(data=full_train_data, x="base_price", y="checkout_price", hue="cuisine", kind="kde")

#### Base price and checkout price are very similar for each order across cuisines; nevertheless, there are some instances where the base price exceeds the checkout price. The most noticeable one is within the Continental cuisine.

In [None]:
def single_base_price(data):
    """Return the ``base_price`` entry of ``data`` divided by its ``num_orders`` entry.

    ``data`` only needs to support ``data["base_price"]`` and
    ``data["num_orders"]``; the result is whatever ``/`` yields for those
    two values.
    """
    base_price = data["base_price"]
    order_count = data["num_orders"]
    return base_price / order_count


def single_checkout_price(data):
    """Return the ``checkout_price`` entry of ``data`` divided by its ``num_orders`` entry.

    ``data`` only needs to support ``data["checkout_price"]`` and
    ``data["num_orders"]``; the result is whatever ``/`` yields for those
    two values.
    """
    checkout_price = data["checkout_price"]
    order_count = data["num_orders"]
    return checkout_price / order_count


# Compute both derived columns in one vectorised pass. The helpers only index the price
# column and "num_orders" and divide them, so they accept the whole DataFrame and return
# a full column. The previous apply(..., axis=1) called them once per row in Python,
# which gives the same values but is far slower on a large frame.
# NOTE(review): these columns are price / num_orders — confirm that base_price and
# checkout_price are order totals rather than per-unit prices; otherwise the ratio is
# not a "price of a single item".
full_train_data["single_base_price"] = single_base_price(full_train_data)
full_train_data["single_checkout_price"] = single_checkout_price(full_train_data)

In [None]:
# Preview the first five rows of the merged frame.
full_train_data.head(n=5)

In [None]:
# single_base_price against week, one line per meal category.
sns.lineplot(
    data=full_train_data,
    x="week",
    y="single_base_price",
    hue="category",
)

In [None]:
# single_base_price against week, one line per cuisine.
sns.lineplot(
    data=full_train_data,
    x="week",
    y="single_base_price",
    hue="cuisine",
)

#### It's interesting to see that Indian and Continental food are much more expensive than the other cuisines; this can be seen better with violin plots or box plots

In [None]:
# Distribution of single_base_price within each cuisine, drawn as violins.
sns.violinplot(
    data=full_train_data,
    x="cuisine",
    y="single_base_price",
    palette="Set3",
    bw=0.2,
    cut=1,
    linewidth=1,
)

In [None]:
# Distribution of single_base_price within each meal category, drawn as violins.
sns.violinplot(
    data=full_train_data,
    x="category",
    y="single_base_price",
    palette="Set3",
    bw=0.2,
    cut=1,
    linewidth=1,
)