## Step 0: Initiate Libraries




In [1]:
# Import Warnings
import warnings
warnings.simplefilter("ignore", UserWarning)
warnings.simplefilter("ignore", FutureWarning)
warnings.simplefilter("ignore", DeprecationWarning)

# Import Key Libraries
import numpy as np
import pandas as pd
import os

# Import Data Preprocessing Libraries
from dateutil.parser import parse

# ast : Abstract Syntax Trees
from ast import literal_eval

# Import Geospatial Libraries
import geopy
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from geopy.distance import geodesic
import geopandas as gpd
import folium
from folium import plugins
from folium.plugins import *
import reverse_geocoder as rg 

# Datetime
import datetime
import datetime as dt


# Data Visualisation
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
%matplotlib inline
import plotly.graph_objs as go
from plotly.tools import FigureFactory as FF

## Step 1: Data Discovery (Building Intuition)

- This is a technique we use to get an initial feel for our data tables.
- We read the data using pandas and perform method calls.
- Standardize dataset columns in the correct format.
- Explore Descriptive Statistics on Numerical Columns and more below:

### Step 1.1: Read Data

- Here will we be reading the raw data as -  `dirty_data.csv` file into our jupyter notebook.
- The variable name for the Food Delivery data would be called `dataset` .

In [2]:
# Create the file path
file_path = os.path.join(os.pardir, os.pardir, 'Melbourne-Delivery/data/dirty_data.csv')

# Load the file into a DataFrame
delivery_data = pd.read_csv(file_path)

# Copy the data
orders_df = delivery_data.copy()

In [17]:
orders_df['customer_lat'].value_counts()

customer_lat
-37.805441    2
-37.813052    2
-37.815066    2
-37.810995    2
-37.806693    2
             ..
-37.811589    1
 37.821922    1
-37.813082    1
-37.802329    1
-37.810685    1
Name: count, Length: 494, dtype: int64

##### `df.info()`

- It is an important and widely used method of Python.
- This Method prints the information or summary of the dataframe.
- It prints the various information of the Dataframe such as index type, dtype, columns, non-values, and memory usage. It gives a quick overview of the dataset.
- Info Method to get the Non-Null Count & Dtype (data type) of the dataset,
- Validate if a column and column type aligns with the format of the Business Requirements.

In [3]:
orders_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 12 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   order_id                 500 non-null    object 
 1   date                     500 non-null    object 
 2   time                     500 non-null    object 
 3   order_type               500 non-null    object 
 4   branch_code              500 non-null    object 
 5   order_items              500 non-null    object 
 6   order_price              500 non-null    float64
 7   customer_lat             500 non-null    float64
 8   customer_lon             500 non-null    float64
 9   customerHasloyalty?      500 non-null    int64  
 10  distance_to_customer_KM  500 non-null    float64
 11  delivery_fee             500 non-null    float64
dtypes: float64(5), int64(1), object(6)
memory usage: 47.0+ KB


### Step 1.2: Data Preporcessing - Cleaning 

- Here we will be cleaning the data by converting the columns to the correct data types.
- We will merge the date and time columns into one column called `order_date` and convert it to a datetime type.
- We will also rename the `customerHasloyalty?` , `distance_to_customer_KM` columns to `customer_loyalty` , and  `distance_to_customer` respectively.
- We will add an additional column called `updated at` which will be the date and time the data was updated.

In [4]:
def reverseGeocode(coordinates): 
    result = rg.search(coordinates)
    return (result)

def cleaning_data_types(orders_df):
    # Create a copy of the DataFrame to avoid modifying the original one
    df_clean = orders_df.copy()

    # Define helper function to clean date data
    def clean_date(date_str):
        date_str = date_str.strip()
        date = parse(date_str, dayfirst=True)  # dayfirst=True to handle DD/MM/YYYY properly
        return date.strftime('%Y-%m-%d')

    # Convert columns to appropriate data types
    # df_clean['order_id'] = df_clean['order_id'].str.extract('(\d+)').astype(int)
    df_clean['date'] = df_clean['date'].apply(clean_date).astype('datetime64[ns]')
    df_clean['datetime'] = pd.to_datetime(df_clean['date'].astype(str) + ' ' + df_clean['time'])
    df_clean["order_type"] = df_clean["order_type"].astype("category")

   
    # Convert 'branch_code' to upper case to handle case-insensitive duplicates
    df_clean["branch_code"] = df_clean["branch_code"].str.upper().astype("category")


    # Use exception handling for potential errors in the literal_eval() function
    try:
        df_clean["order_items"] = df_clean["order_items"].apply(literal_eval)
    except (ValueError, SyntaxError):
        pass

    # Continue with the remaining conversions
    df_clean["order_price"] = df_clean["order_price"].astype(float)
    df_clean["customer_lat"] = df_clean["customer_lat"].astype(float)
    df_clean["customer_lon"] = df_clean["customer_lon"].astype(float)
    df_clean["customerHasloyalty?"] = df_clean["customerHasloyalty?"].astype(bool)
    df_clean["distance_to_customer_KM"] = df_clean["distance_to_customer_KM"].astype(float)
    df_clean["delivery_fee"] = df_clean["delivery_fee"].astype(float)


    # make the order_price two decimal places
    df_clean['order_price'] = df_clean['order_price'].round(2)

    # make the delivery fee two decimal places
    df_clean['delivery_fee'] = df_clean['delivery_fee'].round(2)


    # transform long/lat into state
    coordinates =list(zip(df_clean['customer_lat'],df_clean['customer_lon'])) # generates pair of (lat,long)
    data = reverseGeocode(coordinates)


    # Create a new column with the City name    
    df_clean['name'] = [i['name'] for i in data]
    df_clean['admin1'] = [i['admin1'] for i in data]
    df_clean['admin2'] = [i['admin2'] for i in data]


    df_clean.drop(['admin1', 'admin2'], axis=1, inplace=True)
    df_clean.rename(columns={'name': 'location'}, inplace=True)

    # make the location column to geospacial data
    df_clean['location'] = df_clean['location'].astype('category')
 

    # Rename the customerHasloyalty? column to customerHasloyalty
    df_clean.rename(columns={'customerHasloyalty?': 'customer_loyalty'}, inplace=True)

    # Rename the distance_to_customer_KM column to distance_to_customer_km
    df_clean.rename(columns={'distance_to_customer_KM': 'distance_to_customer_km'}, inplace=True)

    # Drop the 'date' and 'time' columns
    df_clean.drop(['date', 'time'], axis=1, inplace=True)

    # Rename the 'datetime' column to 'order_date' and move it to the second position
    df_clean.rename(columns={'datetime': 'order_date'}, inplace=True)
    order_date = df_clean.pop('order_date')
    df_clean.insert(1, 'order_date', order_date)

    # Add the 'updated_at' column with the current datetime
    df_clean['updated_at'] = datetime.datetime.today().replace(second=0, microsecond=0)

    df_clean.drop(['customer_lat', 'customer_lon'], axis=1, inplace=True)

    # order data by order date
    df_clean.sort_values(by=['order_date'], inplace=True)

    # reset index
    df_clean.reset_index(drop=True, inplace=True)

    return df_clean

df_clean = cleaning_data_types(orders_df)
df_clean.info()


Loading formatted geocoded file...
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 11 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   order_id                 500 non-null    object        
 1   order_date               500 non-null    datetime64[ns]
 2   order_type               500 non-null    category      
 3   branch_code              500 non-null    category      
 4   order_items              500 non-null    object        
 5   order_price              500 non-null    float64       
 6   customer_loyalty         500 non-null    bool          
 7   distance_to_customer_km  500 non-null    float64       
 8   delivery_fee             500 non-null    float64       
 9   location                 500 non-null    category      
 10  updated_at               500 non-null    datetime64[ns]
dtypes: bool(1), category(3), datetime64[ns](2), float64(3), object

In [5]:
df_clean.head(5)

Unnamed: 0,order_id,order_date,order_type,branch_code,order_items,order_price,customer_loyalty,distance_to_customer_km,delivery_fee,location,updated_at
0,ORDB04420,2018-01-03 09:51:32,Breakfast,TP,"[(Cereal, 4), (Coffee, 7)]",136.5,True,9.74,11.89,Docklands,2023-07-06 12:19:00
1,ORDI01419,2018-01-03 09:51:32,Breakfast,NS,"[(Pancake, 8), (Eggs, 4), (Coffee, 3)]",304.5,False,10.746,15.65,Docklands,2023-07-06 12:19:00
2,ORDC08940,2018-01-03 09:51:32,Breakfast,NS,"[(Coffee, 4), (Eggs, 9), (Cereal, 5), (Pancake...",381.5,False,9.338,14.24,East Melbourne,2023-07-06 12:19:00
3,ORDY08021,2018-01-03 09:51:32,Breakfast,TP,"[(Eggs, 10), (Cereal, 8), (Pancake, 1)]",412.25,True,9.77,12.35,Docklands,2023-07-06 12:19:00
4,ORDZ03555,2018-01-04 10:52:23,Breakfast,NS,"[(Pancake, 4), (Cereal, 6), (Coffee, 5)]",260.5,False,9.172,16.46,Docklands,2023-07-06 12:19:00


In [6]:
# Define directory
directory = os.path.join(os.pardir, 'data')

# Create target directory if it doesn't exist
if not os.path.exists(directory):
    os.makedirs(directory)

# Save file
df_clean.to_csv(os.path.join(directory, 'clean_data.csv'), index=False)

### Step 1.3: Feature Engineering

- Here we will be creating new columns from existing columns.

#### Step 1.4: Feature Engineering ( `order_items` )

- Here we will be extracting the food items and their quantities from the `order_items` column into separate columns.
- `cuisine` which will be the type of cuisine the food item is.
-  `order_items_count` which will be the total number of items ordered.
-  `order_items_total` which will be the total price of the items ordered.

#### Step 1.5: Feature Engineering ( `order_date` )

- Here we will be creating new columns based on the existing columns in the dataset.
- `order_time_of_day` which will be the hour of the day the order was made. (Morning, Afternoon, Evening, Night)
- `order_day` which will be the day of the week the order was made. (Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday)
-  `order_month` which will be the month of the year the order was made. (January, February, March, April, May, June, July, August, September, October, November, December)
- `order_season` which will be the season the order was made. (Summer, Autumn, Winter, Spring)


In [7]:
def feature_en(df_clean):
    # Create a copy of the DataFrame to avoid modifying the original one
    df = df_clean.copy()

   # Explode the 'order_items' column
    df_exploded = df.explode('order_items')

    # Split the tuple into two new columns
    df_exploded[['cuisine', 'quantity_ordered']] = pd.DataFrame(df_exploded['order_items'].tolist(), index=df_exploded.index)

    # make the cuisine as category
    df_exploded["cuisine"] = df_exploded["cuisine"].astype("category")

    # average_order_price = order_items_total / quantity_ordered
    df_exploded['average_item_price'] = df_exploded['order_price'] / df_exploded['quantity_ordered']

    # two decimal places
    df_exploded['average_item_price'] = df_exploded['average_item_price'].round(2)

    # Drop the 'order_items' column
    df_exploded.drop('order_items', axis=1, inplace=True)


    # Extract the year, month as Jan, Feb, Mar, etc. Add them as new columns
    df_exploded['order_month'] = df_exploded['order_date'].dt.strftime('%b')
    df_exploded['day_of_week'] = df_exploded['order_date'].dt.strftime('%a')

    # new column for the season the order was made. (Spring, Summer, Autumn, Winter)
    df_exploded['order_season'] = df_exploded['order_date'].dt.month.apply(lambda x: (x%12 + 3)//3)
    
    # change the season number to season name
    df_exploded['order_season'] = df_exploded['order_season'].map({1:'Spring', 2:'Summer', 3:'Autumn', 4:'Winter'})

    # make the month, week, season as category
    df_exploded["order_month"] = df_exploded["order_month"].astype("category")
    df_exploded["day_of_week"] = df_exploded["day_of_week"].astype("category")
    df_exploded["order_season"] = df_exploded["order_season"].astype("category")

    # reposition the columns 
    df_exploded = df_exploded[['order_id', 'order_date' , 'order_price', 'quantity_ordered' , 'average_item_price', 'order_month', 'day_of_week', 'order_season' , 'order_type', 'branch_code' , 'delivery_fee', 'location', 'cuisine' , 'customer_loyalty', 'distance_to_customer_km','updated_at']]

    return df_exploded

df_exploded = feature_en(df_clean)
df_exploded.head()

Unnamed: 0,order_id,order_date,order_price,quantity_ordered,average_item_price,order_month,day_of_week,order_season,order_type,branch_code,delivery_fee,location,cuisine,customer_loyalty,distance_to_customer_km,updated_at
0,ORDB04420,2018-01-03 09:51:32,136.5,4,34.12,Jan,Wed,Spring,Breakfast,TP,11.89,Docklands,Cereal,True,9.74,2023-07-06 12:19:00
0,ORDB04420,2018-01-03 09:51:32,136.5,7,19.5,Jan,Wed,Spring,Breakfast,TP,11.89,Docklands,Coffee,True,9.74,2023-07-06 12:19:00
1,ORDI01419,2018-01-03 09:51:32,304.5,8,38.06,Jan,Wed,Spring,Breakfast,NS,15.65,Docklands,Pancake,False,10.746,2023-07-06 12:19:00
1,ORDI01419,2018-01-03 09:51:32,304.5,4,76.12,Jan,Wed,Spring,Breakfast,NS,15.65,Docklands,Eggs,False,10.746,2023-07-06 12:19:00
1,ORDI01419,2018-01-03 09:51:32,304.5,3,101.5,Jan,Wed,Spring,Breakfast,NS,15.65,Docklands,Coffee,False,10.746,2023-07-06 12:19:00


In [8]:
df_exploded.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1596 entries, 0 to 499
Data columns (total 16 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   order_id                 1596 non-null   object        
 1   order_date               1596 non-null   datetime64[ns]
 2   order_price              1596 non-null   float64       
 3   quantity_ordered         1596 non-null   int64         
 4   average_item_price       1596 non-null   float64       
 5   order_month              1596 non-null   category      
 6   day_of_week              1596 non-null   category      
 7   order_season             1596 non-null   category      
 8   order_type               1596 non-null   category      
 9   branch_code              1596 non-null   category      
 10  delivery_fee             1596 non-null   float64       
 11  location                 1596 non-null   category      
 12  cuisine                  1596 non-null  

### Step 2: Descriptive Statistics 

- Descriptive statistics include those that summarize the central tendency, dispersion and shape of a dataset's distribution, excluding NaN values. 

In [9]:
# Descriptive Statistics
df_exploded.describe(include='all')

Unnamed: 0,order_id,order_date,order_price,quantity_ordered,average_item_price,order_month,day_of_week,order_season,order_type,branch_code,delivery_fee,location,cuisine,customer_loyalty,distance_to_customer_km,updated_at
count,1596,1596,1596.0,1596.0,1596.0,1596,1596,1596,1596,1596,1596.0,1596,1596,1596,1596.0,1596
unique,500,,,,,12,7,4,3,3,,17,13,2,,
top,ORDI04767,,,,,Nov,Sat,Winter,Dinner,NS,,Melbourne,Shrimp,False,,
freq,5,,,,,169,259,447,578,560,,264,145,1416,,
mean,,2018-07-11 11:51:11.128445952,546.506234,5.461153,145.460702,,,,,,13.906479,,,,8.679623,2023-07-06 12:18:59.999999744
min,,2018-01-03 09:51:32,46.4,1.0,10.12,,,,,,4.21,,,,4.077,2023-07-06 12:19:00
25%,,2018-04-16 11:22:49,322.4,3.0,61.7575,,,,,,12.71,,,,7.666,2023-07-06 12:19:00
50%,,2018-07-05 14:05:04,496.8,5.0,100.38,,,,,,14.04,,,,8.76,2023-07-06 12:19:00
75%,,2018-10-11 18:18:35,726.8,8.0,165.585,,,,,,15.42,,,,9.84025,2023-07-06 12:19:00
max,,2018-12-31 20:00:00,1465.5,10.0,1191.5,,,,,,22.11,,,,12.892,2023-07-06 12:19:00


In [10]:
# Base on the information from the describe() method:

#  Orders Date Range 
print('Orders Date Range ---->: ', df_exploded['order_date'].min(), 'to', df_exploded['order_date'].max())

# Order Price Range
print('Order Price Range ---->: ', df_exploded['order_price'].min(), 'to', df_exploded['order_price'].max())

# Average Order Price in 2 decimal places
print('Average Order Price ---->: ', round(df_exploded['order_price'].mean(), 2))

# Popular Order Type
print('Popular Order Type ---->: ', df_exploded['order_type'].mode()[0])

# Popular Cuisine
print('Popular Cuisine ---->: ', df_exploded['cuisine'].mode()[0])

# Popular Branch
print('Popular Branch ---->: ', df_exploded['branch_code'].mode()[0])

# Popular Day of the Week
print('Popular Day of the Week ---->: ', df_exploded['day_of_week'].mode()[0])

# Popular Season
print('Popular Season ---->: ', df_exploded['order_season'].mode()[0])

# Popular Month
print('Popular Month ---->: ', df_exploded['order_month'].mode()[0])

# Pupluar Hour
print('Popular Hour ---->: ', df_exploded['order_date'].dt.hour.mode()[0])

# Delivery Fee Range
print('Delivery Fee Range ---->: ', df_exploded['delivery_fee'].min(), 'to', df_exploded['delivery_fee'].max())

# Average Disance to Customer in 2 decimal places
print('Average Disance to Customer ---->: ', round(df_exploded['distance_to_customer_km'].mean(), 2))

# Popular Location
print('Popular Location ---->: ', df_exploded['location'].mode()[0])

# Types of cuisine
print('Types of cuisine ---->: ', df_exploded['cuisine'].unique())

Orders Date Range ---->:  2018-01-03 09:51:32 to 2018-12-31 20:00:00
Order Price Range ---->:  46.4 to 1465.5
Average Order Price ---->:  546.51
Popular Order Type ---->:  Dinner
Popular Cuisine ---->:  Shrimp
Popular Branch ---->:  NS
Popular Day of the Week ---->:  Sat
Popular Season ---->:  Winter
Popular Month ---->:  Nov
Popular Hour ---->:  18
Delivery Fee Range ---->:  4.21 to 22.11
Average Disance to Customer ---->:  8.68
Popular Location ---->:  Melbourne
Types of cuisine ---->:  ['Cereal', 'Coffee', 'Pancake', 'Eggs', 'Steak', ..., 'Pasta', 'Fish&Chips', 'Salmon', 'Burger', 'Chicken']
Length: 13
Categories (13, object): ['Burger', 'Cereal', 'Chicken', 'Coffee', ..., 'Salad', 'Salmon', 'Shrimp', 'Steak']


### Step 2: Exploratory Data Analysis (EDA) - Business Analysis.

- Here we will be performing Exploratory Data Analysis (EDA) on the dataset.
- We will generate Business Questions and answer them using the data.

In [11]:
df = df_exploded.copy()

In [12]:
df['order_id'].value_counts().idxmax()

'ORDI04767'

In [15]:
# info of the order id 10488
df[df['order_id'] == 'ORDI04767']

Unnamed: 0,order_id,order_date,order_price,quantity_ordered,average_item_price,order_month,day_of_week,order_season,order_type,branch_code,delivery_fee,location,cuisine,customer_loyalty,distance_to_customer_km,updated_at
246,ORDI04767,2018-07-05 12:13:31,658.6,3,219.53,Jul,Thu,Autumn,Lunch,NS,12.2,Carlton,Salad,False,7.008,2023-07-06 12:19:00
246,ORDI04767,2018-07-05 12:13:31,658.6,8,82.32,Jul,Thu,Autumn,Lunch,NS,12.2,Carlton,Burger,False,7.008,2023-07-06 12:19:00
246,ORDI04767,2018-07-05 12:13:31,658.6,1,658.6,Jul,Thu,Autumn,Lunch,NS,12.2,Carlton,Fries,False,7.008,2023-07-06 12:19:00
246,ORDI04767,2018-07-05 12:13:31,658.6,7,94.09,Jul,Thu,Autumn,Lunch,NS,12.2,Carlton,Steak,False,7.008,2023-07-06 12:19:00
246,ORDI04767,2018-07-05 12:13:31,658.6,1,658.6,Jul,Thu,Autumn,Lunch,NS,12.2,Carlton,Chicken,False,7.008,2023-07-06 12:19:00


In [16]:
df['location'].value_counts()

location
Melbourne          264
East Melbourne     229
Docklands          196
North Melbourne    131
Richmond           123
Kamaishi           119
Southbank          118
Carlton            106
West Melbourne     102
Collingwood         62
Abbotsford          36
Fitzroy             35
Flemington          29
Clifton Hill        20
Upernavik           14
Port Melbourne      10
Kensington           2
Name: count, dtype: int64

In [None]:
## lat / lon the same and orders are different 

In [18]:
df['order_id'].value_counts()

order_id
ORDI04767    5
ORDA00118    5
ORDY03274    5
ORDJ04015    5
ORDI03105    5
            ..
ORDB08622    2
ORDI01205    2
ORDJ00587    2
ORDZ07342    2
ORDJ00107    2
Name: count, Length: 500, dtype: int64

In [19]:
def get_popular_cuisine(df, day_col='day_of_week', cuisine_col='cuisine'):
    """Find the most popular cuisine for each day of the week.

    Args:
        df (pd.DataFrame): Dataframe containing the data.
        day_col (str): Column name for day of the week.
        cuisine_col (str): Column name for cuisine.

    Returns:
        pd.DataFrame: Dataframe with columns for the day of the week and the most popular cuisine.
    """
    popular_cuisine = (
        df.groupby(day_col)[cuisine_col]
        .apply(lambda x: x.value_counts().idxmax())
        .reset_index()
    )
    popular_cuisine.columns = [day_col, 'most_popular_cuisine']

    return popular_cuisine

# Usage:
popular_cuisine_per_day = get_popular_cuisine(df_exploded)


In [20]:
popular_cuisine_per_day


Unnamed: 0,day_of_week,most_popular_cuisine
0,Fri,Salad
1,Mon,Fish&Chips
2,Sat,Eggs
3,Sun,Pasta
4,Thu,Pasta
5,Tue,Fries
6,Wed,Shrimp


Q: Find the most popular order_type for each location.

In [21]:
def popular_order_by_location(df):
    return df['order_type'].value_counts().idxmax()

popular_order_per_location = df_exploded.groupby('location').apply(popular_order_by_location).reset_index()
popular_order_per_location.columns = ['Location', 'Most Popular Order Type']
popular_order_per_location


Unnamed: 0,Location,Most Popular Order Type
0,Abbotsford,Lunch
1,Carlton,Breakfast
2,Clifton Hill,Lunch
3,Collingwood,Lunch
4,Docklands,Dinner
5,East Melbourne,Dinner
6,Fitzroy,Dinner
7,Flemington,Lunch
8,Kamaishi,Lunch
9,Kensington,Breakfast


In [22]:
def popular_order_by_season(df):
    return df['order_type'].value_counts().idxmax()

popular_order_per_season = df_exploded.groupby('order_season').apply(popular_order_by_season).reset_index()
popular_order_per_season.columns = ['Season', 'Most Popular Order Type']
popular_order_per_season


Unnamed: 0,Season,Most Popular Order Type
0,Autumn,Lunch
1,Spring,Breakfast
2,Summer,Breakfast
3,Winter,Dinner


In [23]:
def popular_order_by_month(df):
    return df['order_type'].value_counts().idxmax()

popular_order_per_month = df_exploded.groupby('order_month').apply(popular_order_by_month).reset_index()
popular_order_per_month.columns = ['Month', 'Most Popular Order Type']
popular_order_per_month


Unnamed: 0,Month,Most Popular Order Type
0,Apr,Breakfast
1,Aug,Lunch
2,Dec,Dinner
3,Feb,Breakfast
4,Jan,Breakfast
5,Jul,Lunch
6,Jun,Lunch
7,Mar,Breakfast
8,May,Lunch
9,Nov,Dinner


In [24]:
df['order_id'].value_counts().idxmax()


'ORDI04767'

In [27]:
# info for order_id  10488
df[df['order_id'] == 'ORDI04767']

Unnamed: 0,order_id,order_date,order_price,quantity_ordered,average_item_price,order_month,day_of_week,order_season,order_type,branch_code,delivery_fee,location,cuisine,customer_loyalty,distance_to_customer_km,updated_at
246,ORDI04767,2018-07-05 12:13:31,658.6,3,219.53,Jul,Thu,Autumn,Lunch,NS,12.2,Carlton,Salad,False,7.008,2023-07-06 12:19:00
246,ORDI04767,2018-07-05 12:13:31,658.6,8,82.32,Jul,Thu,Autumn,Lunch,NS,12.2,Carlton,Burger,False,7.008,2023-07-06 12:19:00
246,ORDI04767,2018-07-05 12:13:31,658.6,1,658.6,Jul,Thu,Autumn,Lunch,NS,12.2,Carlton,Fries,False,7.008,2023-07-06 12:19:00
246,ORDI04767,2018-07-05 12:13:31,658.6,7,94.09,Jul,Thu,Autumn,Lunch,NS,12.2,Carlton,Steak,False,7.008,2023-07-06 12:19:00
246,ORDI04767,2018-07-05 12:13:31,658.6,1,658.6,Jul,Thu,Autumn,Lunch,NS,12.2,Carlton,Chicken,False,7.008,2023-07-06 12:19:00


Q: Distribution of Order Type per branch?

###

In [28]:
# Data Visualisation
# Plot the distribution of the order_type per Branch
fig = px.histogram(df_exploded, x="branch_code", color="order_type", barmode="group", title="Distribution of order_type per Branch")
fig.show()