In [1]:
# import the required packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb

%matplotlib inline

In [2]:
# Read the four source tables into DataFrames.
orders = pd.read_csv("orders.csv")
order_details = pd.read_csv("order_details.csv")
# Both pizza lookup files are decoded with the same non-default codec.
# NOTE(review): 'unicode_escape' also interprets backslash sequences in the
# data; presumably it was chosen to get past non-UTF-8 bytes -- confirm whether
# an explicit codec such as latin-1 is what is actually needed.
pizza_types, pizzas = (
    pd.read_csv(csv_name, encoding='unicode_escape')
    for csv_name in ("pizza_types.csv", "pizzas.csv")
)

In [3]:
# Print a summary of the freshly loaded orders frame: row count, column names,
# non-null counts and dtypes.
orders.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21350 entries, 0 to 21349
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   order_id  21350 non-null  int64 
 1   date      21350 non-null  object
 2   time      21350 non-null  object
dtypes: int64(1), object(2)
memory usage: 500.5+ KB


In [4]:
# Turn the date column (loaded as plain strings) into datetime values so the
# .dt accessor can be used on it.
orders["date"] = pd.to_datetime(orders["date"])

In [5]:
# Derive the weekday name (e.g. "Thursday") of every order date and store it
# as a new `day` column.
weekday_names = orders["date"].dt.day_name()
orders["day"] = weekday_names

In [22]:
# Parse the clock time with an explicit format.  Without `format`, pandas has
# to guess how to read time-only strings and fills in the current date, so the
# column's values change from one run to the next.  With "%H:%M:%S" every value
# carries the fixed placeholder date 1900-01-01; only the time-of-day part is
# meaningful.
# NOTE(review): assumes the raw values look like "11:38:36", as in the saved
# orders.head() output -- confirm against orders.csv.
orders["time"] = pd.to_datetime(orders["time"], format="%H:%M:%S")

In [24]:
# Pull the hour-of-day component out of the parsed order time and keep it as a
# new integer `hour` column.
order_hours = orders["time"].dt.hour
orders["hour"] = order_hours

In [25]:
# Re-check the orders frame after the conversions above: date and time should
# now be datetime columns, and the derived day and hour columns should exist.
orders.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21350 entries, 0 to 21349
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   order_id  21350 non-null  int64         
 1   date      21350 non-null  datetime64[ns]
 2   time      21350 non-null  datetime64[ns]
 3   day       21350 non-null  object        
 4   hour      21350 non-null  int64         
dtypes: datetime64[ns](2), int64(2), object(1)
memory usage: 834.1+ KB


In [11]:
# Preview the first five orders.
orders.head(n=5)

Unnamed: 0,order_id,date,time,day
0,1,2015-01-01,11:38:36,Thursday
1,2,2015-01-01,11:57:40,Thursday
2,3,2015-01-01,12:12:28,Thursday
3,4,2015-01-01,12:16:31,Thursday
4,5,2015-01-01,12:21:30,Thursday


In [7]:
# Print a summary of order_details: row count, column names, non-null counts
# and dtypes.
order_details.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48620 entries, 0 to 48619
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   order_details_id  48620 non-null  int64 
 1   order_id          48620 non-null  int64 
 2   pizza_id          48620 non-null  object
 3   quantity          48620 non-null  int64 
dtypes: int64(3), object(1)
memory usage: 1.5+ MB


In [8]:
# Print a summary of pizza_types: row count, column names, non-null counts and
# dtypes.
pizza_types.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   pizza_type_id  32 non-null     object
 1   name           32 non-null     object
 2   category       32 non-null     object
 3   ingredients    32 non-null     object
dtypes: object(4)
memory usage: 1.1+ KB


In [9]:
# Print a summary of pizzas: row count, column names, non-null counts and
# dtypes.
pizzas.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96 entries, 0 to 95
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   pizza_id       96 non-null     object 
 1   pizza_type_id  96 non-null     object 
 2   size           96 non-null     object 
 3   price          96 non-null     float64
dtypes: float64(1), object(3)
memory usage: 3.1+ KB


In [6]:
# Preview the first five orders.
orders.head(n=5)

Unnamed: 0,order_id,date,time
0,1,2015-01-01,11:38:36
1,2,2015-01-01,11:57:40
2,3,2015-01-01,12:12:28
3,4,2015-01-01,12:16:31
4,5,2015-01-01,12:21:30


In [4]:
# (rows, columns) of the orders frame.
# NOTE(review): the saved output shows 3 columns, but in file order this cell
# comes after the day and hour columns are added, so a top-to-bottom run
# should report 5 -- re-run the notebook from a fresh kernel.
orders.shape

(21350, 3)

In [5]:
# Count the distinct values in every column of orders (one result per column).
orders.nunique()

order_id    21350
date          358
time        16382
dtype: int64

In [8]:
# Number of orders placed on each weekday; the result is ordered by day name
# alphabetically, not by position in the week.
orders.groupby("day")["day"].count()

day
Friday       3538
Monday       2794
Saturday     3158
Sunday       2624
Thursday     3239
Tuesday      2973
Wednesday    3024
Name: day, dtype: int64

In [10]:
# Number of orders placed on each calendar date.
orders.groupby("date")["date"].count()

date
2015-01-01    69
2015-01-02    67
2015-01-03    66
2015-01-04    52
2015-01-05    54
              ..
2015-12-27    35
2015-12-28    39
2015-12-29    27
2015-12-30    32
2015-12-31    73
Name: date, Length: 358, dtype: int64

In [11]:
# Number of orders sharing each exact order time.
orders.groupby("time")["time"].count()

time
09:52:21    1
10:25:19    1
10:34:34    1
10:43:04    1
10:50:46    1
           ..
23:05:08    1
23:05:16    1
23:05:17    1
23:05:24    1
23:05:52    1
Name: time, Length: 16382, dtype: int64

In [12]:
# Preview the first five order lines.
order_details.head(n=5)

Unnamed: 0,order_details_id,order_id,pizza_id,quantity
0,1,1,hawaiian_m,1
1,2,2,classic_dlx_m,1
2,3,2,five_cheese_l,1
3,4,2,ital_supr_l,1
4,5,2,mexicana_m,1


In [13]:
# Count the distinct values in every column of order_details.
order_details.nunique()

order_details_id    48620
order_id            21350
pizza_id               91
quantity                4
dtype: int64

In [14]:
# Number of order lines for each quantity value.
order_details.groupby("quantity")["quantity"].count()

quantity
1    47693
2      903
3       21
4        3
Name: quantity, dtype: int64

In [15]:
# Preview the first five pizza types.
pizza_types.head(n=5)

Unnamed: 0,pizza_type_id,name,category,ingredients
0,bbq_ckn,The Barbecue Chicken Pizza,Chicken,"Barbecued Chicken, Red Peppers, Green Peppers,..."
1,cali_ckn,The California Chicken Pizza,Chicken,"Chicken, Artichoke, Spinach, Garlic, Jalapeno ..."
2,ckn_alfredo,The Chicken Alfredo Pizza,Chicken,"Chicken, Red Onions, Red Peppers, Mushrooms, A..."
3,ckn_pesto,The Chicken Pesto Pizza,Chicken,"Chicken, Tomatoes, Red Peppers, Spinach, Garli..."
4,southw_ckn,The Southwest Chicken Pizza,Chicken,"Chicken, Tomatoes, Red Peppers, Red Onions, Ja..."


In [16]:
# Preview the first five pizzas (one row per type/size combination shown).
pizzas.head(n=5)

Unnamed: 0,pizza_id,pizza_type_id,size,price
0,bbq_ckn_s,bbq_ckn,S,12.75
1,bbq_ckn_m,bbq_ckn,M,16.75
2,bbq_ckn_l,bbq_ckn,L,20.75
3,cali_ckn_s,cali_ckn,S,12.75
4,cali_ckn_m,cali_ckn,M,16.75


In [17]:
# Count the missing values in each column of orders.
orders.isna().sum()

order_id    0
date        0
time        0
day         0
dtype: int64

In [18]:
# How many distinct calendar dates have at least one order.
orders["date"].nunique()

358

In [19]:
# How many distinct order times occur in the data.
orders["time"].nunique()

16382

In [20]:
# (rows, columns) of the pizza_types frame.
pizza_types.shape

(32, 4)

In [21]:
# How many distinct pizza ids the pizzas table defines.
# NOTE(review): the saved outputs show 96 ids here but only 91 distinct
# pizza_id values in order_details -- presumably some pizzas were never
# ordered; worth confirming.
pizzas["pizza_id"].nunique()

96