In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Read chipotle.csv into `df` and preview the first rows.
df = pd.read_csv(
    "chipotle.csv",
    encoding="utf8",
)
df.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98


In [3]:
# Summarise the frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4622 entries, 0 to 4621
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   order_id            4622 non-null   int64 
 1   quantity            4622 non-null   int64 
 2   item_name           4622 non-null   object
 3   choice_description  3376 non-null   object
 4   item_price          4622 non-null   object
dtypes: int64(2), object(3)
memory usage: 180.7+ KB


In [4]:
# Drop the order_id column.
# Reassigning (instead of inplace=True) together with errors="ignore" keeps
# this cell safe to re-run: a second execution no longer raises KeyError
# because "order_id" has already been removed.

df = df.drop(columns="order_id", errors="ignore")
df.head(5)

Unnamed: 0,quantity,item_name,choice_description,item_price
0,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,Izze,[Clementine],$3.39
2,1,Nantucket Nectar,[Apple],$3.39
3,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98


In [5]:
# Rename the price column: item_price -> order_price (modifies df in place).
df.rename({"item_price": "order_price"}, axis="columns", inplace=True)
df.head()

Unnamed: 0,quantity,item_name,choice_description,order_price
0,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,Izze,[Clementine],$3.39
2,1,Nantucket Nectar,[Apple],$3.39
3,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98


In [6]:
# Get the most expensive order: what was ordered?

In [7]:
# Convert order_price from "$2.39"-style text to float.
# The numeric-dtype guard makes the cell re-runnable: once the column holds
# floats, applying string operations again would raise on a second execution.
# regex=False forces a literal match, since "$" is otherwise a regex
# end-of-string anchor.
if not pd.api.types.is_numeric_dtype(df["order_price"]):
    df["order_price"] = (
        df["order_price"].str.replace("$", "", regex=False).astype(float)
    )

# Row(s) whose order_price equals the column maximum.
df[df["order_price"] == df["order_price"].max()]

Unnamed: 0,quantity,item_name,choice_description,order_price
3598,15,Chips and Fresh Tomato Salsa,,44.25


In [8]:
# How many Chicken Bowls were ordered in total?

In [9]:
# Sum `quantity` over every row whose item_name is "Chicken Bowl".
df.loc[df["item_name"] == "Chicken Bowl", "quantity"].sum()

761

In [10]:
# Total revenue of Chicken Bowl

In [11]:
# Chicken Bowl revenue: summed order_price over its rows, rounded to 2 decimals.
round(df.loc[df["item_name"] == "Chicken Bowl", "order_price"].sum(), 2)

7342.73

In [12]:
# Single brackets with one column label select that column as a Series.
df["order_price"].head()

0     2.39
1     3.39
2     3.39
3     2.39
4    16.98
Name: order_price, dtype: float64

In [13]:
# Double brackets (a list of column labels) select a one-column DataFrame.
df[["order_price"]].head()

Unnamed: 0,order_price
0,2.39
1,3.39
2,3.39
3,2.39
4,16.98


In [14]:
# Total quantity per item, with item_name restored as a regular column.
items_by_quantity = (
    df.groupby("item_name")[["quantity"]]
    .sum()
    .reset_index()
)
items_by_quantity.head()

Unnamed: 0,item_name,quantity
0,6 Pack Soft Drink,55
1,Barbacoa Bowl,66
2,Barbacoa Burrito,91
3,Barbacoa Crispy Tacos,12
4,Barbacoa Salad Bowl,10


In [15]:
# Order items from highest to lowest total quantity and show the top five.
items_by_quantity = items_by_quantity.sort_values(by="quantity", ascending=False)
items_by_quantity.head()

Unnamed: 0,item_name,quantity
17,Chicken Bowl,761
18,Chicken Burrito,591
25,Chips and Guacamole,506
39,Steak Burrito,386
10,Canned Soft Drink,351


In [16]:
# get top 5 items by revenue

In [17]:
# Revenue per item (summed order_price), sorted from highest to lowest.
items_by_revenue = (
    df.groupby("item_name")[["order_price"]]
    .sum()
    .reset_index()
    .sort_values(by="order_price", ascending=False)
)
items_by_revenue.head()

Unnamed: 0,item_name,order_price
17,Chicken Bowl,7342.73
18,Chicken Burrito,5575.82
39,Steak Burrito,3851.43
38,Steak Bowl,2260.19
25,Chips and Guacamole,2201.04


In [18]:
# Revenue share per item, as a percentage of total revenue.
# This cell rounds with Python's built-in round(); it works on a Series, but
# the pandas-native Series.round() used in the next cell is the preferred form.

total_revenue = df["order_price"].sum()

items_by_revenue["revenue%"] = round(items_by_revenue["order_price"] / total_revenue * 100, 2)
items_by_revenue.head()

Unnamed: 0,item_name,order_price,revenue%
17,Chicken Bowl,7342.73,21.28
18,Chicken Burrito,5575.82,16.16
39,Steak Burrito,3851.43,11.16
38,Steak Bowl,2260.19,6.55
25,Chips and Guacamole,2201.04,6.38


In [19]:
# Same revenue share, rounded with the pandas Series.round() method
# instead of Python's built-in round().

items_by_revenue["revenue%"] = (
    items_by_revenue["order_price"] / total_revenue * 100
).round(2)
items_by_revenue.head()

Unnamed: 0,item_name,order_price,revenue%
17,Chicken Bowl,7342.73,21.28
18,Chicken Burrito,5575.82,16.16
39,Steak Burrito,3851.43,11.16
38,Steak Bowl,2260.19,6.55
25,Chips and Guacamole,2201.04,6.38


In [20]:
# filter soda with quantity more than 1

In [21]:
# Boolean-mask filtering (df[mask]) returns a new DataFrame, not a view of the
# original: assigning into `cs` does not change `df` (and may raise
# SettingWithCopyWarning on pandas versions without Copy-on-Write).

cs = df[df["item_name"] == "Canned Soda"]
# len() counts rows directly; count().iloc[0] counted the non-null values of
# the first column only, which undercounts if that column ever contains NaN.
len(cs[cs["quantity"] > 1])

20

In [22]:
# Build each filter separately, then combine them with & into a single mask.
# The result is 0 when no row satisfies both conditions.

condition_1 = df["item_name"] == "Canned Soda"
condition_2 = df["quantity"] > 1
# Summing the boolean mask counts matching rows without depending on null
# values in any column (count().iloc[0] only counted the first column's
# non-null entries).
int((condition_1 & condition_2).sum())

20