### Scenario
### You are a financial data analyst at Chipotle and your manager has tasked you with analyzing the most recent sales numbers. She has provided the following set of questions she would like answered

In [1]:
## Import the library
import pandas as pd
import numpy  as np
import matplotlib.pyplot as   plt
import seaborn as sns
import plotly.express as px
from ydata_profiling import ProfileReport
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
plt.style.use('fivethirtyeight')
sns.set()

pd.options.display.float_format = '{:,.2f}'.format
pd.options.display.max_rows = None
pd.options.display.max_columns = None

In [2]:
url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv'
chipo = pd.read_csv(url, sep = '\t')

In [11]:
chipo.sample(10)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
3081,1228,1,Steak Bowl,"[Roasted Chili Corn Salsa, [Rice, Black Beans,...",$11.75
4470,1781,1,Steak Bowl,"[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...",$11.75
175,78,1,Steak Soft Tacos,"[Roasted Chili Corn Salsa (Medium), Lettuce]",$8.99
3087,1230,1,Chicken Bowl,"[[Roasted Chili Corn Salsa (Medium), Fresh Tom...",$8.49
4442,1770,1,Bottled Water,,$1.50
3789,1517,1,6 Pack Soft Drink,[Diet Coke],$6.49
1535,624,1,Chips and Guacamole,,$3.99
107,47,1,Canned Soda,[Dr. Pepper],$1.09
1866,756,1,Barbacoa Crispy Tacos,"[Fresh Tomato Salsa, [Rice, Cheese, Sour Cream]]",$9.25
3032,1206,1,Chips and Guacamole,,$4.45


In [5]:
chipo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4622 entries, 0 to 4621
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   order_id            4622 non-null   int64 
 1   quantity            4622 non-null   int64 
 2   item_name           4622 non-null   object
 3   choice_description  3376 non-null   object
 4   item_price          4622 non-null   object
dtypes: int64(2), object(3)
memory usage: 180.7+ KB


In [12]:
chipo.duplicated().sum()

59

In [17]:
## remove duplicated 
chipo.drop_duplicates(inplace=True)

In [18]:
chipo.duplicated().sum()

0

In [19]:
chipo.isnull().sum()

order_id                 0
quantity                 0
item_name                0
choice_description    1228
item_price               0
dtype: int64

In [20]:
chipo.sample(10)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
2270,913,1,Carnitas Burrito,"[Fresh Tomato Salsa, [Rice, Pinto Beans, Cheese]]",$9.25
2697,1071,1,Carnitas Burrito,"[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...",$9.25
1353,552,1,Steak Soft Tacos,"[Tomatillo Green Chili Salsa, [Fajita Vegetabl...",$11.75
2249,907,1,Chicken Soft Tacos,"[Roasted Chili Corn Salsa, [Rice, Cheese]]",$8.75
4265,1701,1,Veggie Bowl,"[Fresh Tomato Salsa, [Rice, Black Beans, Chees...",$11.25
3464,1392,1,Chicken Bowl,"[Fresh Tomato Salsa, [Rice, Cheese, Guacamole,...",$11.25
3784,1514,1,Canned Soft Drink,[Diet Coke],$1.25
1870,758,1,Chips,,$2.15
4043,1618,1,Canned Soft Drink,[Diet Coke],$1.25
4307,1718,2,Steak Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Sour ...",$23.50


In [21]:
# remove $ from item_price
chipo.item_price=chipo.item_price.str.strip("$")

In [23]:
chipo.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4563 entries, 0 to 4621
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   order_id            4563 non-null   int64 
 1   quantity            4563 non-null   int64 
 2   item_name           4563 non-null   object
 3   choice_description  3335 non-null   object
 4   item_price          4563 non-null   object
dtypes: int64(2), object(3)
memory usage: 213.9+ KB


In [24]:
## Turn the item price into a float
# change type of item_price

chipo['item_price']=chipo['item_price'].astype(float)

In [25]:
chipo.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4563 entries, 0 to 4621
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   order_id            4563 non-null   int64  
 1   quantity            4563 non-null   int64  
 2   item_name           4563 non-null   object 
 3   choice_description  3335 non-null   object 
 4   item_price          4563 non-null   float64
dtypes: float64(1), int64(2), object(2)
memory usage: 213.9+ KB


In [36]:
## Univariate Analyses 

In [44]:
chipo.columns

Index(['order_id', 'quantity', 'item_name', 'choice_description',
       'item_price'],
      dtype='object')

In [None]:
## Which was the most-ordered item?

In [42]:
chipo.item_name.value_counts()

Chicken Bowl                             717
Chicken Burrito                          546
Chips and Guacamole                      474
Steak Burrito                            365
Canned Soft Drink                        290
Steak Bowl                               210
Chips                                    208
Bottled Water                            155
Chicken Soft Tacos                       111
Chips and Fresh Tomato Salsa             110
Chicken Salad Bowl                       110
Canned Soda                              102
Side of Chips                            101
Veggie Burrito                            95
Barbacoa Burrito                          90
Veggie Bowl                               85
Carnitas Bowl                             68
Barbacoa Bowl                             65
Carnitas Burrito                          59
Steak Soft Tacos                          55
6 Pack Soft Drink                         54
Chicken Crispy Tacos                      47
Chips and 

In [46]:
fig=px.bar(data_frame=chipo.item_name.value_counts(),text_auto=True)
fig.update_layout(title="Distribution of item_name" )
fig.update_layout(xaxis_title= "item_name")
fig.update_traces(textposition='outside')
fig.update_layout(title_x=.5)
fig.show()

In [None]:
## Which was the most-ordered item?  Chicken Bowl 

In [None]:
## For the most-ordered item, how many items were ordered? 717

In [None]:
## What was the most ordered item in the choice_description column?

In [141]:
chipo[chipo['item_name']=="Chicken Bowl"].groupby('choice_description')['item_name'].value_counts()

choice_description                                                                                                                                                                                item_name   
[Fresh Tomato (Mild), [Guacamole, Rice]]                                                                                                                                                          Chicken Bowl     1
[Fresh Tomato (Mild), [Lettuce, Fajita Veggies, Black Beans, Rice, Sour Cream, Cheese]]                                                                                                           Chicken Bowl     1
[Fresh Tomato (Mild), [Lettuce, Fajita Veggies, Pinto Beans, Rice, Sour Cream, Cheese]]                                                                                                           Chicken Bowl     1
[Fresh Tomato (Mild), [Rice, Sour Cream, Cheese]]                                                                                                         

In [None]:
## How many items were ordered in total? 4563

In [142]:
chipo.order_id.count()

4563

In [None]:
## How much was the revenue for the period in the dataset? 38914.11$

In [145]:
chipo.sample(5)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
2123,855,1,Veggie Bowl,"[Roasted Chili Corn Salsa, [Rice, Black Beans,...",11.25
868,359,1,Chicken Bowl,"[Tomatillo Green Chili Salsa, [Rice, Pinto Bea...",8.75
967,397,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Chees...",9.25
744,307,1,Chips and Guacamole,,3.99
2295,922,1,Chicken Bowl,"[Roasted Chili Corn Salsa, [Cheese, Lettuce, F...",8.75


In [147]:
chipo['revenue']=chipo['quantity']*chipo["item_price"]

In [149]:
chipo.sample(5)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price,revenue
3970,1591,2,Steak Burrito,"[Fresh Tomato Salsa, [Lettuce, Sour Cream, Che...",18.5,37.0
1522,620,1,Chicken Burrito,"[Tomatillo Red Chili Salsa, [Rice, Black Beans...",8.75,8.75
3168,1266,1,Chicken Burrito,"[Tomatillo Red Chili Salsa, [Rice, Black Beans...",8.75,8.75
2493,990,1,Canned Soda,[Coca Cola],1.09,1.09
2286,918,1,Chicken Burrito,"[Fresh Tomato Salsa, Rice]",8.75,8.75


In [150]:
chipo.revenue.sum()

38914.11

In [None]:
## How many orders were made in the period? 4913

In [151]:
chipo.quantity.sum()

4913

In [152]:
## What is the average revenue amount per order?

In [153]:
chipo.groupby("item_name")['revenue'].mean()

item_name
6 Pack Soft Drink                        6.85
Barbacoa Bowl                           10.20
Barbacoa Burrito                         9.84
Barbacoa Crispy Tacos                   12.61
Barbacoa Salad Bowl                     10.78
Barbacoa Soft Tacos                     10.02
Bottled Water                            4.12
Bowl                                    37.00
Burrito                                  7.40
Canned Soda                              1.86
Canned Soft Drink                        2.03
Carnitas Bowl                           12.22
Carnitas Burrito                        10.45
Carnitas Crispy Tacos                   13.71
Carnitas Salad                           8.99
Carnitas Salad Bowl                     11.06
Carnitas Soft Tacos                      9.40
Chicken Bowl                            11.10
Chicken Burrito                         11.58
Chicken Crispy Tacos                    11.15
Chicken Salad                            9.01
Chicken Salad Bowl      

In [155]:
## How many different items are sold?

In [164]:
chipo.groupby("item_name")['quantity'].sum().reset_index()

Unnamed: 0,item_name,quantity
0,6 Pack Soft Drink,55
1,Barbacoa Bowl,65
2,Barbacoa Burrito,90
3,Barbacoa Crispy Tacos,12
4,Barbacoa Salad Bowl,9
5,Barbacoa Soft Tacos,25
6,Bottled Water,204
7,Bowl,4
8,Burrito,6
9,Canned Soda,124
