# Getting and Knowing your Data



### Step 1. Import the necessary libraries

In [57]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns, sklearn

### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv).

### Step 3. Assign it to a variable called chipo.

In [58]:
chipo = pd.read_csv(r'D:\python\extra\19_12_2023\chipotle.tsv.txt',delimiter = '\t')
chipo

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98
...,...,...,...,...,...
4617,1833,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Sour ...",$11.75
4618,1833,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...",$11.75
4619,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...",$11.25
4620,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Lettu...",$8.75


### Step 4. See the first 10 entries

In [59]:
chipo.head(10)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",$10.98
6,3,1,Side of Chips,,$1.69
7,4,1,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",$11.75
8,4,1,Steak Soft Tacos,"[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...",$9.25
9,5,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...",$9.25


### Step 5. What is the number of observations in the dataset?

In [60]:
chipo.shape[0]

4622

### Step 6. What is the number of columns in the dataset?

In [61]:
chipo.shape[1]

5

### Step 7. Print the name of all the columns.

In [62]:
chipo.columns

Index(['order_id', 'quantity', 'item_name', 'choice_description',
       'item_price'],
      dtype='object')

### Step 8. How is the dataset indexed?

In [63]:
chipo.index

RangeIndex(start=0, stop=4622, step=1)

### Step 9. Which was the most ordered item?

In [64]:
most_ordered_item = chipo.groupby('item_name')['quantity'].sum()
most_ordered_item

item_name
6 Pack Soft Drink                         55
Barbacoa Bowl                             66
Barbacoa Burrito                          91
Barbacoa Crispy Tacos                     12
Barbacoa Salad Bowl                       10
Barbacoa Soft Tacos                       25
Bottled Water                            211
Bowl                                       4
Burrito                                    6
Canned Soda                              126
Canned Soft Drink                        351
Carnitas Bowl                             71
Carnitas Burrito                          60
Carnitas Crispy Tacos                      8
Carnitas Salad                             1
Carnitas Salad Bowl                        6
Carnitas Soft Tacos                       40
Chicken Bowl                             761
Chicken Burrito                          591
Chicken Crispy Tacos                      50
Chicken Salad                              9
Chicken Salad Bowl                       123


In [65]:
most_ordered_item.idxmax()

'Chicken Bowl'

### Step 10. How many items were ordered?

In [111]:
chipo.groupby('item_name')['order_id'].sum()

item_name
6 Pack Soft Drink                         52322
Barbacoa Bowl                             53972
Barbacoa Burrito                          74718
Barbacoa Crispy Tacos                      5613
Barbacoa Salad Bowl                        9708
Barbacoa Soft Tacos                       18725
Bottled Water                            175944
Bowl                                        472
Burrito                                    1550
Canned Soda                               76396
Canned Soft Drink                        304753
Carnitas Bowl                             62742
Carnitas Burrito                          51652
Carnitas Crispy Tacos                      5868
Carnitas Salad                             1500
Carnitas Salad Bowl                        6693
Carnitas Soft Tacos                       37673
Chicken Bowl                             713926
Chicken Burrito                          497303
Chicken Crispy Tacos                      47382
Chicken Salad                 

In [112]:
chipo.groupby('item_name')['order_id'].sum().sum()

4285772

### Step 11. What was the most ordered item in the choice_description column?

In [67]:
most_ordered_choice = chipo.groupby('choice_description')['quantity'].sum()
most_ordered_choice

choice_description
[Adobo-Marinated and Grilled Chicken, Pinto Beans, [Sour Cream, Salsa, Cheese, Cilantro-Lime Rice, Guacamole]]                                 1
[Adobo-Marinated and Grilled Chicken, [Sour Cream, Cheese, Cilantro-Lime Rice]]                                                                1
[Adobo-Marinated and Grilled Chicken]                                                                                                          1
[Adobo-Marinated and Grilled Steak, [Sour Cream, Salsa, Cheese, Cilantro-Lime Rice, Guacamole]]                                                1
[Adobo-Marinated and Grilled Steak]                                                                                                            1
                                                                                                                                              ..
[[Tomatillo-Red Chili Salsa (Hot), Tomatillo-Green Chili Salsa (Medium)], [Rice, Black Beans, Cheese, Lettuce]]

In [68]:
most_ordered_choice.idxmax()

'[Diet Coke]'

### Step 12. How many items were orderd in total?

In [69]:
items_were_ordered = chipo['quantity'].sum()
items_were_ordered 

4972

### Step 13. Turn the item price into a float

In [70]:
chipo['item_price'] = chipo['item_price'].replace('[\$,]', '', regex=True).astype(float)
print(chipo.dtypes)

order_id                int64
quantity                int64
item_name              object
choice_description     object
item_price            float64
dtype: object


### Step 14. How much was the revenue for the period in the dataset?

In [71]:
chipo_R = chipo['quantity'] * chipo['item_price']
Revenue = chipo_R.sum()
Revenue

39237.02

### Step 15. How many orders were made in the period?

In [72]:
orders = chipo['order_id'].nunique()
orders

1834

### Step 16. What is the average amount per order?

In [97]:
average_amount_per_order = chipo.groupby('order_id')['item_price'].mean()
average_amount_per_order

order_id
1        2.890000
2       16.980000
3        6.335000
4       10.500000
5        6.850000
          ...    
1830    11.500000
1831     4.300000
1832     6.600000
1833    11.750000
1834     9.583333
Name: item_price, Length: 1834, dtype: float64

In [96]:
average_amount_per_order = chipo.groupby('order_id')['item_price'].mean().mean()
average_amount_per_order

7.841910975496616

### Step 17. How many different items are sold?

In [79]:
different_items_sold = chipo['item_name'].nunique()
different_items_sold

50