# Visualizing Chipotle's Data

Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.

### Step 1. Import the necessary libraries

In [None]:
import pandas as pd

### Step 2. Import the dataset (chipotle.tsv). 

In [None]:
# Read the tab-separated Chipotle orders file into a DataFrame named `chipo`.
# The file is a .tsv, so the field delimiter has to be given explicitly.
chipo = pd.read_csv('chipotle.tsv', delimiter='\t')

### Step 3. Assign it to a variable called chipo.

In [None]:
# The dataset was already read into `chipo` in Step 2, so the file is not
# read a second time here; just preview the first rows to confirm the load.
chipo.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98


### Step 4. Show the average of the column item_price

In [None]:
# Inspect item_price before averaging: the printed dtype shows whether the
# column is numeric, and the first few values give a quick sanity check.
print(chipo.item_price.dtype)
print(chipo.item_price.head())

# Average of the item_price column (displayed as the cell's result).
chipo.item_price.mean()

float64
0     2.39
1     3.39
2     3.39
3     2.39
4    16.98
Name: item_price, dtype: float64


np.float64(7.464335785374297)

### Step 5. Count the number of occurrences for "Chicken Bowl"

In [None]:
# Number of rows whose item_name is exactly "Chicken Bowl":
# build a boolean mask and sum it (each True counts as 1).
chipo['item_name'].eq('Chicken Bowl').sum()

### Step 6. Count the number of items with ``NULL`` as the choice_description

In [None]:
# Count the rows that have no choice_description.
# read_csv (called with its default NA handling when the file was loaded)
# converts the literal text NULL, as well as empty fields, into NaN. Comparing
# against the string 'NULL' therefore never matches; missing values have to be
# detected with isna() instead.
chipo['choice_description'].isna().sum()

### Step 7. Show the first 10 entries

In [None]:
# Display the first ten rows of the dataset (positional slice of rows 0-9).
chipo.iloc[:10]

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",10.98
6,3,1,Side of Chips,,1.69
7,4,1,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",11.75
8,4,1,Steak Soft Tacos,"[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...",9.25
9,5,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...",9.25


### Step 8. Create a bar plot of the top 5 items bought (total quantity per item)
Think simple... It is possible to do with what you know. No extras needed.

In [None]:
import matplotlib.pyplot as plt

# Total the quantity sold for each item and keep the five largest totals.
top5 = chipo.groupby('item_name')['quantity'].sum().nlargest(5)

# Series.plot returns the Axes it drew on, so the title and axis labels
# are set on that object directly.
ax = top5.plot(kind='bar')
ax.set_title('Top 5 Items by Quantity')
ax.set_xlabel('Item Name')
ax.set_ylabel('Total Quantity')
ax.figure.tight_layout()  # keep the item-name tick labels inside the figure
plt.show()

### Step 9. Create a scatterplot with the number of items ordered per order price
#### Hint: Price should be on the X-axis and Items ordered on the Y-axis
(You can groupby and use the result)

In [None]:
# Collapse the line items into one row per order: sum item_price to get the
# order's price and sum quantity to get the number of items in the order.
order_data = (
    chipo.groupby('order_id')[['item_price', 'quantity']]
    .sum()
    .reset_index()
)

# One point per order: price on the X-axis, items ordered on the Y-axis.
plt.scatter(x=order_data['item_price'], y=order_data['quantity'])
plt.title('Items Ordered vs Order Price')
plt.xlabel('Order Price')
plt.ylabel('Items Ordered')
plt.show()