# Chipotle快餐数据 - 了解数据

![](images/1.jpeg)

### 1. 从文件中加载数据，并查看数据前3行

In [1]:
# 导入pandas库并命名为pd
import pandas as pd

In [2]:
# Load the Chipotle order data from a tab-separated file into a DataFrame
chipo = pd.read_csv('data/chipotle.tsv', sep='\t')

In [3]:
# Preview the first three rows
chipo.head(3)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39


In [4]:
# Inspect the dtype of each column
chipo.dtypes

order_id               int64
quantity               int64
item_name             object
choice_description    object
item_price            object
dtype: object

In [5]:
# Convert item_price from text such as "$2.39" to float.
# The dtype guard makes this cell safe to re-run: once the column is numeric,
# slicing each value (x[1:]) would raise TypeError on a float.
if not pd.api.types.is_numeric_dtype(chipo['item_price']):
    # Vectorised form of float(x[1:]): drop the first character (the "$"), then cast.
    chipo['item_price'] = chipo['item_price'].str[1:].astype(float)

In [6]:
# Re-check the column dtypes after the item_price conversion
chipo.dtypes

order_id                int64
quantity                int64
item_name              object
choice_description     object
item_price            float64
dtype: object

In [7]:
# Preview the first ten rows
chipo.head(10)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",10.98
6,3,1,Side of Chips,,1.69
7,4,1,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",11.75
8,4,1,Steak Soft Tacos,"[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...",9.25
9,5,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...",9.25


### 2. 查看数据集的基本信息

In [8]:
# Print the index range, per-column non-null counts, dtypes and memory usage
chipo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4622 entries, 0 to 4621
Data columns (total 5 columns):
order_id              4622 non-null int64
quantity              4622 non-null int64
item_name             4622 non-null object
choice_description    3376 non-null object
item_price            4622 non-null float64
dtypes: float64(1), int64(2), object(2)
memory usage: 180.7+ KB


### 3. 查看数据的行列数和列名

In [9]:
# Report the frame's dimensions as a (rows, columns) tuple
print('(row, column) = ', chipo.shape)

(row, column) =  (4622, 5)


In [10]:
# Column names
print('columns = ', chipo.columns)

columns =  Index(['order_id', 'quantity', 'item_name', 'choice_description',
       'item_price'],
      dtype='object')


In [11]:
# Inspect the row index
chipo.index

RangeIndex(start=0, stop=4622, step=1)

### 4. 查询商品种类的数量，以及下单最多的几种商品

In [12]:
# Count the distinct item names three equivalent ways:
# unique(), value_counts() and groupby().
item_names = chipo['item_name']
print('Total items: ', len(item_names.unique()))
print('--------------------------------------------------------------------------------')
print('Total items: ', item_names.value_counts().count())
print('--------------------------------------------------------------------------------')
print('Total items = ', len(chipo.groupby(['item_name']).groups))

Total items:  50
--------------------------------------------------------------------------------
Total items:  50
--------------------------------------------------------------------------------
Total items =  50


In [13]:
# Names of the five items that appear on the most rows.
# value_counts() counts rows per item; it does not add up the quantity column.
chipo['item_name'].value_counts().head().index

Index(['Chicken Bowl', 'Chicken Burrito', 'Chips and Guacamole',
       'Steak Burrito', 'Canned Soft Drink'],
      dtype='object')

In [14]:
# Row counts for the five items that appear on the most rows
chipo['item_name'].value_counts().head()

Chicken Bowl           726
Chicken Burrito        553
Chips and Guacamole    479
Steak Burrito          368
Canned Soft Drink      301
Name: item_name, dtype: int64

### 5. 查询最受欢迎的食物及其销量，销售额

In [18]:
# Most popular items, ranked by total quantity sold.
# numeric_only=True restricts the sum to the numeric columns. Since pandas 2.0 the
# default is False, which would also try to aggregate the text column
# choice_description.
# NOTE: the order_id column of the result is a sum of ids and carries no meaning.
favoriteItems = (
    chipo.groupby(['item_name'])
    .sum(numeric_only=True)
    .sort_values('quantity', ascending=False)
)
favoriteItems.head()

Unnamed: 0_level_0,order_id,quantity,item_price
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Chicken Bowl,713926,761,7342.73
Chicken Burrito,497303,591,5575.82
Chips and Guacamole,449959,506,2201.04
Steak Burrito,328437,386,3851.43
Canned Soft Drink,304753,351,438.75


### 6. 查询最受欢迎食物（下单数，销量，销售额）

In [19]:
# Per-item summary: number of rows (count of order_id), total quantity and total
# item_price, ranked by total item_price.
favoriteItems = (
    chipo.groupby(['item_name'])
    .agg({
        'order_id' : 'count',
        'quantity' : 'sum',
        'item_price' : 'sum',
    })
    .sort_values('item_price', ascending=False)
)
favoriteItems.head()

Unnamed: 0_level_0,order_id,quantity,item_price
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Chicken Bowl,726,761,7342.73
Chicken Burrito,553,591,5575.82
Steak Burrito,368,386,3851.43
Steak Bowl,211,221,2260.19
Chips and Guacamole,479,506,2201.04


### 7. 查询订单的商品总数量

In [75]:
# Total number of items across all rows
chipo['quantity'].sum()

4972

### 8. 统计总收入

In [20]:
# Total revenue: sum of the item_price column
chipo['item_price'].sum()

34500.16

### 9. 统计订单总数

In [22]:
# Number of distinct orders: one value_counts() entry per unique order_id
rows_per_order = chipo['order_id'].value_counts()
rows_per_order.count()

1834

### 10. 订单平均消费

In [82]:
# Average spend per order: total revenue divided by the number of distinct order ids.
# (An alternative is the mean of the per-order item_price sums.)
revenue_total = chipo['item_price'].sum()
distinct_orders = chipo['order_id'].value_counts().count()
revenue_total / distinct_orders

18.811428571428575

### 11. 按商品名称过滤查询数据

In [91]:
# Rows for 'Chicken Bowl' where more than one was ordered on the same line
is_chicken_bowl = chipo['item_name'] == 'Chicken Bowl'
is_multiple = chipo['quantity'] > 1
chipo[is_chicken_bowl & is_multiple].head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98
154,70,2,Chicken Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...",17.5
282,124,2,Chicken Bowl,"[Fresh Tomato Salsa, [Rice, Black Beans, Chees...",17.5
409,178,3,Chicken Bowl,"[[Fresh Tomato Salsa (Mild), Tomatillo-Green C...",32.94
415,181,2,Chicken Bowl,[Tomatillo Red Chili Salsa],17.5


### 12. 查询消费金额最大的订单信息 (3个)

In [23]:
# Per-order totals, largest bill first.
# numeric_only=True keeps only the numeric columns (quantity, item_price). Since
# pandas 2.0 the default is False, which would also try to aggregate the text
# columns item_name and choice_description.
maxOrder = (
    chipo.groupby(['order_id'])
    .sum(numeric_only=True)
    .sort_values('item_price', ascending=False)
)
maxOrder.head(10)

Unnamed: 0_level_0,quantity,item_price
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1
926,23,205.25
1443,35,160.74
1483,14,139.0
691,11,118.25
1786,20,114.3
205,12,109.9
511,17,104.59
491,10,102.0
1449,11,95.39
759,18,86.3


In [25]:
# Order ids of the three largest orders, taken from the already-sorted index
arrOrderId = maxOrder.index[:3].values
arrOrderId

array([ 926, 1443, 1483], dtype=int64)

In [108]:
# All rows that belong to the three largest orders
in_top_orders = chipo['order_id'].isin(arrOrderId)
chipo.loc[in_top_orders]

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
2304,926,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Sour Cream, Lettuce]]",9.25
2305,926,1,Chicken Bowl,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",8.75
2306,926,1,Chicken Bowl,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",8.75
2307,926,1,Chicken Bowl,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",8.75
2308,926,1,Steak Bowl,"[Fresh Tomato Salsa, [Rice, Black Beans, Lettu...",9.25
2309,926,1,Chicken Bowl,"[Fresh Tomato Salsa, [Rice, Black Beans, Chees...",8.75
2310,926,1,Steak Burrito,"[Roasted Chili Corn Salsa, [Rice, Cheese, Sour...",9.25
2311,926,1,Chicken Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Chees...",8.75
2312,926,1,Chicken Bowl,"[Fresh Tomato Salsa, [Rice, Lettuce]]",8.75
2313,926,1,Chicken Bowl,"[Fresh Tomato Salsa, [Rice, Cheese, Sour Cream...",8.75
