### Step 1. 필요한 라이브러리 임포트

In [1]:
import pandas as pd
import re

### Step 2. [데이터](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv) 확인

- order_id 주문번호
- quantity 주문 개수
- item_name 주문한 상품명
- choice_description 선택사항
- item_price 가격

### Step 3. 데이터 불러오기


**read_csv를 통해 데이터를 올바른 형태로 불러온 뒤 chipo에 저장해주세요**  
data: chipotle.tsv

In [2]:
# Load the tab-separated Chipotle order data, then preview the first rows.
chipo = pd.read_csv('../data/chipotle.tsv', delimiter='\t')
chipo.head(5)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98


### Step 4. 누락값 확인하기 
chipo에서 누락값이 존재하는지 확인해주세요

In [4]:
# Print per-column dtypes and non-null counts; a non-null count below the
# number of entries means that column has missing values.
chipo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4622 entries, 0 to 4621
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   order_id            4622 non-null   int64 
 1   quantity            4622 non-null   int64 
 2   item_name           4622 non-null   object
 3   choice_description  3376 non-null   object
 4   item_price          4622 non-null   object
dtypes: int64(2), object(3)
memory usage: 180.7+ KB


In [3]:
# Inspect a single raw choice_description value (row label 1).
chipo.at[1, 'choice_description']

'[Clementine]'

In [4]:
# One way to replace missing values with an empty-list placeholder string.
# The filled Series is only displayed here; it is not assigned back to chipo.
chipo['choice_description'].fillna(value='[]')

0                                                      []
1                                            [Clementine]
2                                                 [Apple]
3                                                      []
4       [Tomatillo-Red Chili Salsa (Hot), [Black Beans...
                              ...                        
4617    [Fresh Tomato Salsa, [Rice, Black Beans, Sour ...
4618    [Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...
4619    [Fresh Tomato Salsa, [Fajita Vegetables, Pinto...
4620    [Fresh Tomato Salsa, [Fajita Vegetables, Lettu...
4621    [Fresh Tomato Salsa, [Fajita Vegetables, Pinto...
Name: choice_description, Length: 4622, dtype: object

### Step 5. item_price에서 $를 없애고 float 형식으로 새롭게 저장해주세요.

In [5]:
# Look at the first raw price to see the text format that needs cleaning.
chipo['item_price'].iloc[:1]

0    $2.39 
Name: item_price, dtype: object

In [11]:
def delete_dollar(x):
    """Convert a price such as ``'$2.39 '`` to a float.

    Parameters
    ----------
    x : str or number
        Price text with an optional ``$`` sign, thousands separators and
        surrounding whitespace. A value that is already numeric is passed
        straight to ``float``, so applying the function a second time to an
        already-converted column does not fail.

    Returns
    -------
    float
        The numeric price.

    Raises
    ------
    ValueError
        If the text left after cleaning is not a valid number.
    """
    if isinstance(x, str):
        # Drop currency formatting; float() itself tolerates outer whitespace.
        x = x.replace('$', '').replace(',', '')
    return float(x)

In [None]:
# Overwrite item_price with the value delete_dollar returns for each element.
chipo['item_price'] = chipo['item_price'].map(delete_dollar)

In [13]:
# Preview the first five rows to confirm item_price is now numeric.
chipo.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98


### Step 6. 아이템별 주문 수(order_id)를 출력해주세요

In [14]:
# No value column is selected after the groupby, so .size() is used to get the
# number of rows in each group (.count() would instead give non-null counts
# per column).
chipo.groupby(by='item_name').size()

item_name
6 Pack Soft Drink                         54
Barbacoa Bowl                             66
Barbacoa Burrito                          91
Barbacoa Crispy Tacos                     11
Barbacoa Salad Bowl                       10
Barbacoa Soft Tacos                       25
Bottled Water                            162
Bowl                                       2
Burrito                                    6
Canned Soda                              104
Canned Soft Drink                        301
Carnitas Bowl                             68
Carnitas Burrito                          59
Carnitas Crispy Tacos                      7
Carnitas Salad                             1
Carnitas Salad Bowl                        6
Carnitas Soft Tacos                       40
Chicken Bowl                             726
Chicken Burrito                          553
Chicken Crispy Tacos                      47
Chicken Salad                              9
Chicken Salad Bowl                       110


### Step 7. 아이템별 주문 총량(quantity)을 출력해주세요.

In [14]:
# Total quantity ordered for each item.
chipo.groupby(by='item_name')['quantity'].agg('sum')

item_name
6 Pack Soft Drink                         55
Barbacoa Bowl                             66
Barbacoa Burrito                          91
Barbacoa Crispy Tacos                     12
Barbacoa Salad Bowl                       10
Barbacoa Soft Tacos                       25
Bottled Water                            211
Bowl                                       4
Burrito                                    6
Canned Soda                              126
Canned Soft Drink                        351
Carnitas Bowl                             71
Carnitas Burrito                          60
Carnitas Crispy Tacos                      8
Carnitas Salad                             1
Carnitas Salad Bowl                        6
Carnitas Soft Tacos                       40
Chicken Bowl                             761
Chicken Burrito                          591
Chicken Crispy Tacos                      50
Chicken Salad                              9
Chicken Salad Bowl                       123


### Step 8. 각 아이템의 가격을 구해서 unit_price 컬럼에 추가해주세요.
quantity가 1일 때의 가격, 즉 개당 가격입니다 (item_price / quantity).

In [15]:
# Price of a single unit: the row's item_price divided by its quantity.
chipo['unit_price'] = chipo['item_price'].div(chipo['quantity'])

In [16]:
# Preview the frame with the new unit_price column.
chipo.head(5)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price,unit_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39,2.39
1,1,1,Izze,[Clementine],3.39,3.39
2,1,1,Nantucket Nectar,[Apple],3.39,3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98,8.49


### Step 9. 아이템별 개당 가격의 평균을 구하고 가장 비싼 순으로 5개를 출력해주세요

In [17]:
# Mean unit price per item, most expensive first, top five only.
(
    chipo
    .groupby('item_name')['unit_price']
    .mean()
    .sort_values(ascending=False)
    .head(5)
)

item_name
Carnitas Salad Bowl    11.056667
Steak Salad Bowl       11.027931
Barbacoa Salad Bowl    10.640000
Carnitas Bowl          10.315588
Steak Bowl             10.232559
Name: unit_price, dtype: float64

### Step 10. 한 주문(같은 order_id)에 20달러 이상 지불한 손님을 출력해주세요.

In [18]:
# Total amount paid per order (sum of item_price over rows sharing an order_id).
chipo20 = (
    chipo
    .groupby(by='order_id')['item_price']
    .agg('sum')
)

In [18]:
# Check the first per-order total.
chipo20.iloc[:1]

order_id
1    11.56
Name: item_price, dtype: float64

In [19]:
# Keep the order ids whose per-order total is at least 20.
chipo_index = chipo20.loc[chipo20.ge(20)].index
chipo_index

Index([   4,   18,   20,   21,   26,   28,   30,   34,   40,   43,
       ...
       1811, 1812, 1822, 1824, 1825, 1827, 1829, 1830, 1833, 1834],
      dtype='int64', name='order_id', length=589)

### Step 11. 위 Step 10에서 구한 주문(20달러 이상) 중 주문량(quantity)이 2개인 아이템 가운데 가장 많이 팔린 아이템은 무엇인가요?
Chips and Guacamole

In [20]:
# Look up the order ids found above in the order_id column.
chipo20df = chipo.loc[chipo['order_id'].isin(chipo_index)]
# Of those rows, keep the ones where exactly two units were ordered.
chipo20df2 = chipo20df.loc[chipo20df['quantity'].eq(2)]
# No value column is selected after the groupby, so .size() is used to get the
# number of rows in each group.
(
    chipo20df2
    .groupby('item_name')
    .size()
    .sort_values(ascending=False)
)

item_name
Chips and Guacamole                      20
Chicken Bowl                             17
Canned Soft Drink                        15
Chicken Burrito                          13
Chips                                    12
Steak Burrito                            11
Chicken Salad Bowl                       10
Canned Soda                               7
Bottled Water                             6
Steak Bowl                                6
Chicken Crispy Tacos                      3
Chicken Soft Tacos                        3
Chips and Fresh Tomato Salsa              2
Chips and Tomatillo Green Chili Salsa     2
Chips and Tomatillo Red Chili Salsa       2
Steak Salad Bowl                          2
Nantucket Nectar                          2
Steak Soft Tacos                          1
Veggie Bowl                               1
Steak Crispy Tacos                        1
6 Pack Soft Drink                         1
Chips and Tomatillo-Red Chili Salsa       1
Chips and Tomatillo-Gr

### Step 12. Veggie Salad Bowl은 몇 건 주문되었나요?

In [63]:
# Non-null count per column for the rows whose item is exactly 'Veggie Salad Bowl'.
chipo.loc[chipo['item_name'].eq('Veggie Salad Bowl')].count()

order_id              18
quantity              18
item_name             18
choice_description    18
item_price            18
unit_price            18
dtype: int64

### Step 13. 아이템명에 Chicken이 들어간 음식은 몇 건 주문되었나요?
**Hint: 정규표현식 사용**   
1560

In [21]:
# Arguments are (pattern, string to look in).
# re.match only tries the pattern at the start of the string, so the leading
# spaces make it return None.
print(re.match(pattern='Chicken', string='   Chicken Bowl'))

# re.search scans the whole string and finds the word after the spaces.
print(re.search(pattern='Chicken', string='    Chicken Bowl'))

None
<re.Match object; span=(4, 11), match='Chicken'>


In [22]:
def find(x):
    """Search ``x`` for the text 'Chicken' anywhere in the string.

    Returns the ``re.Match`` object for the first occurrence, or ``None``
    when the string does not contain it.
    """
    chicken_pattern = re.compile('Chicken')
    return chicken_pattern.search(x)

In [26]:
# find returns a Match object or None; casting to bool turns that into a row mask.
chipo.loc[chipo['item_name'].map(find).astype(bool)]

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price,unit_price
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98,8.49
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",10.98,10.98
11,6,1,Chicken Crispy Tacos,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",8.75,8.75
12,6,1,Chicken Soft Tacos,"[Roasted Chili Corn Salsa, [Rice, Black Beans,...",8.75,8.75
13,7,1,Chicken Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...",11.25,11.25
...,...,...,...,...,...,...
4604,1828,1,Chicken Bowl,"[Fresh Tomato Salsa, [Rice, Black Beans, Chees...",8.75,8.75
4615,1832,1,Chicken Soft Tacos,"[Fresh Tomato Salsa, [Rice, Cheese, Sour Cream]]",8.75,8.75
4619,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...",11.25,11.25
4620,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Lettu...",8.75,8.75


In [24]:
# Same filter using the vectorised string method instead of a Python function.
chipo.loc[chipo['item_name'].str.contains(pat='Chicken')]

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price,unit_price
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98,8.49
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",10.98,10.98
11,6,1,Chicken Crispy Tacos,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",8.75,8.75
12,6,1,Chicken Soft Tacos,"[Roasted Chili Corn Salsa, [Rice, Black Beans,...",8.75,8.75
13,7,1,Chicken Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...",11.25,11.25
...,...,...,...,...,...,...
4604,1828,1,Chicken Bowl,"[Fresh Tomato Salsa, [Rice, Black Beans, Chees...",8.75,8.75
4615,1832,1,Chicken Soft Tacos,"[Fresh Tomato Salsa, [Rice, Cheese, Sour Cream]]",8.75,8.75
4619,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Pinto...",11.25,11.25
4620,1834,1,Chicken Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Lettu...",8.75,8.75


In [27]:
# Split each item name on single spaces, one word per output column.
chipo['item_name'].str.split(pat=' ', expand=True)

Unnamed: 0,0,1,2,3,4,5
0,Chips,and,Fresh,Tomato,Salsa,
1,Izze,,,,,
2,Nantucket,Nectar,,,,
3,Chips,and,Tomatillo-Green,Chili,Salsa,
4,Chicken,Bowl,,,,
...,...,...,...,...,...,...
4617,Steak,Burrito,,,,
4618,Steak,Burrito,,,,
4619,Chicken,Salad,Bowl,,,
4620,Chicken,Salad,Bowl,,,


In [None]:
# SQL LIKE 패턴과 대응하는 pandas 문자열 메서드 (메모)
# LIKE '%a%'  ->  Series.str.contains('a')
# LIKE 'a%'   ->  Series.str.startswith('a')
# LIKE '%a'   ->  Series.str.endswith('a')