# 데이터 핸들링을 위한 판다스 기초2

## 인덱싱 / 슬라이싱

In [1]:
# 학습 전 실행: csv파일 (data.csv) 생성
import pandas as pd
# Build the sample cafe-menu table directly as a DataFrame (one list per
# column) and save it as data.csv. index=False keeps the row labels out of the
# file, so the CSV holds only the four data columns.
data = pd.DataFrame(
    {
        '메뉴': [
            '아메리카노', '카페라떼', '카페모카', '바닐라라떼',
            '녹차', '초코라떼', '바닐라콜드브루',
        ],
        '가격': [4100, 4600, 4600, 5100, 4100, 5000, 5100],
        '할인율': [0.5, 0.1, 0.2, 0.3, 0, 0, 0],
        '칼로리': [10, 180, 420, 320, 20, 500, 400],
    }
)
data.to_csv('data.csv', index=False)

In [2]:
# Load the menu table from data.csv into a DataFrame.
# No index column is specified, so the rows get integer labels 0..6
# (see the output below); the loc examples address rows by these labels.
df = pd.read_csv('data.csv')
df

Unnamed: 0,메뉴,가격,할인율,칼로리
0,아메리카노,4100,0.5,10
1,카페라떼,4600,0.1,180
2,카페모카,4600,0.2,420
3,바닐라라떼,5100,0.3,320
4,녹차,4100,0.0,20
5,초코라떼,5000,0.0,500
6,바닐라콜드브루,5100,0.0,400


### loc
- 인덱스 명(레이블)으로 행을 선택
- 인덱스 명(범위), 컬럼 명(범위)으로 선택하며, 범위의 끝 레이블도 결과에 포함

In [3]:
# Indexing with loc: select one whole row by its index label.
# Label 0 is the 아메리카노 row; the result is a Series keyed by column name.
df.loc[0]

메뉴     아메리카노
가격      4100
할인율      0.5
칼로리       10
Name: 0, dtype: object

In [4]:
# loc with a full row slice (:) and one column label: the entire 가격 (price)
# column, returned as a Series.
df.loc[:,'가격']

0    4100
1    4600
2    4600
3    5100
4    4100
5    5000
6    5100
Name: 가격, dtype: int64

In [5]:
# loc with a single row label and a single column label returns one scalar
# (not a slice): the 가격 (price) of row 1 (카페라떼), i.e. 4600.
df.loc[1,'가격']

4600

In [6]:
# Display the full table again for reference.
df

Unnamed: 0,메뉴,가격,할인율,칼로리
0,아메리카노,4100,0.5,10
1,카페라떼,4600,0.1,180
2,카페모카,4600,0.2,420
3,바닐라라떼,5100,0.3,320
4,녹차,4100,0.0,20
5,초코라떼,5000,0.0,500
6,바닐라콜드브루,5100,0.0,400


In [7]:
# loc with a column-label slice: row 2 (카페모카), columns 메뉴 through 가격.
# The end label of the slice (가격) is included in the result.
df.loc[2,'메뉴':'가격']

메뉴    카페모카
가격    4600
Name: 2, dtype: object

In [8]:
# loc with a list of column labels: pick the non-adjacent columns 메뉴 (menu)
# and 칼로리 (calories) of row 2 (카페모카).
df.loc[2, ['메뉴','칼로리']]

메뉴     카페모카
칼로리     420
Name: 2, dtype: object

In [9]:
# loc with slices on both axes: rows 1 through 2 (카페라떼, 카페모카) and
# columns 메뉴 through 가격. Both end labels are included, so the result is a
# two-row DataFrame.
df.loc[1:2,'메뉴':'가격']

Unnamed: 0,메뉴,가격
1,카페라떼,4600
2,카페모카,4600


### iloc
- 인덱스 번호(0부터 시작하는 위치)로 행을 선택
- 인덱스 번호(범위), 컬럼 번호(범위)로 선택하며, 범위의 끝 번호는 결과에 포함되지 않음

In [10]:
# Display the full table again for reference.
df

Unnamed: 0,메뉴,가격,할인율,칼로리
0,아메리카노,4100,0.5,10
1,카페라떼,4600,0.1,180
2,카페모카,4600,0.2,420
3,바닐라라떼,5100,0.3,320
4,녹차,4100,0.0,20
5,초코라떼,5000,0.0,500
6,바닐라콜드브루,5100,0.0,400


In [11]:
# Indexing with iloc: select one whole row by integer position.
# Position 0 is the first row (아메리카노); the result is a Series.
df.iloc[0]

메뉴     아메리카노
가격      4100
할인율      0.5
칼로리       10
Name: 0, dtype: object

In [12]:
# iloc with a full row slice (:) and one column position: column position 1
# is 가격 (price), returned as a Series.
df.iloc[:,1]

0    4100
1    4600
2    4600
3    5100
4    4100
5    5000
6    5100
Name: 가격, dtype: int64

In [13]:
# Display the full table again for reference.
df

Unnamed: 0,메뉴,가격,할인율,칼로리
0,아메리카노,4100,0.5,10
1,카페라떼,4600,0.1,180
2,카페모카,4600,0.2,420
3,바닐라라떼,5100,0.3,320
4,녹차,4100,0.0,20
5,초코라떼,5000,0.0,500
6,바닐라콜드브루,5100,0.0,400


In [14]:
# iloc with a positional column slice: row position 2 (카페모카) and column
# positions 0 and 1 (메뉴, 가격). The stop position 2 is not included.
df.iloc[2,:2]

메뉴    카페모카
가격    4600
Name: 2, dtype: object

In [15]:
# iloc with a positional row slice: all columns of row positions 1 and 2
# (카페라떼 and 카페모카). The stop position 3 is not included.
df.iloc[1:3]

Unnamed: 0,메뉴,가격,할인율,칼로리
1,카페라떼,4600,0.1,180
2,카페모카,4600,0.2,420


## 데이터 추가

In [16]:
# Add a new 원두 (coffee bean) column filled with missing values (NaN).
# The column is created with dtype=object instead of the float64 that a bare
# `df['원두'] = np.nan` would produce, because string values are stored in it
# afterwards. Writing a str into a float64 column emits a FutureWarning on
# pandas >= 2.1 and is slated to raise in a later release.
import numpy as np
df['원두'] = pd.Series(np.nan, index=df.index, dtype=object)
df

Unnamed: 0,메뉴,가격,할인율,칼로리,원두
0,아메리카노,4100,0.5,10,
1,카페라떼,4600,0.1,180,
2,카페모카,4600,0.2,420,
3,바닐라라떼,5100,0.3,320,
4,녹차,4100,0.0,20,
5,초코라떼,5000,0.0,500,
6,바닐라콜드브루,5100,0.0,400,


In [17]:
# Assign a single cell with loc: set the 원두 (bean) value of row label 0
# (아메리카노) to '콜롬비아', then display the table.
df.loc[0, "원두"] = '콜롬비아'
df

Unnamed: 0,메뉴,가격,할인율,칼로리,원두
0,아메리카노,4100,0.5,10,콜롬비아
1,카페라떼,4600,0.1,180,
2,카페모카,4600,0.2,420,
3,바닐라라떼,5100,0.3,320,
4,녹차,4100,0.0,20,
5,초코라떼,5000,0.0,500,
6,바닐라콜드브루,5100,0.0,400,


In [18]:
# Append a row from a list. Assigning to a label that does not exist yet
# ('시즌') adds a new row with that label. The list supplies one value per
# column, in column order (메뉴, 가격, 할인율, 칼로리, 원두).
df.loc['시즌'] = ['크리스마스라떼', 6000, 0, 500, '한국']
df

Unnamed: 0,메뉴,가격,할인율,칼로리,원두
0,아메리카노,4100,0.5,10,콜롬비아
1,카페라떼,4600,0.1,180,
2,카페모카,4600,0.2,420,
3,바닐라라떼,5100,0.3,320,
4,녹차,4100,0.0,20,
5,초코라떼,5000,0.0,500,
6,바닐라콜드브루,5100,0.0,400,
시즌,크리스마스라떼,6000,0.0,500,한국


In [19]:
# Append a row (label 7) from a dict. Keys are matched to column names; the
# columns not present in the dict (할인율, 원두) are left as NaN in the new
# row, as the output below shows.
df.loc[7] = {'메뉴':'에소프레소', '가격':2000, '칼로리':10}
df

Unnamed: 0,메뉴,가격,할인율,칼로리,원두
0,아메리카노,4100,0.5,10,콜롬비아
1,카페라떼,4600,0.1,180,
2,카페모카,4600,0.2,420,
3,바닐라라떼,5100,0.3,320,
4,녹차,4100,0.0,20,
5,초코라떼,5000,0.0,500,
6,바닐라콜드브루,5100,0.0,400,
시즌,크리스마스라떼,6000,0.0,500,한국
7,에소프레소,2000,,10,


## Sorting

In [20]:
# Display the table as it stands before the sorting examples.
df

Unnamed: 0,메뉴,가격,할인율,칼로리,원두
0,아메리카노,4100,0.5,10,콜롬비아
1,카페라떼,4600,0.1,180,
2,카페모카,4600,0.2,420,
3,바닐라라떼,5100,0.3,320,
4,녹차,4100,0.0,20,
5,초코라떼,5000,0.0,500,
6,바닐라콜드브루,5100,0.0,400,
시즌,크리스마스라떼,6000,0.0,500,한국
7,에소프레소,2000,,10,


In [24]:
# Remove the row labelled '시즌' and keep the result in df.
# NOTE(review): presumably done so the index holds only integer labels before
# the index-sorting example that follows - confirm.
df = df.drop(index='시즌')
df

Unnamed: 0,메뉴,가격,할인율,칼로리,원두
0,아메리카노,4100,0.5,10,콜롬비아
1,카페라떼,4600,0.1,180,
2,카페모카,4600,0.2,420,
3,바닐라라떼,5100,0.3,320,
4,녹차,4100,0.0,20,
5,초코라떼,5000,0.0,500,
6,바닐라콜드브루,5100,0.0,400,
7,에소프레소,2000,,10,


In [26]:
# Sort rows by index label. ascending defaults to True; passing False gives
# descending order (7 down to 0). The sorted frame is only displayed here and
# is not assigned back to df.
df.sort_index(ascending=False)

Unnamed: 0,메뉴,가격,할인율,칼로리,원두
7,에소프레소,2000,,10,
6,바닐라콜드브루,5100,0.0,400,
5,초코라떼,5000,0.0,500,
4,녹차,4100,0.0,20,
3,바닐라라떼,5100,0.3,320,
2,카페모카,4600,0.2,420,
1,카페라떼,4600,0.1,180,
0,아메리카노,4100,0.5,10,콜롬비아


In [34]:
# Sort by column values instead of by index label (ascending defaults to
# True). Rows are ordered by 메뉴 (menu name) in descending order and the
# sorted frame replaces df.
df = df.sort_values(by='메뉴', ascending=False)
df

Unnamed: 0,메뉴,가격,할인율,칼로리,원두
2,카페모카,4600,0.2,420,
1,카페라떼,4600,0.1,180,
5,초코라떼,5000,0.0,500,
7,에소프레소,2000,,10,
0,아메리카노,4100,0.5,10,콜롬비아
6,바닐라콜드브루,5100,0.0,400,
3,바닐라라떼,5100,0.3,320,
4,녹차,4100,0.0,20,


In [35]:
# Sort on two keys: 가격 (price) descending first, then 메뉴 (menu name)
# ascending to order rows that share the same price. Each entry of
# `ascending` applies to the key at the same position in `by`.
df = df.sort_values(
    by=['가격', '메뉴'],
    ascending=[False, True],
)
df

Unnamed: 0,메뉴,가격,할인율,칼로리,원두
3,바닐라라떼,5100,0.3,320,
6,바닐라콜드브루,5100,0.0,400,
5,초코라떼,5000,0.0,500,
1,카페라떼,4600,0.1,180,
2,카페모카,4600,0.2,420,
4,녹차,4100,0.0,20,
0,아메리카노,4100,0.5,10,콜롬비아
7,에소프레소,2000,,10,


In [37]:
# Renumber the index as 0..n-1 in the current row order. drop=True discards
# the old index labels instead of inserting them as a new column. The result
# is only displayed here; it is not assigned back to df.
df.reset_index(drop = True)

Unnamed: 0,메뉴,가격,할인율,칼로리,원두
0,바닐라라떼,5100,0.3,320,
1,바닐라콜드브루,5100,0.0,400,
2,초코라떼,5000,0.0,500,
3,카페라떼,4600,0.1,180,
4,카페모카,4600,0.2,420,
5,녹차,4100,0.0,20,
6,아메리카노,4100,0.5,10,콜롬비아
7,에소프레소,2000,,10,
