In [1]:
import pandas as pd

In [4]:
# Load the parks table and use the 'Park Code' column as the row index,
# so rows can later be selected by park code with .loc.
df = pd.read_csv(
    './parks.csv',
    index_col=['Park Code'],
)

In [5]:
# Preview the first rows of the frame to check that it loaded as expected.
df.head()

Unnamed: 0_level_0,Park Name,State,Acres,Latitude,Longitude
Park Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACAD,Acadia National Park,ME,47390,44.35,-68.21
ARCH,Arches National Park,UT,76519,38.68,-109.57
BADL,Badlands National Park,SD,242756,43.75,-102.5
BIBE,Big Bend National Park,TX,801163,29.25,-103.25
BISC,Biscayne National Park,FL,172924,25.65,-80.08


### Indexing : Row

In [6]:
# iloc and loc both select rows by default.
# Index a single row by its integer position.
df.iloc[2]

Park Name    Badlands National Park
State                            SD
Acres                        242756
Latitude                      43.75
Longitude                    -102.5
Name: BADL, dtype: object

In [7]:
# Index a single row by its index label (here, the park code).
df.loc['BADL']

Park Name    Badlands National Park
State                            SD
Acres                        242756
Latitude                      43.75
Longitude                    -102.5
Name: BADL, dtype: object

### Indexing : Multiple Row

In [8]:
# For a contiguous range of rows, use ordinary slice syntax
# (a positional iloc slice excludes the end position).
df.iloc[4:7]

Unnamed: 0_level_0,Park Name,State,Acres,Latitude,Longitude
Park Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BISC,Biscayne National Park,FL,172924,25.65,-80.08
BLCA,Black Canyon of the Gunnison National Park,CO,32950,38.57,-107.72
BRCA,Bryce Canyon National Park,UT,35835,37.57,-112.18


In [11]:
# To pick individual, non-adjacent rows, pass their positions as a list.
df.iloc[[2,5,8]]

Unnamed: 0_level_0,Park Name,State,Acres,Latitude,Longitude
Park Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BADL,Badlands National Park,SD,242756,43.75,-102.5
BLCA,Black Canyon of the Gunnison National Park,CO,32950,38.57,-107.72
CARE,Capitol Reef National Park,UT,241904,38.2,-111.17


In [10]:
# To select multiple rows with loc, pass the index labels (strings) as a list.
df.loc[['BISC', 'BLCA', 'BRCA']]

Unnamed: 0_level_0,Park Name,State,Acres,Latitude,Longitude
Park Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BISC,Biscayne National Park,FL,172924,25.65,-80.08
BLCA,Black Canyon of the Gunnison National Park,CO,32950,38.57,-107.72
BRCA,Bryce Canyon National Park,UT,35835,37.57,-112.18


### Indexing : Columns

In [31]:
# Dictionary-style access to a column. At this point in the notebook the
# columns still carry their original capitalised names (they are only
# lowercased further down), so the keys must be 'State' / 'Acres';
# lowercase keys raise KeyError on a fresh top-to-bottom run.
# Only the last expression of a cell is rendered, so this Series is
# evaluated but not shown.
df['State'].head(3)

# Selecting several columns works like selecting several rows: pass a list.
df[['State','Acres']].head()

Unnamed: 0_level_0,state,acres
Park Code,Unnamed: 1_level_1,Unnamed: 2_level_1
ACAD,ME,47390
ARCH,UT,76519
BADL,SD,242756
BIBE,TX,801163
BISC,FL,172924


In [13]:
# Attribute-style access to a column (only possible when the column name
# is a valid Python identifier, so not for a name such as 'Park Name').
df.State.head(3)

Park Code
ACAD    ME
ARCH    UT
BADL    SD
Name: State, dtype: object

In [14]:
# List the current column labels.
df.columns

Index(['Park Name', 'State', 'Acres', 'Latitude', 'Longitude'], dtype='object')

In [16]:
# Build snake_case column names: spaces become underscores and all
# letters are lowercased (e.g. 'Park Name' -> 'park_name').
col_names = [col.replace(' ','_').lower() for col in df.columns]
# End the cell with the bare name so the resulting list is displayed.
col_names

['park_name', 'state', 'acres', 'latitude', 'longitude']

In [17]:
# Replace the column labels on df with the normalised names; every cell
# after this one must refer to columns by their lowercase names.
df.columns = col_names

In [19]:
# Confirm that the rename took effect.
df.columns

Index(['park_name', 'state', 'acres', 'latitude', 'longitude'], dtype='object')

### Indexing : Columns and Rows

In [27]:
# Column selection and row selection can be chained in either order.
# Selecting the columns first seems to read more intuitively.
# (Chaining like this is fine for reading; for assignment use a single
# .loc call instead.)
df[['state','acres']][:4]

Unnamed: 0_level_0,state,acres
Park Code,Unnamed: 1_level_1,Unnamed: 2_level_1
ACAD,ME,47390
ARCH,UT,76519
BADL,SD,242756
BIBE,TX,801163


In [28]:
# Same selection with the order reversed: slice the rows first, then
# pick the columns.
df[:4][['state','acres']]

Unnamed: 0_level_0,state,acres
Park Code,Unnamed: 1_level_1,Unnamed: 2_level_1
ACAD,ME,47390
ARCH,UT,76519
BADL,SD,242756
BIBE,TX,801163


In [32]:
# A scalar position on a column returns the single value itself.
df.state.iloc[2]

'SD'

In [33]:
# A one-element list of positions returns a one-row Series, keeping the
# index label and the column name.
df.state.iloc[[2]]

Park Code
BADL    SD
Name: state, dtype: object

### Selecting a Subset of the Data

##### pandas에서 데이터를 subsetting 하기 위한 주요 방법은 boolean indexing이다.

In [37]:
# Comparing a column to a value yields a boolean Series aligned with the
# frame's index: True where the state equals 'UT'.
(df.state == 'UT').head()

Park Code
ACAD    False
ARCH     True
BADL    False
BIBE    False
BISC    False
Name: state, dtype: bool

##### 위와 같은 불리언 결과를 다시 DataFrame에 전달하면 boolean이 True로 평가하는 DataFrame의 하위집합(Subset)이 된다.

In [40]:
# Passing the boolean mask back into df keeps only the rows where the
# mask is True.
df[df.state == 'UT']

Unnamed: 0_level_0,park_name,state,acres,latitude,longitude
Park Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ARCH,Arches National Park,UT,76519,38.68,-109.57
BRCA,Bryce Canyon National Park,UT,35835,37.57,-112.18
CANY,Canyonlands National Park,UT,337598,38.2,-109.93
CARE,Capitol Reef National Park,UT,241904,38.2,-111.17
ZION,Zion National Park,UT,146598,37.3,-113.05


##### logical operators
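* `~` is used in place of `not`
* `|` is used in place of `or`
* `&` is used in place of `and`
* Wrap each comparison in parentheses, because `~`, `|` and `&` bind more tightly than `==`, `>` and `<`.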

In [43]:
# Keep parks whose latitude is above 60 OR whose area exceeds one
# million acres; each comparison is parenthesised before combining.
df.loc[(df['latitude'] > 60) | (df['acres'] > 10 ** 6)].head()

Unnamed: 0_level_0,park_name,state,acres,latitude,longitude
Park Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
DENA,Denali National Park and Preserve,AK,3372402,63.33,-150.5
DEVA,Death Valley National Park,"CA, NV",4740912,36.24,-116.82
EVER,Everglades National Park,FL,1508538,25.32,-80.93
GAAR,Gates Of The Arctic National Park and Preserve,AK,7523898,67.78,-153.3
GLAC,Glacier National Park,MT,1013572,48.8,-114.0


##### You can also use more complicated expressions, including lambdas.

In [50]:
# Split each park name into words and keep the parks whose name consists
# of exactly three words.
df[
    df['park_name']
    .str.split()
    .apply(lambda words: len(words) == 3)
].head()

Unnamed: 0_level_0,park_name,state,acres,latitude,longitude
Park Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACAD,Acadia National Park,ME,47390,44.35,-68.21
ARCH,Arches National Park,UT,76519,38.68,-109.57
BADL,Badlands National Park,SD,242756,43.75,-102.5
BISC,Biscayne National Park,FL,172924,25.65,-80.08
CANY,Canyonlands National Park,UT,337598,38.2,-109.93


### Key Companion Methods : isin / isnull

In [52]:
# Keep parks whose state value is exactly one of the listed codes
# (a combined value such as "CA, NV" does not match).
df[df['state'].isin(['WA', 'OR', 'CA'])].head()

Unnamed: 0_level_0,park_name,state,acres,latitude,longitude
Park Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CHIS,Channel Islands National Park,CA,249561,34.01,-119.42
CRLA,Crater Lake National Park,OR,183224,42.94,-122.1
JOTR,Joshua Tree National Park,CA,789745,33.79,-115.9
LAVO,Lassen Volcanic National Park,CA,106372,40.49,-121.51
MORA,Mount Rainier National Park,WA,235625,46.85,-121.75
