# DataFrame 인덱싱 

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Raw column data for the example frame.
# The names are spelled out in full ('Hyunsuk') so that the equality filter
# used later in the notebook, df['names'] == 'Hyunsuk', actually matches rows.
data = {
    'names': ['Hyunsuk', 'Hyunsuk', 'Hyunsuk', 'Charles', 'Charles'],
    'year': [2014, 2015, 2016, 2015, 2016],
    'points': [1.5, 1.7, 3.6, 2.4, 2.9],
}
# `columns` fixes the column order; 'penalty' has no entry in `data`,
# so that column is created and filled with missing values.
# `index` labels the rows with strings instead of the default 0..4.
df = pd.DataFrame(
    data,
    columns=['year', 'names', 'points', 'penalty'],
    index=['one', 'two', 'three', 'four', 'five'],
)

In [3]:
# Display the new frame; 'penalty' has no source data yet, so it shows as missing.
df

Unnamed: 0,year,names,points,penalty
one,2014,Hyunsuk,1.5,
two,2015,Hyunsuk,1.7,
three,2016,Hyunsuk,3.6,
four,2015,Charles,2.4,
five,2016,Charles,2.9,


In [4]:
# Select only the 'year' column (bracket access returns it as a Series)
df['year']  

one      2014
two      2015
three    2016
four     2015
five     2016
Name: year, dtype: int64

In [5]:
# Attribute-style access: gives the same 'year' column as df['year']
df.year

one      2014
two      2015
three    2016
four     2015
five     2016
Name: year, dtype: int64

In [6]:
# Select two or more columns by passing a list of column names
df[['year','points']]

Unnamed: 0,year,points
one,2014,1.5
two,2015,1.7
three,2016,3.6
four,2015,2.4
five,2016,2.9


In [7]:
# Assign a value: the scalar 0.5 is written to every row of 'penalty'
df['penalty'] = 0.5

In [8]:
# Display df to check the filled 'penalty' column
df

Unnamed: 0,year,names,points,penalty
one,2014,Hyunsuk,1.5,0.5
two,2015,Hyunsuk,1.7,0.5
three,2016,Hyunsuk,3.6,0.5
four,2015,Charles,2.4,0.5
five,2016,Charles,2.9,0.5


In [9]:
# Assign a different value to each row (one list element per row)
df['penalty'] = [0.1,0.2,0.3,0.4,0.5]

In [10]:
# Display df to check the per-row 'penalty' values
df

Unnamed: 0,year,names,points,penalty
one,2014,Hyunsuk,1.5,0.1
two,2015,Hyunsuk,1.7,0.2
three,2016,Hyunsuk,3.6,0.3
four,2015,Charles,2.4,0.4
five,2016,Charles,2.9,0.5


In [11]:
# Add a new column and fill it with values.
# Note: despite the column name 'zeros', np.arange(5) stores 0, 1, 2, 3, 4.
df['zeros'] = np.arange(5)

In [12]:
# Display df to check the newly added 'zeros' column
df

Unnamed: 0,year,names,points,penalty,zeros
one,2014,Hyunsuk,1.5,0.1,0
two,2015,Hyunsuk,1.7,0.2,1
three,2016,Hyunsuk,3.6,0.3,2
four,2015,Charles,2.4,0.4,3
five,2016,Charles,2.9,0.5,4


In [13]:
# Series holding values for only three of the row labels, keyed by label.
val = pd.Series({'two': -1.2, 'four': -1.5, 'five': -1.7})

In [14]:
# Add a 'debt' column from the Series `val`. Values are assigned to the rows
# whose index labels match; rows with no matching label are left missing.
df['debt'] = val

In [15]:
# Display df to check how 'debt' was aligned on the index labels
df

Unnamed: 0,year,names,points,penalty,zeros,debt
one,2014,Hyunsuk,1.5,0.1,0,
two,2015,Hyunsuk,1.7,0.2,1,-1.2
three,2016,Hyunsuk,3.6,0.3,2,
four,2015,Charles,2.4,0.4,3,-1.5
five,2016,Charles,2.9,0.5,4,-1.7


In [16]:
# Derive a new column: element-wise points minus penalty.
df['net_points'] = df['points'].sub(df['penalty'])

In [17]:
# Boolean flag column: True where net_points is strictly greater than 2.0.
df['high_points'] = df['net_points'].gt(2.0)

In [18]:
# Display df to check the derived 'net_points' and 'high_points' columns
df

Unnamed: 0,year,names,points,penalty,zeros,debt,net_points,high_points
one,2014,Hyunsuk,1.5,0.1,0,,1.4,False
two,2015,Hyunsuk,1.7,0.2,1,-1.2,1.5,False
three,2016,Hyunsuk,3.6,0.3,2,,3.3,True
four,2015,Charles,2.4,0.4,3,-1.5,2.0,False
five,2016,Charles,2.9,0.5,4,-1.7,2.4,True


In [19]:
# Delete existing columns (each `del` removes one column from df in place)
del df['high_points']
del df['net_points']
del df['zeros']

In [20]:
# Display df to confirm the three columns are gone
df

Unnamed: 0,year,names,points,penalty,debt
one,2014,Hyunsuk,1.5,0.1,
two,2015,Hyunsuk,1.7,0.2,-1.2
three,2016,Hyunsuk,3.6,0.3,
four,2015,Charles,2.4,0.4,-1.5
five,2016,Charles,2.9,0.5,-1.7


In [21]:
# Inspect the remaining column labels
df.columns

Index(['year', 'names', 'points', 'penalty', 'debt'], dtype='object')

In [22]:
# Give the row index and the column index their own names, then display df;
# the names appear as headers ('Order' / 'Info') in the rendered table.
df.index.name = 'Order'
df.columns.name = 'Info'
df

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014,Hyunsuk,1.5,0.1,
two,2015,Hyunsuk,1.7,0.2,-1.2
three,2016,Hyunsuk,3.6,0.3,
four,2015,Charles,2.4,0.4,-1.5
five,2016,Charles,2.9,0.5,-1.7


In [23]:
# Working with rows
# A plain integer slice on the frame selects rows by position (the first three here)
df[0:3]

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014,Hyunsuk,1.5,0.1,
two,2015,Hyunsuk,1.7,0.2,-1.2
three,2016,Hyunsuk,3.6,0.3,


In [24]:
# Slice rows by index label; the end label 'four' is included in the result
df['two':'four']

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
two,2015,Hyunsuk,1.7,0.2,-1.2
three,2016,Hyunsuk,3.6,0.3,
four,2015,Charles,2.4,0.4,-1.5


In [25]:
# Recommended ways to select rows: the .loc[] and .iloc[] indexers

In [26]:
# .loc[]: look up by row/column labels

In [27]:
df.loc['two']  # fetch every value in the row labelled 'two'

Info
year          2015
names      Hyunsuk
points         1.7
penalty        0.2
debt          -1.2
Name: two, dtype: object

In [28]:
df.loc['two':'four']  # fetch every column for rows 'two' through 'four' (end label included)

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
two,2015,Hyunsuk,1.7,0.2,-1.2
three,2016,Hyunsuk,3.6,0.3,
four,2015,Charles,2.4,0.4,-1.5


In [29]:
# Fetch the 'points' column for a range of row labels
df.loc['two':'four', 'points']

Order
two      1.7
three    3.6
four     2.4
Name: points, dtype: float64

In [30]:
# ':' as the row selector keeps every row; only the 'year' column is returned
df.loc[:,'year']

Order
one      2014
two      2015
three    2016
four     2015
five     2016
Name: year, dtype: int64

In [31]:
# Look up two or more columns by passing a list of labels
df.loc[:,['year','names']]

Info,year,names
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
one,2014,Hyunsuk
two,2015,Hyunsuk
three,2016,Hyunsuk
four,2015,Charles
five,2016,Charles


In [32]:
# Range indexing: a row-label slice combined with a list of column labels
df.loc['two':'four', ['year','names']]

Info,year,names
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
two,2015,Hyunsuk
three,2016,Hyunsuk
four,2015,Charles


In [33]:
# Adding a row: first show df as it is before the new row is added
df

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014,Hyunsuk,1.5,0.1,
two,2015,Hyunsuk,1.7,0.2,-1.2
three,2016,Hyunsuk,3.6,0.3,
four,2015,Charles,2.4,0.4,-1.5
five,2016,Charles,2.9,0.5,-1.7


In [34]:
# Assigning to a row label that does not exist yet ('six') appends a new row.
# NOTE(review): in the recorded output 'year' is displayed as float (2014.0)
# after this assignment, where it was shown as an integer before.
df.loc['six', :] = [2017, 'Joohee', 3.3, 0.6, -1.3]
df

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014.0,Hyunsuk,1.5,0.1,
two,2015.0,Hyunsuk,1.7,0.2,-1.2
three,2016.0,Hyunsuk,3.6,0.3,
four,2015.0,Charles,2.4,0.4,-1.5
five,2016.0,Charles,2.9,0.5,-1.7
six,2017.0,Joohee,3.3,0.6,-1.3


In [35]:
# .iloc[]: NumPy-style lookup by integer row/column position

In [36]:
# Row at integer position 3 (the row labelled 'four')
df.iloc[3]

Info
year          2015
names      Charles
points         2.4
penalty        0.4
debt          -1.5
Name: four, dtype: object

In [37]:
# Rows at positions 0-2 and columns at positions 0-1 (slice ends are excluded)
df.iloc[0:3, 0:2]

Info,year,names
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
one,2014.0,Hyunsuk
two,2015.0,Hyunsuk
three,2016.0,Hyunsuk


In [38]:
# Select rows and columns independently by giving a list of positions for each
df.iloc[[0,1,3], [1,2]]

Info,names,points
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
one,Hyunsuk,1.5
two,Hyunsuk,1.7
four,Charles,2.4


In [39]:
# Every row, columns at positions 1-3
df.iloc[:, 1:4]

Info,names,points,penalty
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,Hyunsuk,1.5,0.1
two,Hyunsuk,1.7,0.2
three,Hyunsuk,3.6,0.3
four,Charles,2.4,0.4
five,Charles,2.9,0.5
six,Joohee,3.3,0.6


In [40]:
# Fetch a single value (row position 1, column position 1)
df.iloc[1,1]

'Hyunsuk'

In [41]:
# A bare ':' in .loc selects every row, so the whole frame is shown
df.loc[:]

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014.0,Hyunsuk,1.5,0.1,
two,2015.0,Hyunsuk,1.7,0.2,-1.2
three,2016.0,Hyunsuk,3.6,0.3,
four,2015.0,Charles,2.4,0.4,-1.5
five,2016.0,Charles,2.9,0.5,-1.7
six,2017.0,Joohee,3.3,0.6,-1.3


In [42]:
# Boolean indexing

In [43]:
# Find where year is greater than 2014; the result is a boolean Series (one flag per row)
df['year'] > 2014

Order
one      False
two       True
three     True
four      True
five      True
six       True
Name: year, dtype: bool

In [60]:
# Boolean mask: True for rows whose year is later than 2014.
add = df['year'] > 2014
# Show the mask next to the data as an extra 'add' column.
# DataFrame.assign returns a new frame, so `df` itself is left unchanged and
# the cells that follow keep working on the original five columns, as their
# recorded outputs show.
df.assign(add=add)

Info,year,names,points,penalty,debt,add
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
one,2014.0,Hyunsuk,1.5,0.1,,False
two,2015.0,Hyunsuk,1.7,0.2,-1.2,True
three,2016.0,Hyunsuk,3.6,0.3,,True
four,2015.0,Charles,2.4,0.4,-1.5,True
five,2016.0,Charles,2.9,0.5,-1.7,True
six,2017.0,Joohee,3.3,0.6,-1.3,True


In [44]:
df.loc[df['year'] > 2014, :]  # use the boolean mask as the row selector

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
two,2015.0,Hyunsuk,1.7,0.2,-1.2
three,2016.0,Hyunsuk,3.6,0.3,
four,2015.0,Charles,2.4,0.4,-1.5
five,2016.0,Charles,2.9,0.5,-1.7
six,2017.0,Joohee,3.3,0.6,-1.3


In [45]:
# Rows where names equals 'Hyunsuk', restricted to the 'names' and 'points' columns
df.loc[df['names'] == 'Hyunsuk', ['names', 'points']]

Info,names,points
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
one,Hyunsuk,1.5
two,Hyunsuk,1.7
three,Hyunsuk,3.6


In [46]:
# Combine two conditions with & (each wrapped in parentheses): 2 < points < 3
df.loc[(df['points'] > 2) & (df['points'] < 3), :]

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
four,2015.0,Charles,2.4,0.4,-1.5
five,2016.0,Charles,2.9,0.5,-1.7


In [47]:
# Assign a new value: set 'penalty' to 0 on the rows where points > 3
df.loc[df['points'] > 3, 'penalty'] = 0

In [48]:
# Display df to confirm which penalties were reset to 0
df

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014.0,Hyunsuk,1.5,0.1,
two,2015.0,Hyunsuk,1.7,0.2,-1.2
three,2016.0,Hyunsuk,3.6,0.0,
four,2015.0,Charles,2.4,0.4,-1.5
five,2016.0,Charles,2.9,0.5,-1.7
six,2017.0,Joohee,3.3,0.0,-1.3
