# 특정 값이 최대값을 가지는 행 추출
---

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns

# Load the Titanic example dataset through seaborn.
df = sns.load_dataset("titanic")
# Preview the first five rows (the default row count of head()).
df.head(n=5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


# boolean indexing
---

데이터프레임의 컬럼(시리즈)에 조건식을 적용하면 각 행에 대한 참/거짓 값을 담은 불리언 시리즈가 만들어진다. 이 불리언 시리즈로 조건이 참인 행(row)만 추출하는 방법을 boolean indexing이라고 한다.

다음 예제에서는 and 연산자(`&`)를 이용해 성별이 남성이면서 생존한 승객의 행들을 추출해보자.

In [19]:
# Build the filter: element-wise AND of "is male" and "survived".
condition = df['sex'].eq('male') & df['survived'].eq(1)

# Keep only the rows where the mask is True.
df[condition]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
17,1,2,male,,0,0,13.0000,S,Second,man,True,,Southampton,yes,True
21,1,2,male,34.0,0,0,13.0000,S,Second,man,True,D,Southampton,yes,True
23,1,1,male,28.0,0,0,35.5000,S,First,man,True,A,Southampton,yes,True
36,1,3,male,,0,0,7.2292,C,Third,man,True,,Cherbourg,yes,True
55,1,1,male,,0,0,35.5000,S,First,man,True,C,Southampton,yes,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
838,1,3,male,32.0,0,0,56.4958,S,Third,man,True,,Southampton,yes,True
839,1,1,male,,0,0,29.7000,C,First,man,True,C,Cherbourg,yes,True
857,1,1,male,51.0,0,0,26.5500,S,First,man,True,E,Southampton,yes,True
869,1,3,male,4.0,1,1,11.1333,S,Third,child,False,,Southampton,yes,False


In [20]:
# .loc accepts the same boolean mask; ":" selects every column.
df.loc[condition, :]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
17,1,2,male,,0,0,13.0000,S,Second,man,True,,Southampton,yes,True
21,1,2,male,34.0,0,0,13.0000,S,Second,man,True,D,Southampton,yes,True
23,1,1,male,28.0,0,0,35.5000,S,First,man,True,A,Southampton,yes,True
36,1,3,male,,0,0,7.2292,C,Third,man,True,,Cherbourg,yes,True
55,1,1,male,,0,0,35.5000,S,First,man,True,C,Southampton,yes,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
838,1,3,male,32.0,0,0,56.4958,S,Third,man,True,,Southampton,yes,True
839,1,1,male,,0,0,29.7000,C,First,man,True,C,Cherbourg,yes,True
857,1,1,male,51.0,0,0,26.5500,S,First,man,True,E,Southampton,yes,True
869,1,3,male,4.0,1,1,11.1333,S,Third,child,False,,Southampton,yes,False


이번에는 or 연산자(`|`)를 이용해 pclass가 1 혹은 2인 행들을 추출해보자.

In [28]:
# Build the filter: element-wise OR of "pclass is 1" and "pclass is 2".
condition = df['pclass'].eq(1) | df['pclass'].eq(2)

# Keep only the selected columns of the filtered rows.
df.loc[condition, ['survived', 'sex', 'pclass']]

Unnamed: 0,survived,sex,pclass
1,1,female,1
3,1,female,1
6,0,male,1
9,1,female,2
11,1,female,1
...,...,...,...
880,1,female,2
883,0,male,2
886,0,male,2
887,1,female,1


# isin() 
---

sibsp가 1, 2, 3 중 하나의 값을 갖는 행만 추출하기 위해선 아래의 방법처럼 boolean indexing을 사용할 수 있다.

In [31]:
# One boolean mask per accepted sibsp value.
sibsp = df['sibsp']
con1 = sibsp.eq(1)
con2 = sibsp.eq(2)
con3 = sibsp.eq(3)

# A row is kept when any of the three masks is True.
filtered_df = df.loc[con1 | con2 | con3]
filtered_df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
7,0,3,male,2.0,3,1,21.0750,S,Third,child,False,,Southampton,no,False
9,1,2,female,14.0,1,0,30.0708,C,Second,child,False,,Cherbourg,yes,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
866,1,2,female,27.0,1,0,13.8583,C,Second,woman,False,,Cherbourg,yes,False
869,1,3,male,4.0,1,1,11.1333,S,Third,child,False,,Southampton,yes,False
871,1,1,female,47.0,1,1,52.5542,S,First,woman,False,D,Southampton,yes,False
874,1,2,female,28.0,1,0,24.0000,C,Second,woman,False,,Cherbourg,yes,False


위처럼 컬럼의 값이 특정 리스트에 포함되는 행을 추출하고 싶다면, 조건식을 여러 개 만드는 대신 아래와 같이 isin 메서드를 사용하는 것이 더욱 간편하다.

In [32]:
# True for rows whose sibsp value is one of the listed values.
isin_con = df['sibsp'].isin([1, 2, 3])
# Select the matching rows; ":" selects every column.
df.loc[isin_con, :]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
7,0,3,male,2.0,3,1,21.0750,S,Third,child,False,,Southampton,no,False
9,1,2,female,14.0,1,0,30.0708,C,Second,child,False,,Cherbourg,yes,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
866,1,2,female,27.0,1,0,13.8583,C,Second,woman,False,,Cherbourg,yes,False
869,1,3,male,4.0,1,1,11.1333,S,Third,child,False,,Southampton,yes,False
871,1,1,female,47.0,1,1,52.5542,S,First,woman,False,D,Southampton,yes,False
874,1,2,female,28.0,1,0,24.0000,C,Second,woman,False,,Cherbourg,yes,False
