In [1]:
import numpy as np
import pandas as pd

In [11]:
titanic_df = pd.read_csv('titanic_train.csv')
print('단일 컬럼 데이터 추출:\n', titanic_df['Pclass'].head(3))
print('\n여러 컬럼들의 데이터 추출:\n', titanic_df[['Survived', 'Pclass']].head(3))

# 아래처럼 코딩하는건 좋지 않다.
# 차라리 Boolean Indexing으로 사용하는게 좋다.
print('[ ] 안에 숫자 index는 KeyError 오류 발생:\n', titanic_df[0])

단일 컬럼 데이터 추출:
 0    3
1    1
2    3
Name: Pclass, dtype: int64

여러 컬럼들의 데이터 추출:
    Survived  Pclass
0         0       3
1         1       1
2         1       3


KeyError: 0

In [13]:
data = {'Name': ['Chulmin', 'Eunkyung','Jinwoong','Soobeom'],
        'Year': [2011, 2016, 2015, 2015],
        'Gender': ['Male', 'Female', 'Male', 'Male']
       }
data_df = pd.DataFrame(data, index=['one','two','three','four'])
data_df

Unnamed: 0,Name,Year,Gender
one,Chulmin,2011,Male
two,Eunkyung,2016,Female
three,Jinwoong,2015,Male
four,Soobeom,2015,Male


In [19]:
print("\n iloc[0]", data_df.iloc[0])
print("\n loc['one']", data_df.loc['one'])


 iloc[0] Name      Chulmin
Year         2011
Gender       Male
Name: one, dtype: object

 loc['one'] Name      Chulmin
Year         2011
Gender       Male
Name: one, dtype: object


In [20]:
# data_df 를 reset_index() 로 새로운 숫자형 인덱스를 생성
data_df_reset = data_df.reset_index()
data_df_reset = data_df_reset.rename(columns={'index':'old_index'})

# index 값에 1을 더해서 1부터 시작하는 새로운 index값 생성
data_df_reset.index = data_df_reset.index+1
data_df_reset

Unnamed: 0,old_index,Name,Year,Gender
1,one,Chulmin,2011,Male
2,two,Eunkyung,2016,Female
3,three,Jinwoong,2015,Male
4,four,Soobeom,2015,Male


### iloc (위치기반)

In [21]:
data_df.head()

Unnamed: 0,Name,Year,Gender
one,Chulmin,2011,Male
two,Eunkyung,2016,Female
three,Jinwoong,2015,Male
four,Soobeom,2015,Male


In [22]:
data_df.iloc[0, 0]

'Chulmin'

In [23]:
# 아래 코드는 오류를 발생시킴
data_df.iloc['Name', 0]

ValueError: Location based indexing can only have [integer, integer slice (START point is INCLUDED, END point is EXCLUDED), listlike of integers, boolean array] types

In [24]:
data_df.reset_index()

Unnamed: 0,index,Name,Year,Gender
0,one,Chulmin,2011,Male
1,two,Eunkyung,2016,Female
2,three,Jinwoong,2015,Male
3,four,Soobeom,2015,Male


### loc (명칭기반)

In [25]:
data_df

Unnamed: 0,Name,Year,Gender
one,Chulmin,2011,Male
two,Eunkyung,2016,Female
three,Jinwoong,2015,Male
four,Soobeom,2015,Male


In [26]:
data_df.loc['one', 'Name']

'Chulmin'

In [27]:
data_df_reset.loc[1, 'Name']

'Chulmin'

In [28]:
data_df_reset.loc[0, 'Name']

KeyError: 0

### 불린 인덱싱(Boolean Indexing)

In [29]:
titanic_df = pd.read_csv('titanic_train.csv')

In [30]:
titanic_boolean = titanic_df[titanic_df['Age'] > 60]

In [31]:
titanic_boolean

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
33,34,0,2,"Wheadon, Mr. Edward H",male,66.0,0,0,C.A. 24579,10.5,,S
54,55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65.0,0,1,113509,61.9792,B30,C
96,97,0,1,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C
116,117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q
170,171,0,1,"Van der hoef, Mr. Wyckoff",male,61.0,0,0,111240,33.5,B19,S
252,253,0,1,"Stead, Mr. William Thomas",male,62.0,0,0,113514,26.55,C87,S
275,276,1,1,"Andrews, Miss. Kornelia Theodosia",female,63.0,1,0,13502,77.9583,D7,S
280,281,0,3,"Duane, Mr. Frank",male,65.0,0,0,336439,7.75,,Q
326,327,0,3,"Nysveen, Mr. Johan Hansen",male,61.0,0,0,345364,6.2375,,S
438,439,0,1,"Fortune, Mr. Mark",male,64.0,1,4,19950,263.0,C23 C25 C27,S


In [36]:
var1 = titanic_df['Age'] > 60
print('결과:\n', var1)
print(type(var1))

결과:
 0      False
1      False
2      False
3      False
4      False
       ...  
886    False
887    False
888    False
889    False
890    False
Name: Age, Length: 891, dtype: bool
<class 'pandas.core.series.Series'>


In [37]:
titanic_df[titanic_df['Age'] > 60][['Name', 'Age']].head(3)

Unnamed: 0,Name,Age
33,"Wheadon, Mr. Edward H",66.0
54,"Ostby, Mr. Engelhart Cornelius",65.0
96,"Goldschmidt, Mr. George B",71.0


In [38]:
titanic_df[['Name', 'Age']][titanic_df['Age'] > 60].head(3)

Unnamed: 0,Name,Age
33,"Wheadon, Mr. Edward H",66.0
54,"Ostby, Mr. Engelhart Cornelius",65.0
96,"Goldschmidt, Mr. George B",71.0


In [40]:
titanic_df['Age_cat'] = titanic_df['Age'].apply(lambda x : 'Child' if x<=15 else ('Adult' if x <= 60 else 
                                                                                  'Elderly'))
titanic_df['Age_cat'].value_counts()

Adult      609
Elderly    199
Child       83
Name: Age_cat, dtype: int64