# 접근자(Accessor)

- DF/SR에 속성으로 제공되며 다른 객체의 기능/속성을 사용할 수 있도록 해줌

- 종류
    - Series.dt : datetime 관련 속성 사용할 수 있도록 해줌
    - Series.str : string 관련 메서드 사용할 수 있도록 해줌
    - DataFrame.spare : spare 관련 속성/메서드 사용할 수 있도록 해줌

In [104]:
# 모듈 로딩
import pandas as pd
import random

# 데이터생성 ---------------------------------------
# 이름, 생년월일, 나이 3개 컬럼으로 구성 
# 총 10명, 생일이 한 달 차이로 일자는 똑같도록
# 이름은 영문 (ex: Tom Kim)
df=pd.DataFrame({'name':['Tom Kim', 'Mab Lee', 'Mat Park', 'Michael Kim', 'Gildong Hong', 'Jaesuck You', 'Red Park', 'Yellow Kim', 'Blue Lee', 'Purple Choi'],
                'birth':pd.date_range('2012-01-07', periods=10, freq='M'),
                'age':[random.randint(17, 25) for num in range(10)]})

In [105]:
df

Unnamed: 0,name,birth,age
0,Tom Kim,2012-01-31,25
1,Mab Lee,2012-02-29,18
2,Mat Park,2012-03-31,22
3,Michael Kim,2012-04-30,18
4,Gildong Hong,2012-05-31,21
5,Jaesuck You,2012-06-30,20
6,Red Park,2012-07-31,23
7,Yellow Kim,2012-08-31,23
8,Blue Lee,2012-09-30,23
9,Purple Choi,2012-10-31,18


In [106]:
# name 컬럼의 이름을 분리하기
df['name'].str.split()

0         [Tom, Kim]
1         [Mab, Lee]
2        [Mat, Park]
3     [Michael, Kim]
4    [Gildong, Hong]
5     [Jaesuck, You]
6        [Red, Park]
7      [Yellow, Kim]
8        [Blue, Lee]
9     [Purple, Choi]
Name: name, dtype: object

In [107]:
# name 컬럼의 이름을 모두 대문자로 바꾸고 싶음
df['name'].str.upper()

0         TOM KIM
1         MAB LEE
2        MAT PARK
3     MICHAEL KIM
4    GILDONG HONG
5     JAESUCK YOU
6        RED PARK
7      YELLOW KIM
8        BLUE LEE
9     PURPLE CHOI
Name: name, dtype: object

In [108]:
# 각각의 이름 안에 a가 포함되어 있는지가 궁금함
df['name'].str.contains('a')

0    False
1     True
2     True
3     True
4    False
5     True
6     True
7    False
8    False
9    False
Name: name, dtype: bool

## str 데이터로 구성된 컬럼(Series)의 str 접근자

- 전체 컬럼의 요소에 str 관련 메서드 사용 가능하도록 하는 접근자
- 사용법
    - 객체변수[컬럼명].str.메서드() => upper, lower, split, ...
    - 객체변수[컬럼명].str.get(인덱스) => 분리 문자리스트에서 특정 요소 추출

In [109]:
# 문자열 분리
flname=df['name'].str.split()

In [110]:
# 내가 한 방법
#df['first_name']=pd.DataFrame([df['name'].str.split()[idx][0] for idx in df.index])

In [111]:
df

Unnamed: 0,name,birth,age
0,Tom Kim,2012-01-31,25
1,Mab Lee,2012-02-29,18
2,Mat Park,2012-03-31,22
3,Michael Kim,2012-04-30,18
4,Gildong Hong,2012-05-31,21
5,Jaesuck You,2012-06-30,20
6,Red Park,2012-07-31,23
7,Yellow Kim,2012-08-31,23
8,Blue Lee,2012-09-30,23
9,Purple Choi,2012-10-31,18


In [112]:
# 내가 한 방법
#df['second_name']=pd.DataFrame([df['name'].str.split()[idx][1] for idx in df.index])

In [113]:
df

Unnamed: 0,name,birth,age
0,Tom Kim,2012-01-31,25
1,Mab Lee,2012-02-29,18
2,Mat Park,2012-03-31,22
3,Michael Kim,2012-04-30,18
4,Gildong Hong,2012-05-31,21
5,Jaesuck You,2012-06-30,20
6,Red Park,2012-07-31,23
7,Yellow Kim,2012-08-31,23
8,Blue Lee,2012-09-30,23
9,Purple Choi,2012-10-31,18


In [114]:
# 분리된 문자열 리스트에서 특정 값 가져오기
flname.str.get(0)

0        Tom
1        Mab
2        Mat
3    Michael
4    Gildong
5    Jaesuck
6        Red
7     Yellow
8       Blue
9     Purple
Name: name, dtype: object

In [115]:
flname.str.get(1)

0     Kim
1     Lee
2    Park
3     Kim
4    Hong
5     You
6    Park
7     Kim
8     Lee
9    Choi
Name: name, dtype: object

In [116]:
df['first_name']=flname.str.get(0)
df['last_name']=flname.str.get(1)
df

Unnamed: 0,name,birth,age,first_name,last_name
0,Tom Kim,2012-01-31,25,Tom,Kim
1,Mab Lee,2012-02-29,18,Mab,Lee
2,Mat Park,2012-03-31,22,Mat,Park
3,Michael Kim,2012-04-30,18,Michael,Kim
4,Gildong Hong,2012-05-31,21,Gildong,Hong
5,Jaesuck You,2012-06-30,20,Jaesuck,You
6,Red Park,2012-07-31,23,Red,Park
7,Yellow Kim,2012-08-31,23,Yellow,Kim
8,Blue Lee,2012-09-30,23,Blue,Lee
9,Purple Choi,2012-10-31,18,Purple,Choi


In [117]:
# str.split()
# expand=True로 넣으면 전부 컬럼으로 만들어짐
value=df['name'].str.split(expand=True)
type(value)

pandas.core.frame.DataFrame

In [118]:
# value의 컬럼 이름 변경
value.columns=['first_name_2', 'last_name_2']

In [119]:
df=pd.concat([df, value], axis=1)

In [120]:
df

Unnamed: 0,name,birth,age,first_name,last_name,first_name_2,last_name_2
0,Tom Kim,2012-01-31,25,Tom,Kim,Tom,Kim
1,Mab Lee,2012-02-29,18,Mab,Lee,Mab,Lee
2,Mat Park,2012-03-31,22,Mat,Park,Mat,Park
3,Michael Kim,2012-04-30,18,Michael,Kim,Michael,Kim
4,Gildong Hong,2012-05-31,21,Gildong,Hong,Gildong,Hong
5,Jaesuck You,2012-06-30,20,Jaesuck,You,Jaesuck,You
6,Red Park,2012-07-31,23,Red,Park,Red,Park
7,Yellow Kim,2012-08-31,23,Yellow,Kim,Yellow,Kim
8,Blue Lee,2012-09-30,23,Blue,Lee,Blue,Lee
9,Purple Choi,2012-10-31,18,Purple,Choi,Purple,Choi


In [121]:
# 특정 컬럼 이름만 변경하기
df.rename(columns={'first_name_2':'F_N', 'last_name_2':'L_N'}, inplace=True)

In [122]:
df

Unnamed: 0,name,birth,age,first_name,last_name,F_N,L_N
0,Tom Kim,2012-01-31,25,Tom,Kim,Tom,Kim
1,Mab Lee,2012-02-29,18,Mab,Lee,Mab,Lee
2,Mat Park,2012-03-31,22,Mat,Park,Mat,Park
3,Michael Kim,2012-04-30,18,Michael,Kim,Michael,Kim
4,Gildong Hong,2012-05-31,21,Gildong,Hong,Gildong,Hong
5,Jaesuck You,2012-06-30,20,Jaesuck,You,Jaesuck,You
6,Red Park,2012-07-31,23,Red,Park,Red,Park
7,Yellow Kim,2012-08-31,23,Yellow,Kim,Yellow,Kim
8,Blue Lee,2012-09-30,23,Blue,Lee,Blue,Lee
9,Purple Choi,2012-10-31,18,Purple,Choi,Purple,Choi


## 실습
- stock-data.csv
- 접근자 사용하여 Date 컬럼 분할(Y, M, D)
- D일 기준으로 정렬

In [123]:
# stock-data.csv => Date 컬럼을 연, 월, 일 따로 컬럼 추가
stock=pd.read_csv('../Data/stock-data.csv')
stock.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    20 non-null     object
 1   Close   20 non-null     int64 
 2   Start   20 non-null     int64 
 3   High    20 non-null     int64 
 4   Low     20 non-null     int64 
 5   Volume  20 non-null     int64 
dtypes: int64(5), object(1)
memory usage: 1.1+ KB


In [124]:
#stock['Date']=pd.to_datetime(stock['Date'])
# 먼저 바꾸니까 .str 안써짐,,
stock.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    20 non-null     object
 1   Close   20 non-null     int64 
 2   Start   20 non-null     int64 
 3   High    20 non-null     int64 
 4   Low     20 non-null     int64 
 5   Volume  20 non-null     int64 
dtypes: int64(5), object(1)
memory usage: 1.1+ KB


In [125]:
date=stock['Date'].str.split('-')
date

0     [2018, 07, 02]
1     [2018, 06, 29]
2     [2018, 06, 28]
3     [2018, 06, 27]
4     [2018, 06, 26]
5     [2018, 06, 25]
6     [2018, 06, 22]
7     [2018, 06, 21]
8     [2018, 06, 20]
9     [2018, 06, 19]
10    [2018, 06, 18]
11    [2018, 06, 15]
12    [2018, 06, 14]
13    [2020, 06, 12]
14    [2020, 06, 11]
15    [2020, 06, 08]
16    [2021, 06, 07]
17    [2022, 06, 05]
18    [2022, 06, 04]
19    [2022, 06, 01]
Name: Date, dtype: object

In [126]:
stock['Year']=date.str.get(0)
stock['Month']=date.str.get(1)
stock['Day']=date.str.get(2)

In [127]:
stock

Unnamed: 0,Date,Close,Start,High,Low,Volume,Year,Month,Day
0,2018-07-02,10100,10850,10900,10000,137977,2018,7,2
1,2018-06-29,10700,10550,10900,9990,170253,2018,6,29
2,2018-06-28,10400,10900,10950,10150,155769,2018,6,28
3,2018-06-27,10900,10800,11050,10500,133548,2018,6,27
4,2018-06-26,10800,10900,11000,10700,63039,2018,6,26
5,2018-06-25,11150,11400,11450,11000,55519,2018,6,25
6,2018-06-22,11300,11250,11450,10750,134805,2018,6,22
7,2018-06-21,11200,11350,11750,11200,133002,2018,6,21
8,2018-06-20,11550,11200,11600,10900,308596,2018,6,20
9,2018-06-19,11300,11850,11950,11300,180656,2018,6,19


In [128]:
# Date 컬럼을 datetime 자료형으로 변환
stock['Date']=pd.to_datetime(stock['Date'])

In [129]:
stock.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    20 non-null     datetime64[ns]
 1   Close   20 non-null     int64         
 2   Start   20 non-null     int64         
 3   High    20 non-null     int64         
 4   Low     20 non-null     int64         
 5   Volume  20 non-null     int64         
 6   Year    20 non-null     object        
 7   Month   20 non-null     object        
 8   Day     20 non-null     object        
dtypes: datetime64[ns](1), int64(5), object(3)
memory usage: 1.5+ KB


In [130]:
stock.sort_values(by=['Day'])

Unnamed: 0,Date,Close,Start,High,Low,Volume,Year,Month,Day
19,2022-06-01,11900,11800,12100,11750,32062,2022,6,1
0,2018-07-02,10100,10850,10900,10000,137977,2018,7,2
18,2022-06-04,11900,11900,12200,11700,25171,2022,6,4
17,2022-06-05,12150,11800,12250,11800,42485,2022,6,5
16,2021-06-07,11950,12200,12300,11900,49088,2021,6,7
15,2020-06-08,11950,11950,12200,11800,59258,2020,6,8
14,2020-06-11,11950,12000,12250,11950,62293,2020,6,11
13,2020-06-12,13200,12200,13300,12050,558148,2020,6,12
12,2018-06-14,13450,13200,13700,13150,347451,2018,6,14
11,2018-06-15,13400,13600,13600,12900,201376,2018,6,15
