# 날짜 시간 관련 PANDAS
- 관련 자료형 : Timestamp, datetime64, Period
- 관련 메서드 : to_datetime(), date_range(), to_period(), period_range()

In [6]:
# Load pandas
import pandas as pd

# Build a DataFrame whose columns hold the year / month / day components
# of two dates (one date per row)
df = pd.DataFrame(
    dict(
        year=[2021, 2022],
        month=[11, 12],
        day=[15, 19],
    )
)
df.info()
df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   year    2 non-null      int64
 1   month   2 non-null      int64
 2   day     2 non-null      int64
dtypes: int64(3)
memory usage: 176.0 bytes


Unnamed: 0,year,month,day
0,2021,11,15
1,2022,12,19


In [7]:
# Inspect the dtype of each DataFrame column
df.dtypes

year     int64
month    int64
day      int64
dtype: object

In [9]:
# Convert to datetime with pandas.to_datetime(): the year/month/day columns
# of each row are assembled into one date, giving a datetime64[ns] Series
result = pd.to_datetime(df)
print(type(result))
print(result)

<class 'pandas.core.series.Series'>
0   2021-11-15
1   2022-12-19
dtype: datetime64[ns]


In [15]:
# Take the element labelled 0; a single element of the datetime Series
# comes back as a pandas Timestamp
one = result.loc[0]
print(type(one))
print(one)

<class 'pandas._libs.tslibs.timestamps.Timestamp'>
2021-11-15 00:00:00


In [32]:
# Read individual components of the Timestamp: day and month are attributes,
# month_name() is a method that returns the month's name as a string
one.day, one.month, one.month_name()

(15, 11, 'November')

In [25]:
# Type / format conversion methods follow the to_<target>() naming pattern;
# to_frame() turns the Series into a DataFrame
result.to_frame()

Unnamed: 0,0
0,2021-11-15
1,2022-12-19


In [26]:
# Write the datetime Series to disk as JSON and as CSV
result.to_json('result.json')
# index=False leaves the row index out of the CSV file
result.to_csv('result.csv', index=False)

# CSV 파일 활용 실습
- 날짜 데이터가 있는 CSV파일 처리하기

In [66]:
# Load the survey visit records; the 'dated' column holds the visit dates
# and one row has no date recorded
survey_visited = pd.read_csv('../Data/survey_visited.csv')
survey_visited

Unnamed: 0,ident,site,dated
0,619,DR-1,1927-02-08
1,622,DR-1,1927-02-10
2,734,DR-3,1939-01-07
3,735,DR-3,1930-01-12
4,751,DR-3,1930-02-26
5,752,DR-3,
6,837,MSK-4,1932-01-14
7,844,DR-1,1932-03-22


In [67]:
# Forward-fill the missing 'dated' value from the row above it.
# fillna(method='ffill') is deprecated in recent pandas releases; ffill() is
# the supported equivalent and fills the frame the same way.
survey_visited.ffill(inplace=True)
# Parse the date strings into datetime64[ns] so that datetime attributes
# (.year, the .dt accessor, ...) become available
survey_visited['dated'] = pd.to_datetime(survey_visited['dated'])
survey_visited.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   ident   8 non-null      int64         
 1   site    8 non-null      object        
 2   dated   8 non-null      datetime64[ns]
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 320.0+ bytes


In [68]:
# Display the frame again to check the filled-in date and the converted column
survey_visited

Unnamed: 0,ident,site,dated
0,619,DR-1,1927-02-08
1,622,DR-1,1927-02-10
2,734,DR-3,1939-01-07
3,735,DR-3,1930-01-12
4,751,DR-3,1930-02-26
5,752,DR-3,1930-02-26
6,837,MSK-4,1932-01-14
7,844,DR-1,1932-03-22


In [78]:
# Year of the Timestamp stored in row 0 of the 'dated' column
survey_visited.loc[0, 'dated'].year

1927

In [82]:
# Show the container type of the column alongside the dtype of its elements
type(survey_visited['dated']), survey_visited['dated'].dtype

(pandas.core.series.Series, dtype('<M8[ns]'))

In [87]:
# Extract a datetime component from every element of a Series via the
# .dt accessor (here: the year of each date)
survey_visited['dated'].dt.year

0    1927
1    1927
2    1939
3    1930
4    1930
5    1930
6    1932
7    1932
Name: dated, dtype: int64

In [91]:
# Keep the datetime column under a shorter name for the filtering cells below
dateSR = survey_visited['dated']
dateSR

0   1927-02-08
1   1927-02-10
2   1939-01-07
3   1930-01-12
4   1930-02-26
5   1930-02-26
6   1932-01-14
7   1932-03-22
Name: dated, dtype: datetime64[ns]

In [92]:
# Boolean-mask selection: keep only the dates whose year is 1930
dateSR.loc[dateSR.dt.year.eq(1930)]

3   1930-01-12
4   1930-02-26
5   1930-02-26
Name: dated, dtype: datetime64[ns]

In [93]:
# Boolean-mask selection: keep only the dates that fall in February
dateSR.loc[dateSR.dt.month.eq(2)]

0   1927-02-08
1   1927-02-10
4   1930-02-26
5   1930-02-26
Name: dated, dtype: datetime64[ns]