# PANDAS DATE-TIME 다루기

In [1]:
# 모듈 로딩
import pandas as pd

# Build a small DataFrame whose columns hold the year / month / day parts of two dates
date_parts = {
    'year': [2021, 2022],
    'month': [11, 12],
    'day': [1, 24],
}
df = pd.DataFrame(data=date_parts)

# Print the column summary (dtypes, non-null counts, memory usage)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   year    2 non-null      int64
 1   month   2 non-null      int64
 2   day     2 non-null      int64
dtypes: int64(3)
memory usage: 176.0 bytes


In [2]:
# Display the DataFrame
df

Unnamed: 0,year,month,day
0,2021,11,1
1,2022,12,24


In [3]:
# Data types of the DataFrame, listed per column
df.dtypes

year     int64
month    int64
day      int64
dtype: object

In [5]:
# Convert to datetime with pandas.to_datetime(): the year/month/day columns are assembled into dates
result = pd.to_datetime(df)

# Show the container type first, then the values; the converted Series has dtype datetime64
print(type(result))
print(result)

<class 'pandas.core.series.Series'>
0   2021-11-01
1   2022-12-24
dtype: datetime64[ns]


In [7]:
# A single element taken out of the Series is a Timestamp object
print(type(result[0]))
print(result[0])

<class 'pandas._libs.tslibs.timestamps.Timestamp'>
2021-11-01 00:00:00


## 타입 및 다른 포맷 변환 메서드들: to_변환타입()

In [16]:
# Conversion methods of the Series; only the last expression's value is shown as the cell output
result.to_frame() # convert to a DataFrame
result.to_list()  # convert to a list
result.to_string()  # convert to a string

'0   2021-11-01\n1   2022-12-24'

In [15]:
result.to_csv('result.csv', index=False)    # writes a CSV file named result.csv (index column omitted)

In [14]:
result.to_json('result.json')  # writes a JSON file named result.json

## CSV 파일 활용 실습

- 날짜데이터가 있는 CSV 파일 처리하기

In [32]:
# Load the survey data from the CSV file and display it
survey=pd.read_csv('../Data/survey_visited.csv')
survey

Unnamed: 0,ident,site,dated
0,619,DR-1,1927-02-08
1,622,DR-1,1927-02-10
2,734,DR-3,1939-01-07
3,735,DR-3,1930-01-12
4,751,DR-3,1930-02-26
5,752,DR-3,
6,837,MSK-4,1932-01-14
7,844,DR-1,1932-03-22


In [33]:
# Print the column summary: non-null counts and dtypes
survey.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   ident   8 non-null      int64 
 1   site    8 non-null      object
 2   dated   7 non-null      object
dtypes: int64(1), object(2)
memory usage: 320.0+ bytes


In [34]:
# Count the missing values in each column
survey.isna().sum()

ident    0
site     0
dated    1
dtype: int64

In [35]:
# Handle the missing value: forward-fill, so the empty 'dated' entry takes the previous row's value.
# The result is assigned back to the column rather than calling
# fillna(method='ffill', inplace=True) on survey['dated']: the `method` argument of
# fillna is deprecated in recent pandas, and an in-place call on a selected column
# is chained assignment, which does not update `survey` under Copy-on-Write.
survey['dated'] = survey['dated'].ffill()

In [36]:
# Display the DataFrame to check the filled value
survey

Unnamed: 0,ident,site,dated
0,619,DR-1,1927-02-08
1,622,DR-1,1927-02-10
2,734,DR-3,1939-01-07
3,735,DR-3,1930-01-12
4,751,DR-3,1930-02-26
5,752,DR-3,1930-02-26
6,837,MSK-4,1932-01-14
7,844,DR-1,1932-03-22


In [40]:
# Convert the 'dated' column to datetime values, then print the summary to confirm the dtype
survey['dated']=pd.to_datetime(survey['dated'])
survey.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   ident   8 non-null      int64         
 1   site    8 non-null      object        
 2   dated   8 non-null      datetime64[ns]
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 320.0+ bytes


In [38]:
# Display the DataFrame again
survey

Unnamed: 0,ident,site,dated
0,619,DR-1,1927-02-08
1,622,DR-1,1927-02-10
2,734,DR-3,1939-01-07
3,735,DR-3,1930-01-12
4,751,DR-3,1930-02-26
5,752,DR-3,1930-02-26
6,837,MSK-4,1932-01-14
7,844,DR-1,1932-03-22


In [39]:
# Type of a single element of the 'dated' column
type(survey['dated'][0])

pandas._libs.tslibs.timestamps.Timestamp

In [41]:
# One element of the column together with its year and day attributes
survey['dated'][0], survey['dated'][0].year, survey['dated'][0].day

(Timestamp('1927-02-08 00:00:00'), 1927, 8)

In [42]:
# Type of the column object and the dtype of the values it holds
type(survey['dated']), survey['dated'].dtype

(pandas.core.series.Series, dtype('<M8[ns]'))

In [43]:
# survey['dated'].year does not work because a Series has no .year attribute;
# use the .dt accessor to get the year of every element
survey['dated'].dt.year

0    1927
1    1927
2    1939
3    1930
4    1930
5    1930
6    1932
7    1932
Name: dated, dtype: int64

In [44]:
# Month of each date, via the .dt accessor
survey['dated'].dt.month

0    2
1    2
2    1
3    1
4    2
5    2
6    1
7    3
Name: dated, dtype: int64

In [45]:
# Day of the month of each date, via the .dt accessor
survey['dated'].dt.day

0     8
1    10
2     7
3    12
4    26
5    26
6    14
7    22
Name: dated, dtype: int64

In [46]:
# Day of the week of each date as an integer (pandas numbers Monday as 0 through Sunday as 6)
survey['dated'].dt.dayofweek

0    1
1    3
2    5
3    6
4    2
5    2
6    3
7    1
Name: dated, dtype: int64

In [50]:
# Bind the 'dated' column to a shorter name and display it
dateSR=survey['dated']
dateSR

0   1927-02-08
1   1927-02-10
2   1939-01-07
3   1930-01-12
4   1930-02-26
5   1930-02-26
6   1932-01-14
7   1932-03-22
Name: dated, dtype: datetime64[ns]

In [51]:
# Boolean-mask selection: keep only the dates whose year is 1930 or later
dateSR[dateSR.dt.year>=1930]

2   1939-01-07
3   1930-01-12
4   1930-02-26
5   1930-02-26
6   1932-01-14
7   1932-03-22
Name: dated, dtype: datetime64[ns]

In [52]:
# Boolean-mask selection: keep only the dates whose month number is 2 or greater
dateSR[dateSR.dt.month>=2]

0   1927-02-08
1   1927-02-10
4   1930-02-26
5   1930-02-26
7   1932-03-22
Name: dated, dtype: datetime64[ns]