### 6.1 Timestamp

In [1]:
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# A single date string converts to one pandas Timestamp; show both the
# resulting type and the value (one per line).
ts = pd.to_datetime("2024-02-26")
print(type(ts), ts, sep="\n")

<class 'pandas._libs.tslibs.timestamps.Timestamp'>
2024-02-26 00:00:00


In [3]:
# A string that also carries a time of day keeps it in the Timestamp.
ts = pd.to_datetime("2024-02-26 10:49:59")
print(ts)

2024-02-26 10:49:59


In [4]:
# Compact form without separators: date digits, a space, then time digits.
print(pd.to_datetime("20240226 090000"))

2024-02-26 09:00:00


In [5]:
# The explicit day/month/two-digit-year format removes the ambiguity of
# "06/07/20": it is read as 6 July 2020.
print(pd.to_datetime("06/07/20", format="%d/%m/%y"))

2020-07-06 00:00:00


In [6]:
# Parse an eight-digit date and read its year component.
ts = pd.to_datetime("20210814")
print(ts.year)

2021


In [7]:
# No time of day was given when `ts` was parsed, so the hour is 0.
ts.hour

0

In [8]:
# weekday() counts from Monday = 0, so 5 means Saturday.
ts = pd.to_datetime("20210814")
print(ts.weekday())

5


In [9]:
# Render the Timestamp as a date-only string (year-month-day).
print(ts.strftime("%Y-%m-%d"))

2021-08-14


In [10]:
# A fixed duration: 100 days, 2 hours, 30 minutes and 30 seconds.
diff = pd.Timedelta(
    days=100,
    hours=2,
    minutes=30,
    seconds=30,
)
print(diff)

100 days 02:30:30


In [11]:
# Adding the Timedelta to the Timestamp gives a new, shifted Timestamp.
print(ts + diff)

2021-11-22 02:30:30


In [12]:
# `ts` itself is unchanged: the addition above produced a separate value.
ts

Timestamp('2021-08-14 00:00:00')

In [13]:
# Date strings for the first three days of January 2021.
candidates = [f"2021010{day_of_month}" for day_of_month in range(1, 4)]
# Passing a list (rather than a single string) yields a DatetimeIndex.
idx = pd.to_datetime(candidates)
print(idx)

DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03'], dtype='datetime64[ns]', freq=None)


In [14]:
# Positional access on the DatetimeIndex returns a single timestamp.
print(idx[0])

2021-01-01 00:00:00


In [15]:
# Component attributes apply element-wise: one year per entry.
print(idx.year)

Index([2021, 2021, 2021], dtype='int32')


In [16]:
# Day-of-month for every entry in the index.
print(idx.day)

Index([1, 2, 3], dtype='int32')


In [17]:
# Rough sanity check of an epoch value: convert 1628899200 seconds to days,
# then to 365-day years (leap days are ignored).
seconds_per_day = 60 * 60 * 24
day = 1628899200 / seconds_per_day
year = day / 365
print(year)

51.652054794520545


In [18]:
# unit="s" makes the integer count as seconds (from the Unix epoch) rather
# than the default nanoseconds.
dt = pd.to_datetime(1628899200, unit="s")
print(dt)

2021-08-14 00:00:00


In [19]:
import pandas as pd
import numpy as np

# Build a demo data frame filled with random integers.
np.random.seed(0)  # fixed seed so the generated numbers are repeatable

# One draw of 100 values per column, taken in this exact column order so the
# random stream is consumed the same way on every run.
data = {
    column: np.random.randint(100, 500, size=100)
    for column in ("시가", "고가", "저가", "종가")
}

df = pd.DataFrame(data)
df

Unnamed: 0,시가,고가,저가,종가
0,272,441,261,125
1,147,148,460,423
2,217,405,328,459
3,292,169,351,391
4,423,269,221,214
...,...,...,...,...
95,327,125,447,327
96,379,477,268,461
97,307,474,472,374
98,497,217,392,473


In [20]:
# Replace the default integer index with consecutive calendar days starting
# on 2020-06-01. The number of periods is taken from the frame itself, so the
# new index always has exactly one entry per row (assigning an index of a
# different length would raise).
date_index = pd.date_range(start="2020-06-01", periods=len(df))
df.index = date_index
df


Unnamed: 0,시가,고가,저가,종가
2020-06-01,272,441,261,125
2020-06-02,147,148,460,423
2020-06-03,217,405,328,459
2020-06-04,292,169,351,391
2020-06-05,423,269,221,214
...,...,...,...,...
2020-09-04,327,125,447,327
2020-09-05,379,477,268,461
2020-09-06,307,474,472,374
2020-09-07,497,217,392,473


In [21]:
# With a date index in place, a date string selects the matching row.
print(df.loc["2020-06-01"])

시가    272
고가    441
저가    261
종가    125
Name: 2020-06-01 00:00:00, dtype: int64


### 6.2 시계열 데이터 활용

In [28]:
import pandas as pd

In [29]:
# Load the sample workbook, using its first column as the index.
# NOTE(review): this is an absolute, machine-specific path; the cell only
# runs where the file exists at exactly this location.
df = pd.read_excel(
    "/Users/time/Documents/GitHub/Python/0. 학습_책/2. 금융 데이터 분석을 위한 판다스/학습자료/ch06/data/ss_ex_1.xlsx",
    index_col=0,
)

  warn("Workbook contains no default style, apply openpyxl's default")


In [30]:
# Inspect the frame as loaded: the index still shows slash-separated dates,
# newest first.
df

Unnamed: 0_level_0,종가,대비,등락률,시가,고가,저가,거래량,거래대금,시가총액,상장주식수
일자,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021/08/13,74400,-2600,-3.38,75800,76000,74100,61270643,4575267536355,444151821720000,5969782550
2021/08/12,77000,-1500,-1.91,77100,78200,76900,42365223,3276635421700,459673256350000,5969782550
2021/08/11,78500,-1700,-2.12,79600,79800,78500,30241137,2389977254924,468627930175000,5969782550
2021/08/10,80200,-1300,-1.60,82300,82400,80100,20362639,1643107615500,478776560510000,5969782550
2021/08/09,81500,0,0.00,81500,82300,80900,15522581,1267668377900,486537277825000,5969782550
...,...,...,...,...,...,...,...,...,...,...
2021/02/19,82600,500,0.61,82300,82800,81000,25880879,2121275310450,493104038630000,5969782550
2021/02/18,82100,-1100,-1.32,83200,83600,82100,21327683,1762033944231,490119147355000,5969782550
2021/02/17,83200,-1700,-2.00,83900,84200,83000,18307735,1526409421172,496685908160000,5969782550
2021/02/16,84900,700,0.83,84500,86000,84200,20483100,1740792201903,506834538495000,5969782550


In [32]:
# The index holds date strings laid out as year/month/day (e.g. "2021/08/13").
# Stating that layout explicitly makes parsing strict instead of relying on
# format inference; a value in any other layout raises rather than being
# guessed at.
df.index = pd.to_datetime(df.index, format="%Y/%m/%d")

In [33]:
# Confirm the conversion: the index should now be a DatetimeIndex.
df.index

DatetimeIndex(['2021-08-13', '2021-08-12', '2021-08-11', '2021-08-10',
               '2021-08-09', '2021-08-06', '2021-08-05', '2021-08-04',
               '2021-08-03', '2021-08-02',
               ...
               '2021-02-26', '2021-02-25', '2021-02-24', '2021-02-23',
               '2021-02-22', '2021-02-19', '2021-02-18', '2021-02-17',
               '2021-02-16', '2021-02-15'],
              dtype='datetime64[ns]', name='일자', length=127, freq=None)

In [34]:
# Put the rows in chronological order (oldest date first).
df = df.sort_index(ascending=True)

In [35]:
# Inspect the frame after sorting by its date index.
df

Unnamed: 0_level_0,종가,대비,등락률,시가,고가,저가,거래량,거래대금,시가총액,상장주식수
일자,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-02-15,84200,2600,3.19,83800,84500,83300,23529706,1978336504100,502655690710000,5969782550
2021-02-16,84900,700,0.83,84500,86000,84200,20483100,1740792201903,506834538495000,5969782550
2021-02-17,83200,-1700,-2.00,83900,84200,83000,18307735,1526409421172,496685908160000,5969782550
2021-02-18,82100,-1100,-1.32,83200,83600,82100,21327683,1762033944231,490119147355000,5969782550
2021-02-19,82600,500,0.61,82300,82800,81000,25880879,2121275310450,493104038630000,5969782550
...,...,...,...,...,...,...,...,...,...,...
2021-08-09,81500,0,0.00,81500,82300,80900,15522581,1267668377900,486537277825000,5969782550
2021-08-10,80200,-1300,-1.60,82300,82400,80100,20362639,1643107615500,478776560510000,5969782550
2021-08-11,78500,-1700,-2.12,79600,79800,78500,30241137,2389977254924,468627930175000,5969782550
2021-08-12,77000,-1500,-1.91,77100,78200,76900,42365223,3276635421700,459673256350000,5969782550


In [36]:
# Load the same workbook again, this time keeping the default integer index
# and asking the reader to parse the date column while loading.
# NOTE(review): this is an absolute, machine-specific path; the cell only
# runs where the file exists at exactly this location.
df = pd.read_excel(
    "/Users/time/Documents/GitHub/Python/0. 학습_책/2. 금융 데이터 분석을 위한 판다스/학습자료/ch06/data/ss_ex_1.xlsx",
    parse_dates=["일자"],
)

  warn("Workbook contains no default style, apply openpyxl's default")


In [37]:
# Order the rows chronologically by the date column (oldest first).
df = df.sort_values(by="일자", ascending=True)

In [38]:
# Inspect the result: rows are in date order, so the integer index labels
# now run downwards.
df

Unnamed: 0,일자,종가,대비,등락률,시가,고가,저가,거래량,거래대금,시가총액,상장주식수
126,2021-02-15,84200,2600,3.19,83800,84500,83300,23529706,1978336504100,502655690710000,5969782550
125,2021-02-16,84900,700,0.83,84500,86000,84200,20483100,1740792201903,506834538495000,5969782550
124,2021-02-17,83200,-1700,-2.00,83900,84200,83000,18307735,1526409421172,496685908160000,5969782550
123,2021-02-18,82100,-1100,-1.32,83200,83600,82100,21327683,1762033944231,490119147355000,5969782550
122,2021-02-19,82600,500,0.61,82300,82800,81000,25880879,2121275310450,493104038630000,5969782550
...,...,...,...,...,...,...,...,...,...,...,...
4,2021-08-09,81500,0,0.00,81500,82300,80900,15522581,1267668377900,486537277825000,5969782550
3,2021-08-10,80200,-1300,-1.60,82300,82400,80100,20362639,1643107615500,478776560510000,5969782550
2,2021-08-11,78500,-1700,-2.12,79600,79800,78500,30241137,2389977254924,468627930175000,5969782550
1,2021-08-12,77000,-1500,-1.91,77100,78200,76900,42365223,3276635421700,459673256350000,5969782550


In [39]:
# Sort by the integer index labels again, which restores the row order the
# file was loaded in.
df = df.sort_index(ascending=True)

In [40]:
# Inspect the frame after sorting by index label: label 0 is first again.
df

Unnamed: 0,일자,종가,대비,등락률,시가,고가,저가,거래량,거래대금,시가총액,상장주식수
0,2021-08-13,74400,-2600,-3.38,75800,76000,74100,61270643,4575267536355,444151821720000,5969782550
1,2021-08-12,77000,-1500,-1.91,77100,78200,76900,42365223,3276635421700,459673256350000,5969782550
2,2021-08-11,78500,-1700,-2.12,79600,79800,78500,30241137,2389977254924,468627930175000,5969782550
3,2021-08-10,80200,-1300,-1.60,82300,82400,80100,20362639,1643107615500,478776560510000,5969782550
4,2021-08-09,81500,0,0.00,81500,82300,80900,15522581,1267668377900,486537277825000,5969782550
...,...,...,...,...,...,...,...,...,...,...,...
122,2021-02-19,82600,500,0.61,82300,82800,81000,25880879,2121275310450,493104038630000,5969782550
123,2021-02-18,82100,-1100,-1.32,83200,83600,82100,21327683,1762033944231,490119147355000,5969782550
124,2021-02-17,83200,-1700,-2.00,83900,84200,83000,18307735,1526409421172,496685908160000,5969782550
125,2021-02-16,84900,700,0.83,84500,86000,84200,20483100,1740792201903,506834538495000,5969782550


In [41]:
# Check the dtype that parse_dates gave the date column.
print(df['일자'].dtype)

datetime64[ns]


In [42]:
# Check the Python type of a single element taken from the date column.
print(type(df['일자'].iloc[0]))

<class 'pandas._libs.tslibs.timestamps.Timestamp'>


In [43]:
# This load did not set index_col, so the index is the default integer one.
df.index

Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
       ...
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126],
      dtype='int64', length=127)

In [44]:
# The .dt accessor exposes datetime components for a whole column; here the
# calendar quarter of each date.
df['일자'].dt.quarter

0      3
1      3
2      3
3      3
4      3
      ..
122    1
123    1
124    1
125    1
126    1
Name: 일자, Length: 127, dtype: int32