In [2]:
import pandas as pd
import requests
import os
from bs4 import BeautifulSoup
from time import sleep
from matplotlib import font_manager, rc
import matplotlib.pyplot as plt
import warnings
import folium

# Suppress all warning messages so they do not clutter the notebook output
warnings.filterwarnings('ignore')

# Make Korean text render correctly in plots: read the family name from the
# font file and use it as matplotlib's default font family.
font_path = 'malgun.ttf'
font_name = font_manager.FontProperties(fname=font_path).get_name()

# Apply the font, and disable the Unicode minus sign so that negative numbers
# on the axes are drawn correctly with this font.
plt.rcParams.update({
    'font.family': font_name,
    'axes.unicode_minus': False,
})

In [4]:
# Load the Samsung Electronics stock CSV and preview its first three rows
stock = pd.read_csv(filepath_or_buffer='csv/삼성전자_주식.csv')
stock.head(n=3)

Unnamed: 0,date,close,open,max,min,volume
0,2024-01-10,73600,75000,75200,73200,20259529
1,2024-01-11,73200,72900,73600,72700,57691266
2,2024-01-12,73100,73000,74100,72800,13038939


In [6]:
# Summarise the frame: index range, column names, non-null counts, dtypes and
# memory usage. Note that `date` is still a plain string (object) column here.
stock.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    50 non-null     object
 1   close   50 non-null     int64 
 2   open    50 non-null     int64 
 3   max     50 non-null     int64 
 4   min     50 non-null     int64 
 5   volume  50 non-null     int64 
dtypes: int64(5), object(1)
memory usage: 2.5+ KB


In [30]:
# Work on a copy so the originally loaded `stock` frame stays untouched
stock2 = stock.copy()

# Parse the string dates into Timestamps and place them at column position 1,
# right after `date`, so both representations sit side by side.
new_date = pd.to_datetime(stock2['date'])
stock2.insert(loc=1, column='new_date', value=new_date)
stock2.info()

# Show the first row's value and Python type for the raw and parsed columns
for column in ('date', 'new_date'):
    first_value = stock2.loc[0, column]
    print(first_value, type(first_value))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   date      50 non-null     object        
 1   new_date  50 non-null     datetime64[ns]
 2   close     50 non-null     int64         
 3   open      50 non-null     int64         
 4   max       50 non-null     int64         
 5   min       50 non-null     int64         
 6   volume    50 non-null     int64         
dtypes: datetime64[ns](1), int64(5), object(1)
memory usage: 2.9+ KB
2024-01-10 <class 'str'>
2024-01-10 00:00:00 <class 'pandas._libs.tslibs.timestamps.Timestamp'>


In [32]:
# Use the parsed dates as the index and drop the redundant string column.
# Both calls return new frames, so `stock2` itself is not modified.
stock3 = stock2.set_index('new_date').drop(columns='date')
stock3

Unnamed: 0_level_0,close,open,max,min,volume
new_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-10,73600,75000,75200,73200,20259529
2024-01-11,73200,72900,73600,72700,57691266
2024-01-12,73100,73000,74100,72800,13038939
2024-01-15,73900,73200,74000,73200,13212339
2024-01-16,72600,73500,73700,72500,14760415
2024-01-17,71000,73100,73300,71000,22683660
2024-01-18,71700,71600,72000,70700,17853397
2024-01-19,74700,73500,74700,73000,23363427
2024-01-22,75100,75900,76000,75000,19673375
2024-01-23,75200,75700,75800,74300,14786224


In [44]:
dates = ['2019-01-01', '2020-03-01', '2021-06-01']

# Parse the date strings into a DatetimeIndex
ts_dates = pd.to_datetime(dates)

# Frequency aliases: AS (year start), A (year end), MS (month start),
# M (month end), D (day), H (hour), T (minute), S (second), W (week)
pr_day = ts_dates.to_period('2D')        # two-day periods
pr_month = ts_dates.to_period('M')       # calendar months
pr_week = ts_dates.to_period('W')        # plain 'W' is reported as W-SUN (weeks ending Sunday)
pr_week2 = ts_dates.to_period('W-MON')   # weeks ending on Monday

# One PeriodIndex per line, in the order they were built
print(pr_day, pr_month, pr_week, pr_week2, sep='\n')

PeriodIndex(['2019-01-01', '2020-03-01', '2021-06-01'], dtype='period[2D]')
PeriodIndex(['2019-01', '2020-03', '2021-06'], dtype='period[M]')
PeriodIndex(['2018-12-31/2019-01-06', '2020-02-24/2020-03-01',
             '2021-05-31/2021-06-06'],
            dtype='period[W-SUN]')
PeriodIndex(['2019-01-01/2019-01-07', '2020-02-25/2020-03-02',
             '2021-06-01/2021-06-07'],
            dtype='period[W-MON]')


In [53]:
# Frequency aliases: AS (year start), A (year end), MS (month start),
# M (month end), D (day), H (hour), T (minute), S (second), W (week)
# pd.date_range(start='first date', end='last date', periods=count,
#               freq='frequency', tz='Asia/Seoul')

# Daily timestamps from 2024-01-01 through 2024-01-10 (both ends included)
ts_day1 = pd.date_range('2024-01-01', '2024-01-10', freq='D')
print(ts_day1)

# The same daily range, made timezone-aware (Asia/Seoul, UTC+09:00)
ts_day2 = pd.date_range('2024-01-01', '2024-01-10', freq='D', tz='Asia/Seoul')
print(ts_day2)

# Every Saturday between 2024-01-01 and 2024-02-29
ts_week_sat = pd.date_range('2024-01-01', '2024-02-29', freq='W-SAT')
print(ts_week_sat)

DatetimeIndex(['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04',
               '2024-01-05', '2024-01-06', '2024-01-07', '2024-01-08',
               '2024-01-09', '2024-01-10'],
              dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2024-01-01 00:00:00+09:00', '2024-01-02 00:00:00+09:00',
               '2024-01-03 00:00:00+09:00', '2024-01-04 00:00:00+09:00',
               '2024-01-05 00:00:00+09:00', '2024-01-06 00:00:00+09:00',
               '2024-01-07 00:00:00+09:00', '2024-01-08 00:00:00+09:00',
               '2024-01-09 00:00:00+09:00', '2024-01-10 00:00:00+09:00'],
              dtype='datetime64[ns, Asia/Seoul]', freq='D')
DatetimeIndex(['2024-01-06', '2024-01-13', '2024-01-20', '2024-01-27',
               '2024-02-03', '2024-02-10', '2024-02-17', '2024-02-24'],
              dtype='datetime64[ns]', freq='W-SAT')


In [58]:
# Ten consecutive Saturdays, starting with the first one on or after 2024-01-01.
# With `periods` instead of `end`, the range is bounded by count, not by date.
ts_week_sat2 = pd.date_range('2024-01-01', periods=10, freq='W-SAT')
list(ts_week_sat2)

[Timestamp('2024-01-06 00:00:00'),
 Timestamp('2024-01-13 00:00:00'),
 Timestamp('2024-01-20 00:00:00'),
 Timestamp('2024-01-27 00:00:00'),
 Timestamp('2024-02-03 00:00:00'),
 Timestamp('2024-02-10 00:00:00'),
 Timestamp('2024-02-17 00:00:00'),
 Timestamp('2024-02-24 00:00:00'),
 Timestamp('2024-03-02 00:00:00'),
 Timestamp('2024-03-09 00:00:00')]

In [61]:
# Period ranges at multi-hour frequencies.
# The hour alias is written in lowercase ('h'): the uppercase 'H' spelling is
# deprecated from pandas 2.2 on and is slated for removal. The warning would be
# hidden by this notebook's global warnings filter, so the cell would otherwise
# break without notice on a pandas release that drops the alias.

# 4-hour periods from 2024-01-01 00:00 through 2024-01-03 00:00 (inclusive)
pr_4hour = pd.period_range(start='2024-01-01', end='2024-01-03', freq='4h')
print(pr_4hour)

# Five 2-hour periods starting at 2024-01-01 00:00
pr_2hour = pd.period_range(start='2024-01-01', periods=5, freq='2h')
print(pr_2hour)

PeriodIndex(['2024-01-01 00:00', '2024-01-01 04:00', '2024-01-01 08:00',
             '2024-01-01 12:00', '2024-01-01 16:00', '2024-01-01 20:00',
             '2024-01-02 00:00', '2024-01-02 04:00', '2024-01-02 08:00',
             '2024-01-02 12:00', '2024-01-02 16:00', '2024-01-02 20:00',
             '2024-01-03 00:00'],
            dtype='period[4H]')
PeriodIndex(['2024-01-01 00:00', '2024-01-01 02:00', '2024-01-01 04:00',
             '2024-01-01 06:00', '2024-01-01 08:00'],
            dtype='period[2H]')


In [72]:
# Derive calendar components from the parsed datetime column via the `.dt`
# accessor. `assign` returns a new frame, so `stock2` is left unchanged.
stock3 = stock2.assign(
    year=stock2['new_date'].dt.year,
    month=stock2['new_date'].dt.month,
    day=stock2['new_date'].dt.day,
)
stock3.head()

Unnamed: 0,date,new_date,close,open,max,min,volume,year,month,day
0,2024-01-10,2024-01-10,73600,75000,75200,73200,20259529,2024,1,10
1,2024-01-11,2024-01-11,73200,72900,73600,72700,57691266,2024,1,11
2,2024-01-12,2024-01-12,73100,73000,74100,72800,13038939,2024,1,12
3,2024-01-15,2024-01-15,73900,73200,74000,73200,13212339,2024,1,15
4,2024-01-16,2024-01-16,72600,73500,73700,72500,14760415,2024,1,16


In [103]:
# Alternative to the `.dt` accessor: split the raw 'YYYY-MM-DD' strings and
# convert each piece to an integer column.
stock3 = stock2.copy()
date = stock3['date'].str.split('-')
for position, part in enumerate(['year', 'month', 'day']):
    stock3[part] = date.str.get(position).astype(int)

# Index by (year, month, day) so that rows can be sliced with tuple bounds
stock3 = stock3.set_index(['year', 'month', 'day'])
stock3.loc[(2024, 1, 15):(2024, 1, 19)]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,date,new_date,close,open,max,min,volume
year,month,day,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024,1,15,2024-01-15,2024-01-15,73900,73200,74000,73200,13212339
2024,1,16,2024-01-16,2024-01-16,72600,73500,73700,72500,14760415
2024,1,17,2024-01-17,2024-01-17,71000,73100,73300,71000,22683660
2024,1,18,2024-01-18,2024-01-18,71700,71600,72000,70700,17853397
2024,1,19,2024-01-19,2024-01-19,74700,73500,74700,73000,23363427


In [65]:
import datetime

# A plain standard-library date exposes its components as attributes
date1 = datetime.date(year=2024, month=3, day=1)
print(date1.year, date1.month, sep='\n')

2024
3


In [110]:
# Rebuild the date-indexed frame: drop the string date column and promote the
# parsed dates to the index. Neither call modifies `stock2`.
stock3 = stock2.drop(columns='date').set_index('new_date')
stock3.head(3)

Unnamed: 0_level_0,close,open,max,min,volume
new_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-10,73600,75000,75200,73200,20259529
2024-01-11,73200,72900,73600,72700,57691266
2024-01-12,73100,73000,74100,72800,13038939


In [120]:
# Last five rows, with the columns at positions 1 and 0 (open, then close)
stock3.tail(5).iloc[:, [1, 0]]

Unnamed: 0_level_0,open,close
new_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-03-18,72600,72800
2024-03-19,72300,72800
2024-03-20,73700,76900
2024-03-21,79200,79300
2024-03-22,79600,78900
