In [1]:
# 금융 관련 정보를 읽어올 수 있는 라이브러리
!pip install pandas_datareader

Collecting pandas_datareader
  Downloading pandas_datareader-0.10.0-py3-none-any.whl (109 kB)
Installing collected packages: pandas-datareader
Successfully installed pandas-datareader-0.10.0


In [3]:
import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pandas_datareader import data

# Notebook-wide matplotlib defaults, applied in a single update call.
plt.rcParams.update({
    # NOTE(review): presumably chosen so Korean labels render; the font must be
    # installed on the machine running the notebook -- confirm.
    'font.family': 'Malgun Gothic',
    'font.size': 16,
    'figure.figsize': (20, 10),
    # Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph.
    'axes.unicode_minus': False,
})

### 주식 정보 읽어오기

In [11]:
# Download the daily price history for a single ticker.
# Arguments: ticker symbol, data source name, start date, end date.
# Ticker symbols have to be looked up on the Yahoo Finance site.
f = data.DataReader(
    name='005930.KS',
    data_source='yahoo',
    start='2010-01-01',
    end='2022-12-31',
)
f

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,16180.0,16000.0,16060.0,16180.0,11950800.0,12810.791016
2010-01-05,16580.0,16300.0,16520.0,16440.0,27925850.0,13016.647461
2010-01-06,16820.0,16520.0,16580.0,16820.0,22948850.0,13317.520508
2010-01-07,16820.0,16260.0,16820.0,16260.0,22107950.0,12874.129883
2010-01-08,16420.0,16120.0,16400.0,16420.0,14777550.0,13000.812500
...,...,...,...,...,...,...
2022-08-12,60700.0,59400.0,59500.0,60200.0,10786658.0,60200.000000
2022-08-16,61600.0,60300.0,60500.0,61000.0,15036727.0,61000.000000
2022-08-17,61200.0,60300.0,61100.0,60400.0,9061518.0,60400.000000
2022-08-18,61900.0,60000.0,60300.0,61500.0,16372754.0,61500.000000


In [12]:
# When the Yahoo server misbehaves, rows for days on which the market was
# closed can also be returned.
# Count the missing values in each column.
f.isnull().sum()

High         0
Low          0
Open         0
Close        0
Volume       0
Adj Close    0
dtype: int64

In [13]:
# Show the rows whose trading volume is exactly zero.
f[f['Volume'] == 0]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-08-14,22800.0,22800.0,22800.0,22800.0,0.0,18947.421875
2017-09-26,53620.0,53620.0,53620.0,53620.0,0.0,46301.253906
2018-04-30,53000.0,53000.0,53000.0,53000.0,0.0,46642.304688
2018-05-02,53000.0,53000.0,53000.0,53000.0,0.0,46642.304688
2018-05-03,53000.0,53000.0,53000.0,53000.0,0.0,46642.304688
2020-03-09,56500.0,56500.0,56500.0,56500.0,0.0,52451.089844
2020-03-12,52100.0,52100.0,52100.0,52100.0,0.0,48366.40625
2022-01-26,74000.0,74000.0,74000.0,74000.0,0.0,73172.039062
2022-02-08,73000.0,73000.0,73000.0,73000.0,0.0,72183.226562
2022-02-09,73000.0,73000.0,73000.0,73000.0,0.0,72183.226562


In [14]:
# Remove every row whose trading volume is zero.
idx = f.index[f['Volume'] == 0]
f.drop(index=idx, inplace=True)
# Display the zero-volume rows again; the result should now be empty.
f[f['Volume'] == 0]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


In [15]:
# Move the date index into an ordinary column and number the rows 0..n-1.
# The guard keeps the cell idempotent: once the index is already a RangeIndex,
# resetting again would only prepend a spurious 'index' column, which would
# break any later step that expects exactly seven columns.
if not isinstance(f.index, pd.RangeIndex):
    f = f.reset_index()
f

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close
0,2010-01-04,16180.0,16000.0,16060.0,16180.0,11950800.0,12810.791016
1,2010-01-05,16580.0,16300.0,16520.0,16440.0,27925850.0,13016.647461
2,2010-01-06,16820.0,16520.0,16580.0,16820.0,22948850.0,13317.520508
3,2010-01-07,16820.0,16260.0,16820.0,16260.0,22107950.0,12874.129883
4,2010-01-08,16420.0,16120.0,16400.0,16420.0,14777550.0,13000.812500
...,...,...,...,...,...,...,...
3092,2022-08-12,60700.0,59400.0,59500.0,60200.0,10786658.0,60200.000000
3093,2022-08-16,61600.0,60300.0,60500.0,61000.0,15036727.0,61000.000000
3094,2022-08-17,61200.0,60300.0,61100.0,60400.0,9061518.0,60400.000000
3095,2022-08-18,61900.0,60000.0,60300.0,61500.0,16372754.0,61500.000000


In [16]:
# Rename the columns to Korean labels.
# A name-to-name mapping ties each label to its source column rather than to
# column position, and makes re-running the cell a no-op.
# Labels must not carry leading or trailing whitespace, otherwise lookups such
# as f['최저가'] raise KeyError and the saved CSV header is polluted.
f = f.rename(columns={
    'Date': '거래일',
    'High': '최고가',
    'Low': '최저가',
    'Open': '시작가',
    'Close': '종가',
    'Volume': '거래량',
    'Adj Close': '수정종가',
})
f

Unnamed: 0,거래일,최고가,최저가,시작가,종가,거래량,수정종가
0,2010-01-04,16180.0,16000.0,16060.0,16180.0,11950800.0,12810.791016
1,2010-01-05,16580.0,16300.0,16520.0,16440.0,27925850.0,13016.647461
2,2010-01-06,16820.0,16520.0,16580.0,16820.0,22948850.0,13317.520508
3,2010-01-07,16820.0,16260.0,16820.0,16260.0,22107950.0,12874.129883
4,2010-01-08,16420.0,16120.0,16400.0,16420.0,14777550.0,13000.812500
...,...,...,...,...,...,...,...
3092,2022-08-12,60700.0,59400.0,59500.0,60200.0,10786658.0,60200.000000
3093,2022-08-16,61600.0,60300.0,60500.0,61000.0,15036727.0,61000.000000
3094,2022-08-17,61200.0,60300.0,61100.0,60400.0,9061518.0,60400.000000
3095,2022-08-18,61900.0,60000.0,60300.0,61500.0,16372754.0,61500.000000


In [17]:
# Save the cleaned frame as CSV.
# The target directory is created first so the write does not fail on a fresh
# checkout where ./data does not exist yet.
output_path = Path('./data/samsung_stock.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
# 'utf-8-sig' writes a byte-order mark ahead of the UTF-8 data.
f.to_csv(output_path, index=False, encoding='utf-8-sig')