# 주가 지수, 종목 코드, 종가 데이터 셋 만들기
* 주가 지수 셋에 종목 코드, 상장시장, 종가 열 추가

## 주가 지수 데이터 수집

In [46]:
import pandas as pd
import os
import FinanceDataReader as fdr
from tqdm import tqdm

### 시작과 끝 날짜 지정

In [47]:
# Inclusive date window (YYYY-MM-DD) shared by every download below.
start_date, end_date = '2021-01-04', '2021-11-26'

### KOSDAQ

In [48]:
# KOSDAQ index ('KQ11'): keep only the closing value, as a Series named 'kosdaq'
# so the column is labelled correctly when the indices are merged later.
kosdaq = (
    fdr.DataReader('KQ11', start=start_date, end=end_date)['Close']
    .rename('kosdaq')
)
kosdaq

Date
2021-01-04     977.62
2021-01-05     985.76
2021-01-06     981.39
2021-01-07     988.86
2021-01-08     987.79
               ...   
2021-11-22    1032.31
2021-11-23    1013.72
2021-11-24    1020.13
2021-11-25    1015.66
2021-11-26    1005.89
Name: kosdaq, Length: 224, dtype: float64

### NASDAQ

In [49]:
# NASDAQ index ('IXIC'): keep only the closing value, as a Series named 'nasdaq'.
nasdaq = (
    fdr.DataReader('IXIC', start=start_date, end=end_date)['Close']
    .rename('nasdaq')
)
nasdaq

Date
2021-01-04    12698.4
2021-01-05    12819.0
2021-01-06    12740.8
2021-01-07    13067.5
2021-01-08    13202.0
               ...   
2021-11-19    16057.4
2021-11-22    15854.8
2021-11-23    15775.1
2021-11-24    15845.2
2021-11-26    15491.7
Name: nasdaq, Length: 228, dtype: float64

### DOW

In [50]:
# Dow Jones index ('DJI'): keep only the closing value, as a Series named 'dow'.
dow = (
    fdr.DataReader('DJI', start=start_date, end=end_date)['Close']
    .rename('dow')
)
dow

Date
2021-01-04    30223.89
2021-01-05    30391.60
2021-01-06    30829.40
2021-01-07    31041.13
2021-01-08    31097.97
                ...   
2021-11-19    35602.18
2021-11-22    35619.26
2021-11-23    35813.74
2021-11-24    35805.17
2021-11-26    34908.10
Name: dow, Length: 228, dtype: float64

### S&P500

In [51]:
# S&P 500 index ('US500'): keep only the closing value, as a Series named 'sp500'.
sp500 = (
    fdr.DataReader('US500', start=start_date, end=end_date)['Close']
    .rename('sp500')
)
sp500

Date
2021-01-04    3700.65
2021-01-05    3726.86
2021-01-06    3748.14
2021-01-07    3803.79
2021-01-08    3824.68
               ...   
2021-11-19    4697.96
2021-11-22    4682.95
2021-11-23    4690.70
2021-11-24    4701.46
2021-11-26    4594.62
Name: sp500, Length: 228, dtype: float64

### KOSPI

In [52]:
# KOSPI index ('KS11'): keep only the closing value, as a Series named 'kospi'.
kospi = (
    fdr.DataReader('KS11', start=start_date, end=end_date)['Close']
    .rename('kospi')
)
kospi

Date
2021-01-04    2944.45
2021-01-05    2990.57
2021-01-06    2968.21
2021-01-07    3031.68
2021-01-08    3152.18
               ...   
2021-11-22    3013.25
2021-11-23    2997.33
2021-11-24    2994.29
2021-11-25    2980.27
2021-11-26    2936.44
Name: kospi, Length: 224, dtype: float64

### 종목 데이터 불러오기

In [53]:
# Load the stock master list; the preview shows one row per stock with its
# name (종목명), code (종목코드) and listing market (상장시장).
path = '../data'
list_name = 'Stock_List.csv'
stock_list_csv = os.path.join(path, list_name)
stock_list = pd.read_csv(stock_list_csv)
stock_list.head()

Unnamed: 0,종목명,종목코드,상장시장
0,삼성전자,5930,KOSPI
1,SK하이닉스,660,KOSPI
2,NAVER,35420,KOSPI
3,카카오,35720,KOSPI
4,삼성바이오로직스,207940,KOSPI


In [54]:
# Normalise stock codes to 6 characters: the CSV is parsed with numeric codes,
# which drops leading zeros (e.g. 5930), so convert to text and left-pad with '0'.
code_text = stock_list['종목코드'].astype(str)
stock_list['종목코드'] = code_text.str.zfill(6)
stock_list

Unnamed: 0,종목명,종목코드,상장시장
0,삼성전자,005930,KOSPI
1,SK하이닉스,000660,KOSPI
2,NAVER,035420,KOSPI
3,카카오,035720,KOSPI
4,삼성바이오로직스,207940,KOSPI
...,...,...,...
365,맘스터치,220630,KOSDAQ
366,다날,064260,KOSDAQ
367,제이시스메디칼,287410,KOSDAQ
368,크리스에프앤씨,110790,KOSDAQ


### 주가 지수 데이터 셋 만들기

#### 평일 날짜 셋 만들기

In [55]:
# Calendar of every weekday (freq='B') in the window, as a one-column frame;
# it is the left side of the joins that follow, so holidays show up as gaps.
Business_days = pd.DataFrame(
    {'Date': pd.date_range(start=start_date, end=end_date, freq='B')}
)

In [56]:
# Confirm the row count and that Date was built as datetime64.
Business_days.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 235 entries, 0 to 234
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    235 non-null    datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 2.0 KB


#### 지수 셋 합치기

In [57]:
# Left-join each index series onto the business-day calendar, one column per
# index, in the order kosdaq, nasdaq, dow, sp500, kospi. Days on which a market
# was closed stay in the frame with NaN for that index.
data = Business_days
for index_close in (kosdaq, nasdaq, dow, sp500, kospi):
    data = pd.merge(data, index_close, on='Date', how='left')
data.head()

Unnamed: 0,Date,kosdaq,nasdaq,dow,sp500,kospi
0,2021-01-04,977.62,12698.4,30223.89,3700.65,2944.45
1,2021-01-05,985.76,12819.0,30391.6,3726.86,2990.57
2,2021-01-06,981.39,12740.8,30829.4,3748.14,2968.21
3,2021-01-07,988.86,13067.5,31041.13,3803.79,3031.68
4,2021-01-08,987.79,13202.0,31097.97,3824.68,3152.18


In [58]:
# Non-null counts below the row count reveal the holiday gaps per index.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 235 entries, 0 to 234
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    235 non-null    datetime64[ns]
 1   kosdaq  224 non-null    float64       
 2   nasdaq  228 non-null    float64       
 3   dow     228 non-null    float64       
 4   sp500   228 non-null    float64       
 5   kospi   224 non-null    float64       
dtypes: datetime64[ns](1), float64(5)
memory usage: 12.9 KB


#### 지수 데이터 셋 결측치 처리

In [59]:
# Fill the holiday gaps in each index column by linear interpolation between
# the neighbouring trading days ('linear' is the pandas default, spelled out).
# NOTE(review): a filled value depends on the *next* trading day's close; if
# this data set feeds a forecasting model, confirm that look-ahead is acceptable.
data = data.interpolate(method='linear')

In [60]:
# Verify that no index column has missing values left after interpolation.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 235 entries, 0 to 234
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    235 non-null    datetime64[ns]
 1   kosdaq  235 non-null    float64       
 2   nasdaq  235 non-null    float64       
 3   dow     235 non-null    float64       
 4   sp500   235 non-null    float64       
 5   kospi   235 non-null    float64       
dtypes: datetime64[ns](1), float64(5)
memory usage: 12.9 KB


In [63]:
# Number of rows in the business-day calendar.
len(Business_days)

235

## 지수 셋에 종목 코드, 종가 열 붙이기

In [64]:
# Build the long-format data set: for every listed stock, take the shared index
# columns in `data` and attach that stock's code, listing market and daily
# close, then stack the per-stock frames into `all_data`.
per_stock_frames = []
for code, market in tqdm(stock_list[['종목코드', '상장시장']].values):
    stock_data = fdr.DataReader(code, start=start_date, end=end_date)[['Close']].reset_index()
    # Left-join on Date so the result has exactly the calendar's rows, in the
    # calendar's order. An outer join could add or reorder rows if the stock
    # has a quote on a non-business day, and the index-aligned assignment
    # below would then pair closes with the wrong dates.
    temp_close = pd.merge(Business_days, stock_data, on='Date', how='left')
    # assign() returns a new frame, so the shared `data` frame is no longer
    # overwritten with each stock's columns. `close` stays NaN on days with
    # no quote for this stock.
    per_stock_frames.append(
        data.assign(code=code, market=market, close=temp_close['Close'])
    )

# Concatenate once at the end; concatenating inside the loop re-copied every
# accumulated row on each iteration. An empty stock list yields an empty frame.
all_data = pd.concat(per_stock_frames) if per_stock_frames else pd.DataFrame()

100%|████████████████████████████████████████████████████████████████████████████████| 370/370 [01:38<00:00,  3.77it/s]


In [65]:
# Preview the first rows (first stock in the list).
all_data.head()

Unnamed: 0,Date,kosdaq,nasdaq,dow,sp500,kospi,code,market,close
0,2021-01-04,977.62,12698.4,30223.89,3700.65,2944.45,5930,KOSPI,83000.0
1,2021-01-05,985.76,12819.0,30391.6,3726.86,2990.57,5930,KOSPI,83900.0
2,2021-01-06,981.39,12740.8,30829.4,3748.14,2968.21,5930,KOSPI,82200.0
3,2021-01-07,988.86,13067.5,31041.13,3803.79,3031.68,5930,KOSPI,82900.0
4,2021-01-08,987.79,13202.0,31097.97,3824.68,3152.18,5930,KOSPI,88800.0


In [66]:
# Preview the last rows (last stock in the list).
all_data.tail()

Unnamed: 0,Date,kosdaq,nasdaq,dow,sp500,kospi,code,market,close
230,2021-11-22,1032.31,15854.8,35619.26,4682.95,3013.25,99320,KOSDAQ,48350.0
231,2021-11-23,1013.72,15775.1,35813.74,4690.7,2997.33,99320,KOSDAQ,46900.0
232,2021-11-24,1020.13,15845.2,35805.17,4701.46,2994.29,99320,KOSDAQ,47150.0
233,2021-11-25,1015.66,15668.45,35356.635,4648.04,2980.27,99320,KOSDAQ,46600.0
234,2021-11-26,1005.89,15491.7,34908.1,4594.62,2936.44,99320,KOSDAQ,45350.0


In [68]:
# Check total row count, dtypes, and how many close values are missing.
all_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 86950 entries, 0 to 234
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    86950 non-null  datetime64[ns]
 1   kosdaq  86950 non-null  float64       
 2   nasdaq  86950 non-null  float64       
 3   dow     86950 non-null  float64       
 4   sp500   86950 non-null  float64       
 5   kospi   86950 non-null  float64       
 6   code    86950 non-null  object        
 7   market  86950 non-null  object        
 8   close   82872 non-null  float64       
dtypes: datetime64[ns](1), float64(6), object(2)
memory usage: 6.6+ MB


## 파일 출력하기

In [67]:
# Write the combined data set to the working directory; the file name embeds
# the date window, and the repeated 0..N row index is not written.
file_name = f'{start_date}_{end_date}_all_data.csv'
all_data.to_csv(file_name, index=False)