In [1]:
## Library Import

# Basic
import re
import warnings
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', 100)
warnings.filterwarnings('ignore')
%config InlineBackend.figure_format = 'retina'

# Preprocessing
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Scaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PowerTransformer

# for VIF
from patsy import dmatrices
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [2]:
# data load
df = pd.read_csv('original_full_data_20200325.csv', index_col = 0)
mmdf = pd.read_csv('bloomberg_map_index_20200324.csv', index_col = 0)



TARGET_DAY = 60

In [3]:
target = df[['date', 'KRXsemiconductor_close']]

In [4]:
df.drop('KRXsemiconductor_close', axis = 1, inplace = True)

In [5]:
len(target)

5051

In [6]:
# target preprocessing

target['date'] = pd.to_datetime(target['date'])
target.set_index('date', inplace = True)

In [7]:
target.isna().sum()

KRXsemiconductor_close    1632
dtype: int64

In [8]:
target = target.dropna()

In [9]:
len(target)

3419

In [10]:
target = (np.log1p(target) - np.log1p(target).shift(periods = TARGET_DAY, axis = 0))

In [11]:
target = target.shift(-1 * TARGET_DAY)

In [12]:
target = target[:-1 * TARGET_DAY]

In [13]:
target

Unnamed: 0_level_0,KRXsemiconductor_close
date,Unnamed: 1_level_1
2006-01-02,-0.088455
2006-01-03,-0.091590
2006-01-04,-0.071684
2006-01-05,-0.037013
2006-01-06,-0.033750
...,...
2019-07-26,0.092139
2019-07-29,0.141737
2019-07-30,0.124408
2019-07-31,0.111455


In [14]:
len(target)

3359

In [15]:
# Class generates derived variables & shifting target variable

class Builder:

    def __init__(self, input_df, map_df, target_day):
        self.df = input_df
        self.mdf = map_df
        self.target_day = target_day # just target
        self.lag_days = [20, 60, 120] # lagging for prediction

    # not Bloomberg Economic
    
    def noteco(self, target_column):      

        col_name =  []
        # creating new column name
        for i in self.lag_days:
            col_name.append(target_column + str(i))
            
        # appending lagged columns
        for i, j in enumerate(self.lag_days):
            self.df[locals()['col_name'][i]] = np.log1p(self.df[target_column]) - np.log1p(self.df[target_column]).shift(periods = j, axis = 0) # 여기에 1을 뺀걸 추가하면 되는거아닌가? 1.얼만에서 1이 빠진거니까

        return self.df 

    # for Bloomberg Economic
    
    def eco(self, target_column):

        col_name = []
        for i in self.lag_days:
            col_name.append(target_column + str(i))
            
        for i, j in enumerate(self.lag_days):
            self.df[locals()['col_name'][i]] = np.log1p(self.df[target_column]) - np.log1p(self.df[target_column].shift(periods = self.target_day, axis = 0))
        return self.df

    # executing functions
    
    def execution(self):
        for i, j in enumerate(self.mdf['ticker']): # mdf에서 ticker 확인
            if j in self.df.columns:
                    
                if self.mdf.loc[i, :][1] == 'eco':
                    self.eco(j)

                elif self.mdf.loc[i, :][1] == 'noteco':
                    self.noteco(j)
                    self.df.drop(j, axis = 1, inplace = True)

                else:
                    pass
            else:
                pass
                            
        return self.df

In [16]:
len(df.columns)

345

In [17]:
# date preprocessing

df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace = True)

In [18]:
# fillna cubic -> ffill -> bfill

#df = df.interpolate(method = 'cubic', limit_area = 'inside')
df = df.fillna(method = 'ffill')
df = df.fillna(method = 'bfill')

In [19]:
# check na value

df.isna().sum().sum()

0

In [20]:
# class load

cla = Builder(df, mmdf, TARGET_DAY)

In [21]:
# execution function
# 새로운 데이터프레임 생성 
cla.execution()

Unnamed: 0_level_0,GDPCQOQIndex,GDPCYOYIndex,GDPCTOT%Index,GPDITOC%Index,GPGSTOC%Index,RGCDCIPIIndex,GDPCUR$Index,GDPCURYIndex,GDPPIQQIndex,GDPCPCECIndex,CPIYOYIndex,CPIXYOYIndex,PCEDEFYIndex,PCECYOYIndex,CPICHNGIndex,CPUPXCHGIndex,FDIDFDMOIndex,FDIDSGMOIndex,FDIUFDYOIndex,FDIUSGYOIndex,USURTOTIndex,USUDMAERIndex,INJCJCIndex,INJCSPIndex,NFPTCHIndex,...,QCOM_close120,SamsungElectronics_close20,SamsungElectronics_close60,SamsungElectronics_close120,ShinEtsuChemical_close20,ShinEtsuChemical_close60,ShinEtsuChemical_close120,Siltronic_close20,Siltronic_close60,Siltronic_close120,SKhynix_close20,SKhynix_close60,SKhynix_close120,Sumco_close20,Sumco_close60,Sumco_close120,TokyoElectron_close20,TokyoElectron_close60,TokyoElectron_close120,TSMC_close20,TSMC_close60,TSMC_close120,UMC_close20,UMC_close60,UMC_close120
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2006-01-02,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2006-01-03,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2006-01-04,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2006-01-05,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2006-01-06,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-10-27,2.0,2.3,4.6,-6.3,4.8,69.4,21340.27,4.0,2.4,1.9,1.7,2.4,1.4,1.77407,0.1,0.3,0.1,0.3,1.8,2.3,3.7,7.2,210.0,1656.0,219.0,...,0.051832,0.063883,0.142267,0.079714,0.066249,0.123778,0.174040,0.184395,0.359567,0.243316,0.031865,0.123073,0.176306,0.150061,0.179048,0.000000,0.041847,0.105682,0.091316,0.058440,0.160807,0.195714,0.064135,0.088666,0.033902
2019-10-28,2.0,2.3,4.6,-6.3,4.8,69.4,21340.27,4.0,2.4,1.9,1.7,2.4,1.4,1.77407,0.1,0.3,0.1,0.3,1.8,2.3,3.7,7.2,210.0,1656.0,219.0,...,0.086698,0.047912,0.167228,0.087541,0.075930,0.085888,0.154798,0.268355,0.407947,0.286738,0.024421,0.123073,0.176306,0.141203,0.174941,-0.004107,0.032854,0.103058,0.090834,0.032243,0.154521,0.199265,0.068993,0.080093,0.033902
2019-10-29,2.0,2.3,4.6,-6.3,4.8,69.4,21340.27,4.0,2.4,1.9,1.7,2.4,1.4,1.77407,0.1,0.3,0.1,0.3,1.8,2.3,3.7,7.2,210.0,1656.0,219.0,...,0.051382,0.044006,0.149592,0.092182,0.073813,0.093916,0.145008,0.301446,0.387078,0.261677,0.025626,0.069853,0.170343,0.145310,0.169292,-0.012220,0.041978,0.098902,0.095215,0.059338,0.151037,0.180153,0.073413,0.068563,0.018238
2019-10-30,2.0,2.3,4.6,-6.3,4.8,69.4,21340.27,4.0,2.4,1.9,1.7,2.4,1.4,1.77407,0.1,0.3,0.1,0.3,1.8,2.3,3.7,7.2,215.0,1655.0,219.0,...,0.055234,0.037396,0.135799,0.085928,0.038874,0.100175,0.172869,0.263901,0.386620,0.279844,0.031155,0.051616,0.132303,0.132939,0.156921,-0.028632,0.054224,0.113140,0.116841,0.065146,0.156846,0.183377,0.057693,0.062520,0.021440


In [22]:
len(df.columns)

1308

In [23]:
df.columns

Index(['GDPCQOQIndex', 'GDPCYOYIndex', 'GDPCTOT%Index', 'GPDITOC%Index', 'GPGSTOC%Index',
       'RGCDCIPIIndex', 'GDPCUR$Index', 'GDPCURYIndex', 'GDPPIQQIndex', 'GDPCPCECIndex',
       ...
       'Sumco_close120', 'TokyoElectron_close20', 'TokyoElectron_close60',
       'TokyoElectron_close120', 'TSMC_close20', 'TSMC_close60', 'TSMC_close120', 'UMC_close20',
       'UMC_close60', 'UMC_close120'],
      dtype='object', length=1308)

In [24]:
df.isna().sum().sum()

565984

In [25]:
# shifting 한 값 날리기

df = df[TARGET_DAY:]

In [26]:
len(df)

4991

In [27]:
len(df)

4991

In [28]:
# nan값이 500을 넘으면 제거 
drop_columns = df.isna().sum().sort_values(ascending = False)[df.isna().sum().sort_values(ascending = False).values>500].index

In [29]:
df.drop(drop_columns, axis = 1, inplace = True)

In [30]:
len(df.columns)

1053

In [31]:
# fill na for shifted data

#df = df.interpolate(method = 'cubic', limit_area = 'inside')
df = df.fillna(method = 'ffill')
df = df.fillna(method = 'bfill')

In [32]:
# check na

df.isna().sum().sum()

0

In [33]:
len(df)

4991

In [34]:
# features

len(df.columns)

1053

### 이 시점에서 target과 merge

In [35]:
target

Unnamed: 0_level_0,KRXsemiconductor_close
date,Unnamed: 1_level_1
2006-01-02,-0.088455
2006-01-03,-0.091590
2006-01-04,-0.071684
2006-01-05,-0.037013
2006-01-06,-0.033750
...,...
2019-07-26,0.092139
2019-07-29,0.141737
2019-07-30,0.124408
2019-07-31,0.111455


In [36]:
merged_df = pd.merge(target, df, how = 'left', on = 'date')

In [37]:
len(merged_df)

3359

In [38]:
df = merged_df.dropna()

In [39]:
df.isna().sum().sum()

0

In [40]:
print('{:.0f}'.format(len(df)/5051*100), '% 만큼 데이터 살았다')

66 % 만큼 데이터 살았다


In [41]:
df

Unnamed: 0_level_0,KRXsemiconductor_close,GDPCQOQIndex,GDPCYOYIndex,GDPCTOT%Index,GPDITOC%Index,GPGSTOC%Index,RGCDCIPIIndex,GDPCUR$Index,GDPCURYIndex,GDPPIQQIndex,GDPCPCECIndex,CPIYOYIndex,CPIXYOYIndex,PCEDEFYIndex,PCECYOYIndex,CPICHNGIndex,CPUPXCHGIndex,FDIDFDMOIndex,FDIDSGMOIndex,FDIUFDYOIndex,FDIUSGYOIndex,USURTOTIndex,USUDMAERIndex,INJCJCIndex,INJCSPIndex,...,QCOM_close120,SamsungElectronics_close20,SamsungElectronics_close60,SamsungElectronics_close120,ShinEtsuChemical_close20,ShinEtsuChemical_close60,ShinEtsuChemical_close120,Siltronic_close20,Siltronic_close60,Siltronic_close120,SKhynix_close20,SKhynix_close60,SKhynix_close120,Sumco_close20,Sumco_close60,Sumco_close120,TokyoElectron_close20,TokyoElectron_close60,TokyoElectron_close120,TSMC_close20,TSMC_close60,TSMC_close120,UMC_close20,UMC_close60,UMC_close120
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2006-03-03,0.014408,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,290.0,2516.0,...,0.145734,-0.052290,-0.010679,-0.015291,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-0.120326,-0.209271,-0.189791,0.000000,0.000000,0.000000,-0.041514,-0.027867,-0.020827,-0.011055,-0.006599,0.060755,0.023089,0.023089,0.139448
2006-03-06,-0.007438,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,290.0,2516.0,...,0.145734,-0.022540,-0.031416,-0.015291,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-0.062518,-0.109863,-0.189791,0.000000,0.000000,0.000000,0.025217,-0.041803,-0.020827,-0.018160,-0.041573,0.060755,0.062568,0.030795,0.139448
2006-03-07,0.020200,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,290.0,2516.0,...,0.145734,-0.038976,-0.088203,-0.015291,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.006884,-0.184315,-0.189791,0.000000,0.000000,0.000000,-0.038792,-0.095931,-0.020827,-0.016093,-0.062502,0.060755,0.018395,-0.005167,0.139448
2006-03-08,0.010531,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,282.0,2565.0,...,0.145734,-0.051614,-0.097788,-0.015291,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-0.030102,-0.174076,-0.189791,0.000000,0.000000,0.000000,-0.025127,-0.106777,-0.020827,-0.038413,-0.074387,0.060755,-0.025891,-0.010361,0.139448
2006-03-09,-0.024153,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,282.0,2565.0,...,0.145734,-0.090123,-0.110712,-0.015291,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-0.069678,-0.186032,-0.189791,0.000000,0.000000,0.000000,-0.021576,-0.106777,-0.020827,-0.022756,-0.065686,0.060755,-0.010307,-0.005167,0.139448
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-07-26,0.092139,3.1,2.7,1.1,6.2,2.9,116.0,21098.83,4.6,1.1,1.1,1.8,2.0,1.4,1.48266,0.1,0.1,0.2,0.3,2.1,2.4,3.6,7.1,217.0,1694.0,...,0.280970,0.032330,0.100304,0.050009,0.083216,0.178526,0.185368,0.146879,0.056074,-0.058565,0.154149,0.176322,0.098698,-0.062914,0.124298,0.000000,0.011696,0.082700,0.112237,0.066828,0.119819,0.061826,-0.048163,0.013245,0.064539
2019-07-29,0.141737,3.1,2.7,1.1,6.2,2.9,116.0,21098.83,4.6,1.1,1.1,1.8,2.0,1.4,1.48266,0.1,0.1,0.2,0.3,2.1,2.4,3.6,7.1,217.0,1694.0,...,0.269378,0.021930,0.080131,0.031958,0.119251,0.186722,0.187635,0.171937,0.124608,-0.074604,0.098170,0.155665,0.037041,-0.055451,0.139060,-0.017392,0.018934,0.106678,0.133390,0.069707,0.122413,0.047984,-0.028710,0.042982,0.077349
2019-07-30,0.124408,3.1,2.7,1.1,6.2,2.9,116.0,21098.83,4.6,1.1,1.1,1.8,2.0,1.4,1.48266,0.1,0.1,0.2,0.3,2.1,2.4,3.6,7.1,217.0,1694.0,...,0.253918,0.021716,0.091021,0.032753,0.133804,0.201053,0.156763,0.124260,0.118721,-0.155963,0.079092,0.189187,0.029584,-0.018019,0.096311,-0.070204,0.036259,0.124513,0.142351,0.044579,0.101105,0.042895,-0.043621,0.057346,0.096904
2019-07-31,0.111455,3.1,2.7,1.1,6.2,2.9,116.0,21098.83,4.6,1.1,1.1,1.8,2.0,1.4,1.48266,0.1,0.1,0.2,0.3,2.1,2.4,3.6,7.1,229.0,1696.0,...,0.227163,-0.018569,0.064905,-0.008781,0.126177,0.206004,0.143130,0.153817,0.128483,-0.178744,0.018373,0.163512,0.006523,-0.004494,0.105361,-0.065383,0.035950,0.119133,0.116925,0.030911,0.099438,0.040424,-0.055742,0.057346,0.096904


In [44]:
df.describe().T.sort_values(by = 'min')[0:39]

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
MBRXYOYWIndex60,3317.0,-inf,,-inf,-0.372675,0.0,0.108214,2.772589
MWINCHNGIndex60,3317.0,-inf,,-inf,-0.405465,0.0,0.268264,2.639057
MWINCHNGIndex120,3317.0,-inf,,-inf,-0.405465,0.0,0.268264,2.639057
KOGCSTOQIndex60,3317.0,,,-inf,-0.09531,0.0,0.182322,inf
PITLCHNGIndex120,3317.0,,,-inf,-0.167054,0.0,0.194156,inf
LEICHNGIndex20,3317.0,,,-inf,-0.262364,0.0,0.310155,inf
LEICHNGIndex60,3317.0,,,-inf,-0.262364,0.0,0.310155,inf
LEICHNGIndex120,3317.0,,,-inf,-0.262364,0.0,0.310155,inf
PITLCHNGIndex60,3317.0,,,-inf,-0.167054,0.0,0.194156,inf
RSTAMOMIndex20,3317.0,,,-inf,-0.496437,-0.105361,0.435318,inf


In [45]:
nan_columns = df.describe().T.sort_values(by = 'min')[0:39].index

In [46]:
df.drop(nan_columns, axis = 1, inplace = True)

In [51]:
df.describe().T.sort_values(by = 'std', ascending = True)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
KOHSTRIndex120,3317.0,5.026698e-04,0.001988,-6.872879e-03,0.000000e+00,0.000000e+00,0.000000e+00,1.087505e-02
KOHSTRIndex60,3317.0,5.026698e-04,0.001988,-6.872879e-03,0.000000e+00,0.000000e+00,0.000000e+00,1.087505e-02
KOHSTRIndex20,3317.0,5.026698e-04,0.001988,-6.872879e-03,0.000000e+00,0.000000e+00,0.000000e+00,1.087505e-02
SKLILCIndex120,3317.0,5.622967e-03,0.002433,-7.371041e-03,4.750603e-03,5.899722e-03,6.743114e-03,1.201938e-02
SKLILCIndex60,3317.0,5.622967e-03,0.002433,-7.371041e-03,4.750603e-03,5.899722e-03,6.743114e-03,1.201938e-02
...,...,...,...,...,...,...,...,...
KOGFTRIndex,3317.0,1.714932e+05,96367.187729,2.186400e+04,8.937200e+04,1.630520e+05,2.360310e+05,4.382620e+05
MTSLRL$Index,3317.0,1.322656e+06,100776.105426,1.126129e+06,1.260142e+06,1.310470e+06,1.398649e+06,1.517565e+06
KOHHLIndex,3317.0,9.235600e+05,274459.406413,5.224090e+05,6.844116e+05,8.750175e+05,1.137953e+06,1.451722e+06
KOHHDIndex,3317.0,9.770850e+05,291323.768834,5.503308e+05,7.235215e+05,9.285616e+05,1.203099e+06,1.539900e+06


In [52]:
print('데이터 추출 전 최종 확인', 'shape: ', df.shape, 'nan값 수 :', df.isna().sum().sum())

데이터 추출 전 최종 확인 shape:  (3317, 1015) nan값 수 : 0


In [53]:
df.to_csv('final_data_60_by_ffill_ver1.csv')