In [1]:
## Library Import

# Basic
import re
import warnings
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', 100)
warnings.filterwarnings('ignore')
%config InlineBackend.figure_format = 'retina'

# Preprocessing
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Scaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PowerTransformer

# for VIF
from patsy import dmatrices
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [2]:
# data load
df = pd.read_csv('original_full_data_20200325.csv', index_col = 0)
mmdf = pd.read_csv('bloomberg_map_index_20200324.csv', index_col = 0)



TARGET_DAY = 260

In [3]:
target = df[['date', 'KRXsemiconductor_close']]

In [4]:
df.drop('KRXsemiconductor_close', axis = 1, inplace = True)

In [5]:
len(target)

5051

In [6]:
# target preprocessing

target['date'] = pd.to_datetime(target['date'])
target.set_index('date', inplace = True)

In [7]:
target.isna().sum()

KRXsemiconductor_close    1632
dtype: int64

In [8]:
target = target.dropna()

In [9]:
len(target)

3419

In [10]:
target = (np.log1p(target) - np.log1p(target).shift(periods = TARGET_DAY, axis = 0))

In [25]:
target

Unnamed: 0_level_0,KRXsemiconductor_close
date,Unnamed: 1_level_1
2006-01-02,1000.00
2006-01-03,1012.36
2006-01-04,1007.17
2006-01-05,991.68
2006-01-06,1008.15
...,...
2019-10-25,2312.86
2019-10-28,2323.02
2019-10-29,2320.07
2019-10-30,2285.19


In [11]:
target = target.shift(-1 * TARGET_DAY)

In [12]:
target = target[:-1 * TARGET_DAY]

In [13]:
target

Unnamed: 0_level_0,KRXsemiconductor_close
date,Unnamed: 1_level_1
2006-01-02,-0.076769
2006-01-03,-0.093983
2006-01-04,-0.099178
2006-01-05,-0.061885
2006-01-06,-0.076766
...,...
2018-10-04,0.065541
2018-10-05,0.087007
2018-10-08,0.086912
2018-10-10,0.095500


In [14]:
len(target)

3159

In [15]:
# Class generates derived variables & shifting target variable

class Builder:

    def __init__(self, input_df, map_df, target_day):
        self.df = input_df
        self.mdf = map_df
        self.target_day = target_day # just target
        self.lag_days = [120, 260] # lagging for prediction

    # not Bloomberg Economic
    
    def noteco(self, target_column):      

        col_name =  []
        # creating new column name
        for i in self.lag_days:
            col_name.append(target_column + str(i))
            
        # appending lagged columns
        for i, j in enumerate(self.lag_days):
            self.df[locals()['col_name'][i]] = np.log1p(self.df[target_column]) - np.log1p(self.df[target_column]).shift(periods = j, axis = 0) # 여기에 1을 뺀걸 추가하면 되는거아닌가? 1.얼만에서 1이 빠진거니까

        return self.df 

    # for Bloomberg Economic
    
    def eco(self, target_column):

        col_name = []
        for i in self.lag_days:
            col_name.append(target_column + str(i))
            
        for i, j in enumerate(self.lag_days):
            self.df[locals()['col_name'][i]] = np.log1p(self.df[target_column]) - np.log1p(self.df[target_column].shift(periods = self.target_day, axis = 0))
        return self.df

    # executing functions
    
    def execution(self):
        for i, j in enumerate(self.mdf['ticker']): # mdf에서 ticker 확인
            if j in self.df.columns:
                    
                if self.mdf.loc[i, :][1] == 'eco':
                    self.eco(j)

                elif self.mdf.loc[i, :][1] == 'noteco':
                    self.noteco(j)
                    self.df.drop(j, axis = 1, inplace = True)

                else:
                    pass
            else:
                pass
                            
        return self.df

In [16]:
len(df.columns)

345

In [17]:
# date preprocessing

df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace = True)

In [18]:
# fillna cubic -> ffill -> bfill

#df = df.interpolate(method = 'cubic', limit_area = 'inside')
df = df.fillna(method = 'ffill')
df = df.fillna(method = 'bfill')

In [19]:
# check na value

df.isna().sum().sum()

0

In [20]:
# class load

cla = Builder(df, mmdf, TARGET_DAY)

In [21]:
# execution function
# 새로운 데이터프레임 생성 
cla.execution()

Unnamed: 0_level_0,GDPCQOQIndex,GDPCYOYIndex,GDPCTOT%Index,GPDITOC%Index,GPGSTOC%Index,RGCDCIPIIndex,GDPCUR$Index,GDPCURYIndex,GDPPIQQIndex,GDPCPCECIndex,CPIYOYIndex,CPIXYOYIndex,PCEDEFYIndex,PCECYOYIndex,CPICHNGIndex,CPUPXCHGIndex,FDIDFDMOIndex,FDIDSGMOIndex,FDIUFDYOIndex,FDIUSGYOIndex,USURTOTIndex,USUDMAERIndex,INJCJCIndex,INJCSPIndex,NFPTCHIndex,...,Microsoft_close260,Nuvoton_close120,Nuvoton_close260,Nvidia_close120,Nvidia_close260,Philadelphia_close120,Philadelphia_close260,QCOM_close120,QCOM_close260,SamsungElectronics_close120,SamsungElectronics_close260,ShinEtsuChemical_close120,ShinEtsuChemical_close260,Siltronic_close120,Siltronic_close260,SKhynix_close120,SKhynix_close260,Sumco_close120,Sumco_close260,TokyoElectron_close120,TokyoElectron_close260,TSMC_close120,TSMC_close260,UMC_close120,UMC_close260
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2006-01-02,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2006-01-03,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2006-01-04,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2006-01-05,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2006-01-06,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-10-27,2.0,2.3,4.6,-6.3,4.8,69.4,21340.27,4.0,2.4,1.9,1.7,2.4,1.4,1.77407,0.1,0.3,0.1,0.3,1.8,2.3,3.7,7.2,210.0,1656.0,219.0,...,0.286636,0.135574,0.108673,0.218302,0.320554,0.122168,0.235583,0.051832,0.459050,0.079714,0.127652,0.174040,0.306100,0.243316,-0.057720,0.176306,0.120348,0.000000,-0.004090,0.091316,0.149501,0.195714,0.259170,0.033902,0.164303
2019-10-28,2.0,2.3,4.6,-6.3,4.8,69.4,21340.27,4.0,2.4,1.9,1.7,2.4,1.4,1.77407,0.1,0.3,0.1,0.3,1.8,2.3,3.7,7.2,210.0,1656.0,219.0,...,0.316915,0.101262,0.074361,0.229190,0.331441,0.139533,0.252949,0.086698,0.493916,0.087541,0.135480,0.154798,0.286858,0.286738,-0.014298,0.176306,0.120348,-0.004107,-0.008197,0.090834,0.149018,0.199265,0.262722,0.033902,0.164303
2019-10-29,2.0,2.3,4.6,-6.3,4.8,69.4,21340.27,4.0,2.4,1.9,1.7,2.4,1.4,1.77407,0.1,0.3,0.1,0.3,1.8,2.3,3.7,7.2,210.0,1656.0,219.0,...,0.304224,0.068474,0.075833,0.198717,0.324239,0.104402,0.238961,0.051382,0.466767,0.092182,0.127119,0.145008,0.293863,0.261677,0.007820,0.170343,0.113424,-0.012220,0.004107,0.095215,0.158142,0.180153,0.251680,0.018238,0.210424
2019-10-30,2.0,2.3,4.6,-6.3,4.8,69.4,21340.27,4.0,2.4,1.9,1.7,2.4,1.4,1.77407,0.1,0.3,0.1,0.3,1.8,2.3,3.7,7.2,215.0,1655.0,219.0,...,0.300397,0.026499,0.090364,0.222960,0.293122,0.117724,0.217333,0.055234,0.448215,0.085928,0.090262,0.172869,0.273343,0.279844,-0.030918,0.132303,0.071185,-0.028632,-0.016461,0.116841,0.166240,0.183377,0.247422,0.021440,0.185965


In [22]:
len(df.columns)

970

In [23]:
df.columns

Index(['GDPCQOQIndex', 'GDPCYOYIndex', 'GDPCTOT%Index', 'GPDITOC%Index', 'GPGSTOC%Index',
       'RGCDCIPIIndex', 'GDPCUR$Index', 'GDPCURYIndex', 'GDPPIQQIndex', 'GDPCPCECIndex',
       ...
       'SKhynix_close120', 'SKhynix_close260', 'Sumco_close120', 'Sumco_close260',
       'TokyoElectron_close120', 'TokyoElectron_close260', 'TSMC_close120', 'TSMC_close260',
       'UMC_close120', 'UMC_close260'],
      dtype='object', length=970)

In [24]:
df.isna().sum().sum()

537018

In [25]:
# shifting 한 값 날리기

df = df[TARGET_DAY:]

In [26]:
len(df)

4791

In [27]:
len(df)

4791

In [28]:
# nan값이 500을 넘으면 제거 
drop_columns = df.isna().sum().sort_values(ascending = False)[df.isna().sum().sort_values(ascending = False).values>500].index

In [29]:
df.drop(drop_columns, axis = 1, inplace = True)

In [30]:
len(df.columns)

774

In [31]:
# fill na for shifted data

#df = df.interpolate(method = 'cubic', limit_area = 'inside')
df = df.fillna(method = 'ffill')
df = df.fillna(method = 'bfill')

In [32]:
# check na

df.isna().sum().sum()

0

In [33]:
len(df)

4791

In [34]:
# features

len(df.columns)

774

### 이 시점에서 target과 merge

In [35]:
target

Unnamed: 0_level_0,KRXsemiconductor_close
date,Unnamed: 1_level_1
2006-01-02,-0.076769
2006-01-03,-0.093983
2006-01-04,-0.099178
2006-01-05,-0.061885
2006-01-06,-0.076766
...,...
2018-10-04,0.065541
2018-10-05,0.087007
2018-10-08,0.086912
2018-10-10,0.095500


In [36]:
merged_df = pd.merge(target, df, how = 'left', on = 'date')

In [37]:
len(merged_df)

3159

In [38]:
df = merged_df.dropna()

In [39]:
df.isna().sum().sum()

0

In [40]:
print('{:.0f}'.format(len(df)/5051*100), '% 만큼 데이터 살았다')

59 % 만큼 데이터 살았다


In [41]:
df

Unnamed: 0_level_0,KRXsemiconductor_close,GDPCQOQIndex,GDPCYOYIndex,GDPCTOT%Index,GPDITOC%Index,GPGSTOC%Index,RGCDCIPIIndex,GDPCUR$Index,GDPCURYIndex,GDPPIQQIndex,GDPCPCECIndex,CPIYOYIndex,CPIXYOYIndex,PCEDEFYIndex,PCECYOYIndex,CPICHNGIndex,CPUPXCHGIndex,FDIDFDMOIndex,FDIDSGMOIndex,FDIUFDYOIndex,FDIUSGYOIndex,USURTOTIndex,USUDMAERIndex,INJCJCIndex,INJCSPIndex,...,Microsoft_close260,Nuvoton_close120,Nuvoton_close260,Nvidia_close120,Nvidia_close260,Philadelphia_close120,Philadelphia_close260,QCOM_close120,QCOM_close260,SamsungElectronics_close120,SamsungElectronics_close260,ShinEtsuChemical_close120,ShinEtsuChemical_close260,Siltronic_close120,Siltronic_close260,SKhynix_close120,SKhynix_close260,Sumco_close120,Sumco_close260,TokyoElectron_close120,TokyoElectron_close260,TSMC_close120,TSMC_close260,UMC_close120,UMC_close260
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2006-09-19,0.248294,0.9,3.1,2.0,-2.6,-0.1,92.7,13749.81,6.5,3.4,2.9,4.1,2.7,3.3,2.42117,0.5,0.2,0.1,0.3,2.6,2.2,4.7,8.5,318.0,2477.0,...,0.022620,0.000000,0.000000,0.207492,0.420394,-0.023130,-0.072856,-0.223249,-0.166448,0.074108,0.019535,0.000000,0.000000,0.000000,0.000000,0.200526,0.059371,0.000000,0.000000,-0.064981,-0.082307,0.000747,-0.011141,-0.043959,-0.028507
2006-09-20,0.236823,0.9,3.1,2.0,-2.6,-0.1,92.7,13749.81,6.5,3.4,2.9,4.1,2.7,3.3,2.42117,0.5,0.2,0.1,0.3,2.6,2.2,4.7,8.5,310.0,2467.0,...,0.032314,0.000000,0.000000,0.251842,0.454694,-0.002988,-0.068967,-0.170490,-0.140284,0.057070,0.003003,0.000000,0.000000,0.000000,0.000000,0.157181,0.010444,0.000000,0.000000,-0.061928,-0.089795,-0.011778,-0.017598,-0.062811,-0.009924
2006-09-21,0.262899,0.9,3.1,2.0,-2.6,-0.1,92.7,13749.81,6.5,3.4,2.9,4.1,2.7,3.3,2.42117,0.5,0.2,0.1,0.3,2.6,2.2,4.7,8.5,310.0,2467.0,...,0.002877,0.000000,0.000000,0.232146,0.420947,-0.028203,-0.093909,-0.165592,-0.162152,0.055656,-0.049864,0.000000,0.000000,0.000000,0.000000,0.140479,0.077641,0.000000,0.000000,-0.072469,-0.089795,-0.004402,-0.022865,-0.050937,-0.005001
2006-09-22,0.244798,0.9,3.1,2.0,-2.6,-0.1,92.7,13749.81,6.5,3.4,2.9,4.1,2.7,3.3,2.42117,0.5,0.2,0.1,0.3,2.6,2.2,4.7,8.5,310.0,2467.0,...,-0.015527,0.000000,0.000000,0.208045,0.371779,-0.029949,-0.123594,-0.191458,-0.173231,0.059860,-0.037514,0.000000,0.000000,0.000000,0.000000,0.141528,0.060283,0.000000,0.000000,-0.061480,-0.089154,0.001365,-0.044656,-0.068707,-0.015468
2006-09-25,0.231563,0.9,3.1,2.0,-2.6,-0.1,92.7,13749.81,6.5,3.4,2.9,4.1,2.7,3.3,2.42117,0.5,0.2,0.1,0.3,2.6,2.2,4.7,8.5,310.0,2467.0,...,0.010854,0.000000,0.000000,0.240194,0.390524,-0.013343,-0.120291,-0.170190,-0.198078,0.047253,-0.055362,0.000000,0.000000,0.000000,0.000000,0.131403,0.052789,0.000000,0.000000,-0.064981,-0.132552,0.003164,-0.056091,-0.063460,-0.015388
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-10-04,0.065541,3.5,3.2,4.0,-1.8,2.6,-28.0,20510.18,6.0,3.2,2.1,2.9,2.4,2.5,2.11439,0.2,0.2,0.1,0.1,3.4,2.8,3.9,7.5,215.0,1713.0,...,0.227080,-0.485908,-0.466805,0.051765,0.216529,-0.065167,-0.012628,0.187404,0.061255,-0.137714,-4.016422,-0.158181,-0.230887,-0.330446,-0.238354,-0.262361,-0.056919,-0.492996,-0.191682,-0.082364,-0.121134,0.054391,0.005365,-0.101495,-0.008863
2018-10-05,0.087007,3.5,3.2,4.0,-1.8,2.6,-28.0,20510.18,6.0,3.2,2.1,2.9,2.4,2.5,2.11439,0.2,0.2,0.1,0.1,3.4,2.8,3.9,7.5,215.0,1713.0,...,0.208163,-0.551445,-0.530876,0.026032,0.183548,-0.079172,-0.040714,0.158268,0.044748,-0.123975,-4.022049,-0.172504,-0.243538,-0.376015,-0.303179,-0.256985,-0.067379,-0.563265,-0.287682,-0.102406,-0.142743,0.040224,-0.036506,-0.126752,-0.020803
2018-10-08,0.086912,3.5,3.2,4.0,-1.8,2.6,-28.0,20510.18,6.0,3.2,2.1,2.9,2.4,2.5,2.11439,0.2,0.2,0.1,0.1,3.4,2.8,3.9,7.5,215.0,1713.0,...,0.196450,-0.564213,-0.548296,0.013169,0.143493,-0.082016,-0.053119,0.160566,0.041007,-0.099446,-4.004780,-0.162831,-0.257039,-0.348865,-0.334507,-0.216376,-0.029067,-0.637921,-0.469580,-0.103857,-0.135273,0.027368,-0.093994,-0.124454,-0.026748
2018-10-10,0.095500,3.5,3.2,4.0,-1.8,2.6,-28.0,20510.18,6.0,3.2,2.1,2.7,2.2,2.3,2.00137,0.1,0.1,0.0,0.1,3.0,2.6,3.8,7.4,210.0,1706.0,...,0.143223,-0.602653,-0.535916,-0.066224,0.027869,-0.131192,-0.114468,0.108104,-0.023493,-0.086642,-3.993775,-0.190746,-0.334369,-0.424137,-0.455205,-0.229097,-0.014124,-0.683140,-0.546557,-0.098693,-0.148345,0.022981,-0.129769,-0.170475,-0.066840


In [44]:
df.describe().T.sort_values(by = 'min')[0:20]

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
KOMSM1FYIndex260,2981.0,,,-inf,-0.409203,-0.143101,0.437358,inf
FDIUFDYOIndex120,2981.0,,,-inf,-0.044452,0.0,0.191055,inf
FDIUFDYOIndex260,2981.0,,,-inf,-0.044452,0.0,0.191055,inf
RSTAXMOMIndex260,2981.0,,,-inf,-0.485508,0.0,0.435318,inf
RSTAXMOMIndex120,2981.0,,,-inf,-0.485508,0.0,0.435318,inf
KOMSM1FYIndex120,2981.0,,,-inf,-0.409203,-0.143101,0.437358,inf
RSTAMOMIndex260,2981.0,,,-inf,-0.559616,0.0,0.470004,inf
KOHPTYOYIndex260,2981.0,,,-inf,-0.607787,0.065958,0.534082,inf
KOECEXPYIndex120,2981.0,,,-inf,-0.855314,-0.211696,0.236389,inf
RSTAMOMIndex120,2981.0,,,-inf,-0.559616,0.0,0.470004,inf


In [45]:
nan_columns = df.describe().T.sort_values(by = 'min')[0:20].index

In [46]:
df.drop(nan_columns, axis = 1, inplace = True)

In [51]:
df.describe().T.sort_values(by = 'mean', ascending = True)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
USCABALIndex,2981.0,-1.194291e+02,34.902102,-215.770,-117.270,-106.790,-98.980,-75.590
USTBTOTIndex,2981.0,-4.549562e+01,8.912842,-67.823,-49.283,-43.621,-40.349,-25.372
USMMMNCHIndex,2981.0,-1.063670e+01,50.363483,-289.000,-22.000,5.000,19.000,42.000
KOBPCAIndex,2981.0,-3.636229e+00,13.962268,-64.900,-7.400,-2.600,0.600,42.900
EHCAUSIndex,2981.0,-3.089990e+00,1.214146,-5.940,-3.600,-2.640,-2.220,-1.910
...,...,...,...,...,...,...,...,...
KOVMPRODIndex,2981.0,3.524479e+05,56954.069833,188365.000,315921.000,363457.000,395899.000,444049.000
KOHHLIndex,2981.0,9.071381e+05,241923.265112,540544.700,700111.900,875017.500,1072020.200,1409239.900
KOHHDIndex,2981.0,9.595232e+05,256324.152649,569379.000,736347.100,928561.600,1131535.500,1492352.400
MTSLRL$Index,2981.0,1.312579e+06,94715.581008,1126129.000,1238946.000,1307206.000,1388303.000,1489369.000


In [52]:
print('데이터 추출 전 최종 확인', 'shape: ', df.shape, 'nan값 수 :', df.isna().sum().sum())

데이터 추출 전 최종 확인 shape:  (2981, 755) nan값 수 : 0


In [53]:
df.to_csv('final_data_260_by_ffill_ver1.csv')