In [1]:
## Library Import

# Basic
import re
import warnings
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', 100)
warnings.filterwarnings('ignore')
%config InlineBackend.figure_format = 'retina'

# Preprocessing
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Scaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PowerTransformer

# for VIF
from patsy import dmatrices
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [2]:
# data load
df = pd.read_csv('original_full_data_20200325.csv', index_col = 0)
mmdf = pd.read_csv('bloomberg_map_index_20200324.csv', index_col = 0)

In [3]:
target = df[['date', 'KRXsemiconductor_close']]

In [4]:
df.drop('KRXsemiconductor_close', axis = 1, inplace = True)

In [5]:
len(target)

5051

In [6]:
# target preprocessing
#
# `target` was selected out of `df`, so writing a column on it in place is the
# chained-assignment pattern pandas warns about (the warning is silenced by the
# global `warnings.filterwarnings('ignore')`). Build a new frame instead:
# parse `date` to datetime and promote it to the index in a single expression.
target = target.assign(date = pd.to_datetime(target['date'])).set_index('date')

In [7]:
target.isna().sum()

KRXsemiconductor_close    1632
dtype: int64

In [8]:
target = target.dropna()

In [9]:
len(target)

3419

In [10]:
# 120-row log1p difference: log1p(x[t]) - log1p(x[t-120]).
# The first 120 rows have no earlier value to subtract and become NaN.
target = np.log1p(target).diff(periods = 120)

In [11]:
target = target.shift(-120)

In [12]:
target = target[:-120]

In [13]:
target

Unnamed: 0_level_0,KRXsemiconductor_close
date,Unnamed: 1_level_1
2006-01-02,-0.144893
2006-01-03,-0.164362
2006-01-04,-0.141828
2006-01-05,-0.100628
2006-01-06,-0.112238
...,...
2019-04-30,-0.006857
2019-05-02,-0.012257
2019-05-03,-0.012498
2019-05-07,-0.009651


In [14]:
len(target)

3299

# 120일
### 120일

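- eco : raw value is kept, plus the log1p difference vs. t-120 (written under the 60 / 120 / 260 suffixes)
- noteco : log1p differences vs. t-60, t-120 and t-260 (the raw column is dropped)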

In [15]:
# Class generates derived variables & shifting target variable

class Builder:
    """Add lagged log1p-difference columns to a frame, driven by a mapping table.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Frame holding one column per ticker. It is modified IN PLACE: derived
        columns are appended and the raw 'noteco' columns are dropped.
    map_df : pandas.DataFrame
        Mapping table. Must have a 'ticker' column; the column at position 1
        holds the category, compared against 'eco' / 'noteco'.
    target_day : int
        Row shift used for every column produced by `eco`.
    """

    def __init__(self, input_df, map_df, target_day):
        self.df = input_df
        self.mdf = map_df
        self.target_day = target_day # just target
        self.lag_days = [60, 120, 260] # lagging for prediction

    # not Bloomberg Economic

    def noteco(self, target_column):
        """Append `<target_column><lag>` for every lag in `self.lag_days`.

        Each new column is log1p(x[t]) - log1p(x[t - lag]), where the shift is
        counted in rows. Returns `self.df` (mutated in place).
        """
        # log1p does not depend on the lag, so compute it once.
        logged = np.log1p(self.df[target_column])

        # TODO (open question left by the original author): should a variant
        # with 1 subtracted be added here as well?
        for lag in self.lag_days:
            self.df[target_column + str(lag)] = logged - logged.shift(periods = lag, axis = 0)

        return self.df

    # for Bloomberg Economic

    def eco(self, target_column):
        """Append `<target_column><lag>` for every lag in `self.lag_days`.

        Every one of these columns holds the same values:
        log1p(x[t]) - log1p(x[t - target_day]). The raw column is left in
        place. Returns `self.df` (mutated in place).
        """
        logged = np.log1p(self.df[target_column])
        delta = logged - logged.shift(periods = self.target_day, axis = 0)

        # NOTE(review): the shift is always `self.target_day`, so the 60 / 120 /
        # 260 columns are duplicates of each other. The column set is kept as
        # is because downstream cells depend on it - confirm whether a single
        # column (or a per-lag shift) was intended.
        for lag in self.lag_days:
            self.df[target_column + str(lag)] = delta

        return self.df

    # executing functions

    def execution(self):
        """Run `eco` / `noteco` for every mapped ticker present in `self.df`.

        Tickers missing from `self.df`, and categories other than 'eco' or
        'noteco', are skipped. For 'noteco' tickers the raw column is dropped
        after its lagged columns have been added. Returns `self.df`.
        """
        for pos, ticker in enumerate(self.mdf['ticker']): # walk the tickers listed in mdf
            if ticker not in self.df.columns:
                continue

            # `pos` is a positional counter, so the matching row must be read
            # by position as well; a label lookup only works when the index of
            # mdf happens to be 0..n-1.
            category = self.mdf.iloc[pos, 1]

            if category == 'eco':
                self.eco(ticker)
            elif category == 'noteco':
                self.noteco(ticker)
                self.df.drop(ticker, axis = 1, inplace = True)

        return self.df

In [16]:
len(df.columns)

362

In [17]:
# date preprocessing
# Parse the `date` column to datetime and promote it to the index.
df = df.assign(date = pd.to_datetime(df['date'])).set_index('date')

In [18]:
# fillna cubic -> ffill -> bfill
# (the cubic interpolation step is currently disabled)

#df = df.interpolate(method = 'cubic', limit_area = 'inside')

# `fillna(method=...)` is deprecated in pandas; the dedicated methods do the
# same thing. Forward-fill first, then back-fill whatever is still missing at
# the start of each column.
# NOTE(review): back-filling copies a later observation into earlier rows -
# confirm this look-ahead is acceptable for the modelling step.
df = df.ffill()
df = df.bfill()

In [19]:
# check na value

df.isna().sum().sum()

0

In [25]:
TARGET_DAY = 120

In [20]:
# class load

cla = Builder(df, mmdf, TARGET_DAY)

In [21]:
# execution function
# 새로운 데이터프레임 생성 
cla.execution()

Unnamed: 0_level_0,GDPCQOQIndex,GDPCYOYIndex,GDPCTOT%Index,GPDITOC%Index,GPGSTOC%Index,RGCDCIPIIndex,GDPCUR$Index,GDPCURYIndex,GDPPIQQIndex,GDPCPCECIndex,CPIYOYIndex,CPIXYOYIndex,PCEDEFYIndex,PCECYOYIndex,CPICHNGIndex,CPUPXCHGIndex,FDIDFDMOIndex,FDIDSGMOIndex,FDIUFDYOIndex,FDIUSGYOIndex,USURTOTIndex,USUDMAERIndex,INJCJCIndex,INJCSPIndex,NFPTCHIndex,...,QCOM_close260,SamsungElectronics_close60,SamsungElectronics_close120,SamsungElectronics_close260,ShinEtsuChemical_close60,ShinEtsuChemical_close120,ShinEtsuChemical_close260,Siltronic_close60,Siltronic_close120,Siltronic_close260,SKhynix_close60,SKhynix_close120,SKhynix_close260,Sumco_close60,Sumco_close120,Sumco_close260,TokyoElectron_close60,TokyoElectron_close120,TokyoElectron_close260,TSMC_close60,TSMC_close120,TSMC_close260,UMC_close60,UMC_close120,UMC_close260
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2006-01-02,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2006-01-03,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2006-01-04,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2006-01-05,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
2006-01-06,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-10-27,2.0,2.3,4.6,-6.3,4.8,69.4,21340.27,4.0,2.4,1.9,1.7,2.4,1.4,1.77407,0.1,0.3,0.1,0.3,1.8,2.3,3.7,7.2,210.0,1656.0,219.0,...,0.459050,0.142267,0.079714,0.127652,0.123778,0.174040,0.306100,0.359567,0.243316,-0.057720,0.123073,0.176306,0.120348,0.179048,0.000000,-0.004090,0.105682,0.091316,0.149501,0.160807,0.195714,0.259170,0.088666,0.033902,0.164303
2019-10-28,2.0,2.3,4.6,-6.3,4.8,69.4,21340.27,4.0,2.4,1.9,1.7,2.4,1.4,1.77407,0.1,0.3,0.1,0.3,1.8,2.3,3.7,7.2,210.0,1656.0,219.0,...,0.493916,0.167228,0.087541,0.135480,0.085888,0.154798,0.286858,0.407947,0.286738,-0.014298,0.123073,0.176306,0.120348,0.174941,-0.004107,-0.008197,0.103058,0.090834,0.149018,0.154521,0.199265,0.262722,0.080093,0.033902,0.164303
2019-10-29,2.0,2.3,4.6,-6.3,4.8,69.4,21340.27,4.0,2.4,1.9,1.7,2.4,1.4,1.77407,0.1,0.3,0.1,0.3,1.8,2.3,3.7,7.2,210.0,1656.0,219.0,...,0.466767,0.149592,0.092182,0.127119,0.093916,0.145008,0.293863,0.387078,0.261677,0.007820,0.069853,0.170343,0.113424,0.169292,-0.012220,0.004107,0.098902,0.095215,0.158142,0.151037,0.180153,0.251680,0.068563,0.018238,0.210424
2019-10-30,2.0,2.3,4.6,-6.3,4.8,69.4,21340.27,4.0,2.4,1.9,1.7,2.4,1.4,1.77407,0.1,0.3,0.1,0.3,1.8,2.3,3.7,7.2,215.0,1655.0,219.0,...,0.448215,0.135799,0.085928,0.090262,0.100175,0.172869,0.273343,0.386620,0.279844,-0.030918,0.051616,0.132303,0.071185,0.156921,-0.028632,-0.016461,0.113140,0.116841,0.166240,0.156846,0.183377,0.247422,0.062520,0.021440,0.185965


In [26]:
len(df.columns)

1325

In [23]:
df.columns

Index(['GDPCQOQIndex', 'GDPCYOYIndex', 'GDPCTOT%Index', 'GPDITOC%Index', 'GPGSTOC%Index',
       'RGCDCIPIIndex', 'GDPCUR$Index', 'GDPCURYIndex', 'GDPPIQQIndex', 'GDPCPCECIndex',
       ...
       'Sumco_close260', 'TokyoElectron_close60', 'TokyoElectron_close120',
       'TokyoElectron_close260', 'TSMC_close60', 'TSMC_close120', 'TSMC_close260', 'UMC_close60',
       'UMC_close120', 'UMC_close260'],
      dtype='object', length=1325)

In [24]:
df.isna().sum().sum()

653680

In [27]:
# shifting 한 값 날리기

df = df[TARGET_DAY:]

In [28]:
len(df)

4931

In [39]:
len(df)

4931

In [45]:
# Select the columns that have more than 500 NaN values (most NaNs first).
na_counts = df.isna().sum().sort_values(ascending = False)
drop_columns = na_counts[na_counts.values > 500].index

In [46]:
df.drop(drop_columns, axis = 1, inplace = True)

In [47]:
len(df.columns)

1058

In [48]:
# fill na for shifted data

#df = df.interpolate(method = 'cubic', limit_area = 'inside')

# `fillna(method=...)` is deprecated in pandas; use the dedicated methods.
# NOTE(review): lagged columns start with NaN rows (no earlier value to diff
# against), and forward-fill cannot reach them, so the back-fill copies the
# first later value into those rows - confirm this is intended rather than
# dropping the rows.
df = df.ffill()
df = df.bfill()

In [50]:
# check na

df.isna().sum().sum()

0

In [51]:
len(df)

4931

In [52]:
# features

len(df.columns)

1058

### 이 시점에서 target과 merge

In [53]:
target

Unnamed: 0_level_0,KRXsemiconductor_close
date,Unnamed: 1_level_1
2006-01-02,-0.144893
2006-01-03,-0.164362
2006-01-04,-0.141828
2006-01-05,-0.100628
2006-01-06,-0.112238
...,...
2019-04-30,-0.006857
2019-05-02,-0.012257
2019-05-03,-0.012498
2019-05-07,-0.009651


In [54]:
merged_df = pd.merge(target, df, how = 'left', on = 'date')

In [107]:
len(merged_df)

3299

In [57]:
df = merged_df.dropna()

In [58]:
df.isna().sum().sum()

0

In [71]:
print('{:.0f}'.format(len(df)/5051*100), '% 만큼 데이터 살았다')

64 % 만큼 데이터 살았다


In [74]:
df

Unnamed: 0_level_0,KRXsemiconductor_close,GDPCQOQIndex,GDPCYOYIndex,GDPCTOT%Index,GPDITOC%Index,GPGSTOC%Index,RGCDCIPIIndex,GDPCUR$Index,GDPCURYIndex,GDPPIQQIndex,GDPCPCECIndex,CPIYOYIndex,CPIXYOYIndex,PCEDEFYIndex,PCECYOYIndex,CPICHNGIndex,CPUPXCHGIndex,FDIDFDMOIndex,FDIDSGMOIndex,FDIUFDYOIndex,FDIUSGYOIndex,USURTOTIndex,USUDMAERIndex,INJCJCIndex,INJCSPIndex,...,QCOM_close260,SamsungElectronics_close60,SamsungElectronics_close120,SamsungElectronics_close260,ShinEtsuChemical_close60,ShinEtsuChemical_close120,ShinEtsuChemical_close260,Siltronic_close60,Siltronic_close120,Siltronic_close260,SKhynix_close60,SKhynix_close120,SKhynix_close260,Sumco_close60,Sumco_close120,Sumco_close260,TokyoElectron_close60,TokyoElectron_close120,TokyoElectron_close260,TSMC_close60,TSMC_close120,TSMC_close260,UMC_close60,UMC_close120,UMC_close260
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2006-05-02,-0.011413,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,3.6,2.1,3.0,2.05608,0.1,0.2,0.1,0.3,2.6,2.2,4.8,8.4,303.0,2456.0,...,-0.166448,-0.004612,-0.015291,0.019535,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.019481,-0.189791,0.059371,0.000000,0.000000,0.000000,0.007040,-0.020827,-0.082307,0.067354,0.060755,-0.011141,0.116359,0.139448,-0.028507
2006-05-03,-0.026891,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,3.6,2.1,3.0,2.05608,0.1,0.2,0.1,0.3,2.6,2.2,4.8,8.4,295.0,2439.0,...,-0.166448,-0.020140,-0.039883,0.019535,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.016260,-0.206221,0.059371,0.000000,0.000000,0.000000,0.007040,-0.020827,-0.082307,0.069106,0.062508,-0.011141,0.120974,0.144063,-0.028507
2006-05-04,-0.006050,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,3.6,2.1,3.0,2.05608,0.1,0.2,0.1,0.3,2.6,2.2,4.8,8.4,295.0,2439.0,...,-0.166448,-0.010794,-0.080400,0.019535,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.022691,-0.123461,0.059371,0.000000,0.000000,0.000000,0.007040,-0.020827,-0.082307,0.072767,0.054942,-0.011141,0.132092,0.165488,-0.028507
2006-05-08,0.027848,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,3.6,2.1,3.0,2.05608,0.1,0.2,0.1,0.3,2.6,2.2,4.8,8.4,295.0,2439.0,...,-0.166448,0.063013,-0.047699,0.019535,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.079247,-0.106785,0.059371,0.000000,0.000000,0.000000,0.042696,-0.064081,-0.082307,0.121722,0.056036,-0.011141,0.180817,0.175651,-0.028507
2006-05-09,0.015242,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,3.6,2.1,3.0,2.05608,0.1,0.2,0.1,0.3,2.6,2.2,4.8,8.4,295.0,2439.0,...,-0.166448,0.064034,-0.040035,0.019535,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.110953,-0.112183,0.059371,0.000000,0.000000,0.000000,0.021120,-0.064081,-0.082307,0.088308,0.008748,-0.011141,0.141407,0.113579,-0.028507
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-04-30,-0.006857,1.1,2.5,1.4,3.0,-0.4,93.0,20897.80,4.9,1.6,1.7,1.5,2.1,1.3,1.61549,0.2,0.1,0.2,0.2,1.9,2.5,3.8,7.3,216.0,1755.0,...,0.280174,0.016493,0.169532,0.017602,0.114831,0.198769,-0.017385,0.002272,0.233292,-0.404633,0.120951,0.266801,0.046640,-0.016950,0.000000,-0.392619,0.029788,0.101573,-0.046908,0.068177,0.112336,0.065328,0.103598,0.124952,-0.171683
2019-05-02,-0.012257,1.1,2.5,1.4,3.0,-0.4,93.0,20897.80,4.9,1.6,1.7,1.5,2.1,1.3,1.61549,0.2,0.1,0.2,0.2,1.9,2.5,3.8,7.3,212.0,1726.0,...,0.279021,0.017582,0.169330,0.016474,0.116461,0.208391,-0.008918,-0.024869,0.203862,-0.359992,0.142241,0.286440,0.061321,-0.012685,0.004264,-0.384075,0.029788,0.101573,-0.047926,0.067836,0.142539,0.060739,0.106768,0.138971,-0.155054
2019-05-03,-0.012498,1.1,2.5,1.4,3.0,-0.4,93.0,20897.80,4.9,1.6,1.7,1.5,2.1,1.3,1.61549,0.2,0.1,0.2,0.2,1.9,2.5,3.8,7.3,212.0,1726.0,...,0.301923,0.009983,0.186298,0.023451,0.116918,0.210902,-0.017357,-0.034451,0.219205,-0.348634,0.137090,0.331752,0.073533,-0.008403,0.051960,-0.345294,0.012349,0.101573,-0.028927,0.103762,0.182816,0.088638,0.122115,0.176730,-0.142062
2019-05-07,-0.009651,1.1,2.5,1.4,3.0,-0.4,93.0,20897.80,4.9,1.6,1.7,1.5,2.1,1.3,1.61549,0.2,0.1,0.2,0.2,1.9,2.5,3.8,7.3,212.0,1726.0,...,0.249592,0.023689,0.146189,0.022548,0.117352,0.105791,-0.050410,0.048802,0.091460,-0.356913,0.180568,0.308332,0.067295,0.044851,-0.013072,-0.325130,0.068842,0.065454,-0.026571,0.116463,0.180956,0.075995,0.093090,0.143101,-0.182322


In [90]:
df.describe().T.sort_values(by = 'min')[0:36]

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
MBRXYOYWIndex260,3216.0,-inf,,-inf,-0.5193,0.0,0.077962,2.662588
PITLCHNGIndex120,3216.0,,,-inf,-0.241162,0.0,0.207639,inf
PITLCHNGIndex60,3216.0,,,-inf,-0.241162,0.0,0.207639,inf
MWINCHNGIndex60,3216.0,-inf,,-inf,-0.405465,-0.04652,0.252943,3.178054
KOHPTYOYIndex60,3216.0,,,-inf,-0.281412,0.063513,0.287682,inf
MBRXYOYWIndex120,3216.0,-inf,,-inf,-0.5193,0.0,0.077962,2.662588
MBRXYOYWIndex60,3216.0,-inf,,-inf,-0.5193,0.0,0.077962,2.662588
KOGCSTOQIndex60,3216.0,,,-inf,-0.405465,0.0,0.422857,inf
RSTAXMOMIndex260,3216.0,,,-inf,-0.459532,-0.054067,0.550046,inf
KOECSGVQIndex120,3216.0,,,-inf,-0.328504,0.0,0.336472,inf


In [91]:
# Collect every numeric column that contains a non-finite value (+inf, -inf or
# NaN). This replaces a hard-coded `[0:36]` slice of the describe() table sorted
# by `min`, which only matched this exact dataset and could not catch columns
# whose only non-finite values are +inf.
numeric_df = df.select_dtypes(include = [np.number])
nan_columns = numeric_df.columns[~np.isfinite(numeric_df).all(axis = 0)]

In [93]:
df.drop(nan_columns, axis = 1, inplace = True)

In [100]:
df.describe().T.sort_values(by = 'mean', ascending = True)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
USCABALIndex,3216.0,-1.222916e+02,36.372659,-2.157700e+02,-1.439300e+02,-107.760,-9.910000e+01,-75.590
USTBTOTIndex,3216.0,-4.645034e+01,9.321000,-6.782300e+01,-5.314100e+01,-43.876,-4.061825e+01,-25.372
per,3216.0,-2.972904e+01,88.916815,-3.788113e+02,-2.441914e+00,5.805,8.770000e+00,41.700
USMMMNCHIndex,3216.0,-9.093595e+00,48.952730,-2.890000e+02,-2.100000e+01,7.000,1.900000e+01,42.000
EHCAUSIndex,3216.0,-3.130942e+00,1.263452,-5.940000e+00,-4.210000e+00,-2.640,-2.220000e+00,-1.910
...,...,...,...,...,...,...,...,...
KOVMPRODIndex,3216.0,3.506984e+05,56450.952474,1.883650e+05,3.128470e+05,362008.000,3.950110e+05,444049.000
KOHHLIndex,3216.0,9.188219e+05,265359.023947,5.224090e+05,6.844116e+05,875017.500,1.101448e+06,1446628.400
KOHHDIndex,3216.0,9.720263e+05,281562.020941,5.503308e+05,7.235215e+05,928561.600,1.164895e+06,1536712.300
MTSLRL$Index,3216.0,1.319723e+06,99011.476916,1.126129e+06,1.254588e+06,1310470.000,1.395592e+06,1517565.000


In [105]:
print('데이터 추출 전 최종 확인', 'shape: ', df.shape, 'nan값 수 :', df.isna().sum().sum())

데이터 추출 전 최종 확인 shape:  (3216, 1023) nan값 수 : 0


In [106]:
df.to_csv('final_data_120_by_ffill_ver1.csv')