In [None]:
import zipfile

In [None]:
# Unpack the stock dataset archive into ./data. The context manager closes the
# archive handle as soon as extraction finishes, even if extraction fails.
with zipfile.ZipFile('./Stock_Dataset(2017_07_06).zip') as fantasy_zip:
    fantasy_zip.extractall('./data')

In [1]:
import pandas as pd
from pandas import Series, DataFrame
from sklearn import preprocessing
import numpy as np
import tensorflow as tf
import math
import matplotlib.pyplot as plt
import itertools

In [2]:
!pip install xlrd

Collecting xlrd
  Using cached https://files.pythonhosted.org/packages/07/e6/e95c4eec6221bfd8528bcc4ea252a850bffcc4be88ebc367e23a1a84b0bb/xlrd-1.1.0-py2.py3-none-any.whl
[31mmxnet-cu80 1.1.0 has requirement numpy<=1.13.3, but you'll have numpy 1.14.3 which is incompatible.[0m
Installing collected packages: xlrd
Successfully installed xlrd-1.1.0


In [3]:
!pip install openpyxl

Collecting openpyxl
Collecting et-xmlfile (from openpyxl)
Collecting jdcal (from openpyxl)
  Using cached https://files.pythonhosted.org/packages/a0/38/dcf83532480f25284f3ef13f8ed63e03c58a65c9d3ba2a6a894ed9497207/jdcal-1.4-py2.py3-none-any.whl
[31mmxnet-cu80 1.1.0 has requirement numpy<=1.13.3, but you'll have numpy 1.14.3 which is incompatible.[0m
Installing collected packages: et-xmlfile, jdcal, openpyxl
Successfully installed et-xmlfile-1.0.1 jdcal-1.4 openpyxl-2.5.4


In [4]:
### Function definitions
# Load the detailed price data for one company.
def get_stock_datail(comp_code) :
    """Read ``./data/<code>.csv``, where ``<code>`` is ``comp_code`` zero-padded to six digits.

    Args:
        comp_code: integer stock code.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for that file.
    """
    csv_path = './data/' + '{:06d}'.format(comp_code) + '.csv'
    return pd.read_csv(csv_path)

# Convert pandas column data to a 2-D ndarray.
def to_ndarray(cols_data) :
    """Return ``cols_data`` as a two-dimensional NumPy array.

    Args:
        cols_data: a pandas ``Series`` (one column) or ``DataFrame`` (several columns).

    Returns:
        For a Series, an array of shape ``(len, 1)``; for a DataFrame, its
        underlying ``(rows, columns)`` value array.

    Raises:
        TypeError: if ``cols_data`` is neither a Series nor a DataFrame
            (previously this case silently returned ``None``).
    """
    if isinstance(cols_data, Series):
        return np.reshape(list(cols_data), (-1,1))
    if isinstance(cols_data, DataFrame):
        # DataFrame.as_matrix() was deprecated in pandas 0.23 and later removed;
        # .values is available in every pandas version.
        return cols_data.values
    raise TypeError(
        'to_ndarray expects a pandas Series or DataFrame, got ' + type(cols_data).__name__)

# Min-max scale the selected column(s).
def get_scaled_cols(data, column_name) :
    """Fit a fresh ``MinMaxScaler`` on ``data[column_name]`` and return the scaled values.

    The selection is first converted to a 2-D array with ``to_ndarray``; the
    scaler is fitted on exactly the data it transforms and is not kept.
    """
    column_matrix = to_ndarray(data[column_name])
    return preprocessing.MinMaxScaler().fit_transform(column_matrix)

# Scale the price/volume columns of the data.
def get_scaled_data(data) :
    """Return a copy of ``data`` whose price and volume columns are min-max scaled.

    Each of 'Close', 'Open', 'High', 'Low' and 'Volume' is scaled independently
    via ``get_scaled_cols``; the input frame itself is left untouched.
    """
    scaled_data = data.copy()
    for column_name in ('Close', 'Open', 'High', 'Low', 'Volume'):
        scaled_data[column_name] = get_scaled_cols(scaled_data, column_name)
    return scaled_data

# Build the sliding-window samples for the RNN.
def get_dataXY(data, train_params) :
    """Slice ``data`` into overlapping input windows and next-step close targets.

    Args:
        data: frame with 'Open', 'High', 'Low', 'Volume' and 'Close' columns.
        train_params: dict providing 'seq_length' (window length in rows).

    Returns:
        ``(dataX, dataY, y)`` where ``dataX[k]`` is rows ``k .. k+seq_length-1``
        of the five feature columns, ``dataY[k]`` is the close at row
        ``k+seq_length`` (the row right after the window), and ``y`` is the
        full close column as an ``(n, 1)`` array.
    """
    features = to_ndarray(data[['Open', 'High', 'Low', 'Volume', 'Close']])
    y = to_ndarray(data['Close'])

    seq_length = train_params['seq_length']
    window_starts = range(0, len(y) - seq_length)
    dataX = [features[start:start + seq_length] for start in window_starts]
    dataY = [y[start + seq_length] for start in window_starts]  # next close price
    return dataX, dataY, y

# Split the samples into train, test and invest segments.
def split_train_test(dataX, dataY, train_params, data, y) :
    """Partition the windowed samples into three consecutive segments.

    The first ``train_percent`` percent of the samples form the training set,
    the last ``invest_count`` samples form the invest set, and everything in
    between is the test set.

    Args:
        dataX: list of input windows.
        dataY: list of targets, parallel to ``dataX``.
        train_params: dict providing 'invest_count', 'seq_length' and 'train_percent'.
        data: unscaled frame; only its 'Close' column is read.
        y: scaled close column the targets were taken from.

    Returns:
        dict with 'trainX'/'trainY'/'trainCloses', 'testX'/'testY'/'testCloses',
        'investX'/'investY'/'investCloses' and 'investRealCloses'. Given how
        ``get_dataXY`` builds the windows, each ``*Closes`` entry is the scaled
        close one row before the matching target (the last close inside the
        window); 'investRealCloses' holds the unscaled closes for the same rows.
    """
    invest_count = train_params['invest_count']
    seq_length = train_params['seq_length']
    data_count = len(dataY)
    train_size = int(data_count * train_params['train_percent'] / 100)
    train_last = data_count - invest_count

    # Sample-index ranges of the three segments.
    train_span = slice(0, train_size)
    test_span = slice(train_size, train_last)
    invest_span = slice(train_last, data_count)

    # "Previous close" ranges: shifted back by one relative to the targets.
    prev_test_span = slice(train_size - 1, train_last - 1)
    prev_invest_span = slice(train_last - 1, data_count - 1)

    real_closes = data['Close'][train_last-1+seq_length:data_count-1+seq_length].values

    return {
        'trainX': np.array(dataX[train_span]),
        'trainY': np.array(dataY[train_span]),
        'trainCloses': np.array(y[seq_length-1:train_size+seq_length-1]),
        'testX': np.array(dataX[test_span]),
        'testY': np.array(dataY[test_span]),
        'testCloses': np.array(dataY[prev_test_span]),
        'investX': np.array(dataX[invest_span]),
        'investY': np.array(dataY[invest_span]),
        'investCloses': np.array(dataY[prev_invest_span]),
        'investRealCloses': np.array(real_closes),
    }

# Turn raw price data into train/test/invest arrays.
def get_train_test(data, train_params) :
    """Scale ``data``, window it with ``get_dataXY`` and split it with ``split_train_test``.

    The unscaled ``data`` is passed through to the split so the real close
    prices of the invest segment remain available.
    """
    windows_x, targets_y, scaled_closes = get_dataXY(get_scaled_data(data), train_params)
    return split_train_test(windows_x, targets_y, train_params, data, scaled_closes)

# Build the TensorFlow computation graph.
def draw_graph(train_params) :
    """Reset the default graph and build the stacked-LSTM model plus its metrics.

    Reads 'seq_length', 'data_dim', 'hidden_dims', 'output_dim' and
    'learning_rate' from ``train_params``.

    Returns:
        dict of graph handles: the input placeholders ('X', 'Y', 'X_closes',
        'output_keep_prob'), the training op ('train'), the 'loss' and 'Y_pred'
        tensors, and the evaluation placeholders/tensors ('targets',
        'predictions', 'rmse', 'direction_error').
    """
    seq_length = train_params['seq_length']
    data_dim = train_params['data_dim']
    hidden_dims = train_params['hidden_dims']
    
    # Start from an empty default graph so repeated calls do not accumulate ops.
    tf.reset_default_graph()
    X = tf.placeholder(tf.float32, [None, seq_length, data_dim])
    X_closes = tf.placeholder(tf.float32, [None, 1])
    Y = tf.placeholder(tf.float32, [None, 1])
    output_keep_prob = tf.placeholder(tf.float32)
    
    # One LSTM cell per entry of hidden_dims, each wrapped with output dropout.
    cells = []
    for n in hidden_dims :
        cell = tf.contrib.rnn.BasicLSTMCell(num_units=n, activation=tf.tanh)
        dropout_cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=output_keep_prob)
        cells.append(dropout_cell)
    stacked_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(cells)
    outputs, _states = tf.nn.dynamic_rnn(stacked_rnn_cell, X, dtype=tf.float32) 
    Y_pred = tf.contrib.layers.fully_connected(
        outputs[:, -1], train_params['output_dim'], activation_fn=None)  # We use the last cell's output

    # cost/loss
    # not_equal is 1.0 where the sign of (X_closes - Y) differs from the sign of
    # (X_closes - Y_pred), so the loss is squared error plus a per-sample
    # direction-mismatch indicator.
    # NOTE(review): the sign/not_equal term presumably contributes no gradient,
    # so it would shift the reported loss without steering training - confirm.
    not_equal = tf.cast(tf.not_equal(tf.sign(X_closes-Y), tf.sign(X_closes-Y_pred)), tf.float32)
    loss = tf.reduce_sum(tf.square(Y_pred - Y) + not_equal)
        
    optimizer = tf.train.AdamOptimizer(train_params['learning_rate'])
    train = optimizer.minimize(loss)

    # RMSE
    # Evaluation metrics are computed from separately fed targets/predictions
    # rather than from Y/Y_pred directly.
    targets = tf.placeholder(tf.float32, [None, 1])
    predictions = tf.placeholder(tf.float32, [None, 1])
    rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
    # Fraction of samples whose predicted direction differs from the target direction.
    direction_error = tf.reduce_mean(
        tf.cast(tf.not_equal(tf.sign(X_closes-targets), tf.sign(X_closes-predictions)), tf.float32))
    
    return {
        'X': X,
        'Y': Y,
        'output_keep_prob': output_keep_prob,
        'train': train,
        'loss' : loss,
        'Y_pred': Y_pred,
        'targets': targets,
        'rmse' : rmse,
        'predictions': predictions,
        'X_closes' : X_closes,
        'direction_error' : direction_error
    }

def draw_plot(rmse_vals, test_predict, testY, train_params) :
    """Print the run's key settings and plot the RMSE history and the predictions.

    Figure 1 shows ``rmse_vals``; figure 2 overlays ``testY`` (red) and
    ``test_predict`` (blue).
    """
    seq_length = train_params['seq_length']
    hidden_dims = train_params['hidden_dims']
    dropout_keep = train_params['dropout_keep']
    last_index = len(rmse_vals) - 1

    print('seq_length : ', seq_length)
    print('hidden_dims : ', hidden_dims)
    print('dropout_keep : ', dropout_keep)
    print('rmse_vals : ', rmse_vals[last_index])

    # Figure 1: RMSE per evaluation.
    plt.figure(1)
    plt.plot(rmse_vals, 'gold')
    plt.xlabel('Epoch(x100)')
    plt.ylabel('Root Mean Square Error')

    # Figure 2: actual versus predicted series.
    plt.figure(2)
    for series, colour in ((testY, 'r'), (test_predict, 'b')):
        plt.plot(series, colour)
    plt.xlabel('Time Period')
    plt.ylabel('Stock Price')
    plt.show()

# Train the model with periodic evaluation and early stopping.
def let_training(data_params, train_params, graph_params, comp_code) :
    """Train the graph from ``draw_graph`` and keep the best test-set evaluation.

    The whole training set is fed as one batch per iteration. Every 100
    iterations the test set is evaluated; an evaluation is an improvement when
    ``rmse + direction_error`` is lower than the best sum so far. Training stops
    once more than ``train_params['loss_up_count']`` consecutive evaluations
    fail to improve, or after ``train_params['iterations']`` iterations.

    Args:
        data_params: dict from ``split_train_test`` (train/test arrays and closes).
        train_params: dict providing 'iterations', 'loss_up_count' and 'dropout_keep'.
        graph_params: dict of graph handles from ``draw_graph``.
        comp_code: stock code; only needed by the disabled checkpoint code below.

    Returns:
        ``(best_test_predict, best_rmse, best_direction_error, train_count)``
        where ``train_count`` is the iteration index of the best evaluation.
        ``best_test_predict`` is ``None`` (and the metrics stay at their 999999
        sentinels) when no evaluation ever improved, e.g. with zero iterations
        or NaN metrics; previously that case raised ``UnboundLocalError``.
    """
    X = graph_params['X']
    Y = graph_params['Y']
    X_closes = graph_params['X_closes']
    output_keep_prob = graph_params['output_keep_prob']
    train = graph_params['train']
    Y_pred = graph_params['Y_pred']
    targets = graph_params['targets']
    predictions = graph_params['predictions']
    rmse = graph_params['rmse']
    direction_error = graph_params['direction_error']

    trainX = data_params['trainX']
    trainY = data_params['trainY']
    trainCloses = data_params['trainCloses']
    testX = data_params['testX']
    testY = data_params['testY']
    testCloses = data_params['testCloses']

    loss_up_count = train_params['loss_up_count']
    dropout_keep = train_params['dropout_keep']

#     saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Best-so-far bookkeeping.
        min_rmse_val = 999999
        min_direction_error_val = 999999
        max_test_predict = None  # bound up front so the return below is always valid
        less_cnt = 0             # consecutive evaluations without improvement
        train_count = 0
        rmse_vals = []           # RMSE history, kept for the optional draw_plot call

        for i in range(train_params['iterations']):
            sess.run(train, feed_dict={X: trainX, Y: trainY, X_closes: trainCloses,
                                       output_keep_prob: dropout_keep})
            if i % 100 != 0 :
                continue

            # Evaluate on the test set with dropout disabled.
            test_predict = sess.run(Y_pred, feed_dict={X: testX, output_keep_prob: 1.0})
            rmse_val, direction_error_val = sess.run(
                [rmse, direction_error],
                feed_dict={targets: testY, predictions: test_predict, X_closes: testCloses})
            rmse_vals.append(rmse_val)

            if direction_error_val + rmse_val < min_direction_error_val + min_rmse_val :
                # Checkpointing is disabled. let_invest restores
                # "./sessions/<comp_code>.ckpt" and reads these collections, so
                # re-enable this block (and the Saver above) before using it.
#                 tf.add_to_collection("X", X)
#                 tf.add_to_collection("X_closes", X_closes)
#                 tf.add_to_collection("Y", Y)
#                 tf.add_to_collection("train", train)
#                 tf.add_to_collection("Y_pred", Y_pred)
#                 tf.add_to_collection("output_keep_prob", output_keep_prob)
#                 saver.save(sess, "./sessions/" + str(comp_code) + ".ckpt")
                less_cnt = 0
                train_count = i
                max_test_predict = test_predict
                min_rmse_val = rmse_val
                min_direction_error_val = direction_error_val
            else :
                less_cnt += 1

            if less_cnt > loss_up_count :
                break
        #draw_plot(rmse_vals, max_test_predict, testY, train_params)
        return max_test_predict, min_rmse_val, min_direction_error_val, train_count

# Build the graph, then train it.
def let_leaning(data_params, train_params, comp_code):
    """Build a fresh graph with ``draw_graph`` and run ``let_training`` on it.

    Returns whatever ``let_training`` returns.
    """
    return let_training(data_params, train_params, draw_graph(train_params), comp_code)

def to_dataFrame(data, columns) :
    """Wrap ``data`` in a pandas DataFrame labelled with ``columns``."""
    frame = pd.DataFrame(data, columns=columns)
    return frame

# Save a DataFrame to an Excel file.
def save_excel(df_data, file_name):
    """Write ``df_data`` to sheet 'Sheet1' of ``file_name`` without the index column.

    The writer is used as a context manager, so the workbook is saved and
    closed on exit. This replaces ``ExcelWriter.save()``, which newer pandas
    versions no longer provide, and the sheet name is passed by keyword because
    the positional form is deprecated.
    """
    with pd.ExcelWriter(file_name) as writer:
        df_data.to_excel(writer, sheet_name='Sheet1', index=False)

# Buy or sell according to the predicted price move.
def let_invest_money(invest_predict, now_scaled_close, now_close, train_params, now_money, now_stock_cnt) :
    """Apply one step of the simulated trading rule.

    The predicted move is ``(invest_predict - now_scaled_close) / now_scaled_close``
    in percent. If it exceeds ``invest_min_percent``, as many shares as the cash
    can pay for (price plus fee) are bought; if it is below
    ``-invest_min_percent``, all held shares are sold via ``to_money``;
    otherwise nothing changes.

    Args:
        invest_predict: predicted next close (scaled).
        now_scaled_close: current close (scaled).
        now_close: current close (unscaled), used for the cash arithmetic.
        train_params: dict providing 'fee_percent' and 'invest_min_percent'
            (plus whatever ``to_money`` reads when selling).
        now_money: available cash.
        now_stock_cnt: number of shares currently held.

    Returns:
        ``(now_money, now_stock_cnt)`` after the step.
    """
    fee_percent = train_params['fee_percent']
    invest_min_percent = train_params['invest_min_percent']

    # NOTE(review): now_scaled_close can be exactly 0 after min-max scaling, which
    # makes this ratio undefined (inf/nan or ZeroDivisionError) - decide how to treat it.
    ratio = (invest_predict - now_scaled_close) / now_scaled_close * 100

    if ratio > invest_min_percent :
        fee = now_close * fee_percent/100
        # Size the order on the full per-share cost (price + fee); dividing by the
        # bare price let the purchase overspend and push cash below zero.
        cnt = math.floor(now_money / (now_close + fee))
        if cnt > 0 :
            now_money -= (now_close + fee) * cnt
            now_stock_cnt += cnt
    elif ratio < -invest_min_percent :
        if now_stock_cnt > 0 :
            # to_money takes (share count, price, params); the arguments used to
            # be passed in swapped order.
            now_money += to_money(now_stock_cnt, now_close, train_params)
            now_stock_cnt = 0
    return now_money, now_stock_cnt

# Sell shares and convert them to cash.
def to_money(now_stock_cnt, now_close, train_params) :
    """Return the proceeds of selling ``now_stock_cnt`` shares at ``now_close``.

    Per share, a fee (``fee_percent``) and a tax (``tax_percent``), both taken
    as a percentage of the price, are deducted. Returns 0 when there are no
    shares to sell.
    """
    if not now_stock_cnt > 0 :
        return 0

    fee = now_close * train_params['fee_percent']/100
    tax = now_close * train_params['tax_percent']/100
    return (now_close - (fee + tax)) * now_stock_cnt
    
# Run the simulated trading after training.
def let_invest(row, train_params, data_params, train_cnt):
    """Restore a trained model and simulate trading over the invest segment.

    The checkpoint "./sessions/<stock code>.ckpt" is restored, the model is
    fine-tuned on the test segment, and then for each invest step the model
    predicts the next close, ``let_invest_money`` trades on it, and the model is
    fine-tuned on that step's sample. Any shares left at the end are sold.

    NOTE(review): this relies on a checkpoint and on graph collections ('X',
    'X_closes', 'Y', 'train', 'Y_pred', 'output_keep_prob') that are only
    produced by code currently commented out in ``let_training`` - confirm they
    exist before calling.

    Args:
        row: mapping with the stock code under '종목코드'.
        train_params: dict providing 'invest_count', 'invest_money',
            'dropout_keep' and the fee/tax/threshold settings.
        data_params: dict from ``split_train_test``.
        train_cnt: iteration count of the best training evaluation; it scales
            the number of fine-tuning steps.

    Returns:
        Final cash after liquidating remaining shares.
    """
    comp_code = row['종목코드']
    invest_count = train_params['invest_count']
    invest_money = train_params['invest_money']
    dropout_keep = train_params['dropout_keep']

    investX = data_params['investX']
    investY = data_params['investY']
    investCloses = data_params['investCloses']
    investRealCloses = data_params['investRealCloses']
    testX = data_params['testX']
    testY = data_params['testY']
    testCloses = data_params['testCloses']

    now_stock_cnt = 0
    now_close = 0  # bound up front so the final liquidation works when invest_count is 0
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        saver.restore(sess, "./sessions/" + str(comp_code) + ".ckpt")
        X = tf.get_collection('X')[0]
        X_closes = tf.get_collection('X_closes')[0]
        Y = tf.get_collection('Y')[0]
        train = tf.get_collection('train')[0]
        Y_pred = tf.get_collection('Y_pred')[0]
        output_keep_prob = tf.get_collection('output_keep_prob')[0]

        # Fine-tune on the test segment before trading starts.
        for _ in range(int(train_cnt/10)):
            sess.run(train, feed_dict={X: testX, Y: testY, X_closes: testCloses, output_keep_prob: dropout_keep})

        for i in range(invest_count) :
            step = slice(i, i + 1)  # the single sample for this trading step
            invest_predicts = sess.run(Y_pred, feed_dict={X: investX[step], output_keep_prob: 1.0})

            invest_predict = invest_predicts[0][0]
            # Use this step's own close; indexing [0] compared every prediction
            # against the first sample's close.
            now_scaled_close = investCloses[i][0]
            now_close = investRealCloses[i]
            invest_money, now_stock_cnt = let_invest_money(invest_predict, now_scaled_close, now_close,
                                                           train_params, invest_money, now_stock_cnt)
            # Fine-tune on the current step's sample. The loop variable is a
            # throwaway so it no longer shadows the step index.
            for _ in range(int(train_cnt/100)):
                sess.run(train, feed_dict={X: investX[step], Y: investY[step], X_closes: investCloses[step],
                                           output_keep_prob: dropout_keep})
        # Liquidate whatever is still held at the last seen price.
        invest_money += to_money(now_stock_cnt, now_close, train_params)
    return invest_money

In [5]:
# Parameter definitions
# Training / simulation parameters (later cells overwrite some of these in place).
train_params = dict(
    seq_length=28,          # number of time steps per input sequence
    data_dim=5,             # number of input features
    hidden_dims=[128, 64, 128, 512, 128, 32, 128, 64, 128, 32],  # units of each stacked LSTM layer
    dropout_keep=0.5,       # dropout keep probability used while training
    output_dim=1,           # number of outputs
    learning_rate=0.0001,
    iterations=100000,      # maximum number of training iterations
    train_percent=70.0,     # percentage of samples used for training
    loss_up_count=10,       # early stopping: evaluations without improvement tolerated
    # invest_corp_count=100,  # number of companies to invest in (unused)
    invest_count=50,        # number of simulated trading steps
    invest_money=1000000,   # cash to invest per stock in the simulation
    fee_percent=0.015,      # trading fee, in percent
    tax_percent=0.5,        # tax charged when selling, in percent
    invest_min_percent=0.6, # minimum predicted move (percent) that triggers a trade
)


In [6]:
# Listed-company master data, read from an Excel workbook next to the notebook.
corporations = pd.read_excel('./corporations.xlsx')

In [7]:
# Keep companies whose listing date (상장일) is before 2005-01-01, with only the
# company name (회사명) and stock code (종목코드) columns.
stock_corps = corporations.query("상장일<'2005-01-01'  ")[['회사명', '종목코드']]
print(len(stock_corps))

1068


In [None]:
# Hyperparameter search, pass 1: coarse sweep over a wide range.
hyper_rmses = []
print('no', 'seq_length', 'hidden_dims', 'dropout_keep', 'rmse', 'direction_error')
use_cnt = 3  # number of companies trained (and averaged over) per combination
seq_length_list = list(range(1,20,3))
dropout_keep_list = [(n+1)/10.0 for n in range(0,10,3) ]
# Layer widths are random multiples of 16; no seed is set in the visible cells,
# so the candidate lists differ from run to run.
hidden_dims_list = [[ np.random.randint(1,10)*16 for m in range(n)] for n in range(1,10,3) ]

try_no = 1;
for params in itertools.product(seq_length_list,dropout_keep_list,hidden_dims_list):
    # The shared train_params dict is overwritten in place for each combination.
    train_params['seq_length'] = params[0]
    train_params['dropout_keep'] = params[1]
    train_params['hidden_dims'] = params[2]
    
    no = 0;
    sum_rmse_val = 0.0
    sum_direction_error = 0.0
    for idx, row in stock_corps.iterrows():
        comp_code = row['종목코드']
        data = get_stock_datail(comp_code)
        data_params = get_train_test(data, train_params)
        _, rmse_val, direction_error_val, train_cnt = let_leaning(data_params, train_params, comp_code)
        sum_rmse_val += rmse_val
        sum_direction_error += direction_error_val
        #now_money = let_invest(row, train_params, data_params, train_cnt)
        #print(sum_rmse_val, sum_direction_error)
        # Stop once use_cnt companies have been trained.
        if no > use_cnt-2 :
            break
        no += 1;
    # Averages divide by use_cnt, which assumes at least use_cnt companies were iterated.
    rmse = sum_rmse_val/use_cnt
    direction_error = sum_direction_error/use_cnt
    print(try_no, train_params['seq_length'], train_params['hidden_dims'], 
                        train_params['dropout_keep'], rmse, direction_error)
    hyper_rmses.append([train_params['seq_length'], train_params['hidden_dims'], 
                        train_params['dropout_keep'], rmse, direction_error])
    try_no += 1
 
# Result: seq_length = 7, dropout_keep = 1.0, hidden_dims = [112] was selected

no seq_length hidden_dims dropout_keep rmse direction_error


  if sys.path[0] == '':


1 1 [144] 0.1 0.02661822612086932 0.5279720425605774
2 1 [144, 32, 32, 128] 0.1 0.1983448565006256 0.5314685304959615
3 1 [96, 16, 112, 112, 80, 32, 32] 0.1 0.29513386885325116 0.5314685304959615
4 1 [144] 0.4 0.024952248980601627 0.5264180302619934
5 1 [144, 32, 32, 128] 0.4 0.046232384319106735 0.5345765153566996
6 1 [96, 16, 112, 112, 80, 32, 32] 0.4 0.08030591905117035 0.5306915243466696
7 1 [144] 0.7 0.02824184422691663 0.5198135276635488
8 1 [144, 32, 32, 128] 0.7 0.040042703971266747 0.5050505101680756
9 1 [96, 16, 112, 112, 80, 32, 32] 0.7 0.04260092104474703 0.5392385323842367
10 1 [144] 1.0 0.026159755264719326 0.5202020307381948
11 1 [144, 32, 32, 128] 1.0 0.03902618587017059 0.5233100255330404
12 1 [96, 16, 112, 112, 80, 32, 32] 1.0 0.04899625355998675 0.5135975082715353
13 4 [144] 0.1 0.03456407723327478 0.513029932975769
14 4 [144, 32, 32, 128] 0.1 0.11225633323192596 0.5309218168258667
15 4 [96, 16, 112, 112, 80, 32, 32] 0.1 0.21070600052674612 0.5313107768694559
16 4 [1

In [10]:
# Save the hyperparameter search results (coarse sweep) to an Excel file.
df_hyper_rmses = pd.DataFrame(hyper_rmses, columns=['seq_length', 'hidden_dims', 'dropout_keep', 'rmse', 'direction_error'])    
save_excel(df_hyper_rmses, 'hyper_rmses_wide.xlsx')   

In [21]:
# Scratch check: draw random hidden-layer width lists (multiples of 16) of length 1 to 3.
[[ np.random.randint(1,10)*16 for m in range(n)] for n in range(1,4) ]

[[128], [96, 48], [112, 48, 112]]

In [15]:
# Hyperparameter search, pass 2: fine-grained sweep within the narrowed range.
hyper_rmses = []
print('no', 'seq_length', 'hidden_dims', 'dropout_keep', 'rmse', 'direction_error')
use_cnt = 3  # number of companies trained (and averaged over) per combination
seq_length_list = list(range(5,10))
dropout_keep_list = [(n+1)/10.0 for n in range(7,10) ]
# Layer widths are random multiples of 32; no seed is set in the visible cells,
# so the candidate lists differ from run to run.
hidden_dims_list = [[ np.random.randint(1,5)*32 for m in range(n)] for n in range(1,4) ]

try_no = 1;
for params in itertools.product(seq_length_list,dropout_keep_list,hidden_dims_list):
    # The shared train_params dict is overwritten in place for each combination.
    train_params['seq_length'] = params[0]
    train_params['dropout_keep'] = params[1]
    train_params['hidden_dims'] = params[2]
    
    no = 0;
    sum_rmse_val = 0.0
    sum_direction_error = 0.0
    for idx, row in stock_corps.iterrows():
        comp_code = row['종목코드']
        data = get_stock_datail(comp_code)
        data_params = get_train_test(data, train_params)
        _, rmse_val, direction_error_val, train_cnt = let_leaning(data_params, train_params, comp_code)
        sum_rmse_val += rmse_val
        sum_direction_error += direction_error_val
        #now_money = let_invest(row, train_params, data_params, train_cnt)
        #print(sum_rmse_val, sum_direction_error)
        # Stop once use_cnt companies have been trained.
        if no > use_cnt-2 :
            break
        no += 1;
    # Averages divide by use_cnt, which assumes at least use_cnt companies were iterated.
    rmse = sum_rmse_val/use_cnt
    direction_error = sum_direction_error/use_cnt
    print(try_no, train_params['seq_length'], train_params['hidden_dims'], 
                        train_params['dropout_keep'], rmse, direction_error)
    hyper_rmses.append([train_params['seq_length'], train_params['hidden_dims'], 
                        train_params['dropout_keep'], rmse, direction_error])
    try_no += 1

# Result: seq_length = 5, dropout_keep = 0.8, hidden_dims = [96, 64] was selected

no seq_length hidden_dims dropout_keep rmse direction_error


  if sys.path[0] == '':


36 8 [32, 32, 64] 1.0 0.05296915272871653 0.6841900448004404
37 9 [128] 0.8 0.04417183933158716 0.673294335603714
38 9 [128, 96] 0.8 0.04390620688597361 0.6748538017272949
39 9 [32, 32, 64] 0.8 0.05421902736028036 0.6635477642218272
40 9 [128] 0.9 0.05331631066898505 0.6729044914245605
41 9 [128, 96] 0.9 0.04402166232466698 0.6764132479826609
42 9 [32, 32, 64] 0.9 0.053065831462542214 0.6748538017272949
43 9 [128] 1.0 0.046463631093502045 0.6693957050641378
44 9 [128, 96] 1.0 0.04665865749120712 0.6756335298220316
45 9 [32, 32, 64] 1.0 0.05383550003170967 0.686549723148346


In [16]:
# Save the hyperparameter search results (narrowed sweep) to an Excel file.
df_hyper_rmses = pd.DataFrame(hyper_rmses, columns=['seq_length', 'hidden_dims', 'dropout_keep', 'rmse', 'direction_error'])    
save_excel(df_hyper_rmses, 'hyper_rmses_select.xlsx')  

In [24]:
# Hyperparameter search, pass 3: repeat the hidden_dims draw several more times
# (the layer widths are chosen at random, so one draw is not conclusive).
hyper_rmses = []
print('no', 'seq_length', 'hidden_dims', 'dropout_keep', 'rmse', 'direction_error')
use_cnt = 3  # number of companies trained (and averaged over) per combination

# Seven independent rounds, each with freshly drawn hidden_dims candidates.
for j in range(7) :

    seq_length_list = [5]
    dropout_keep_list = [0.8]
    hidden_dims_list = [[ np.random.randint(1,5)*32 for m in range(n)] for n in range(1,4) ]

    # try_no restarts at 1 in every round, so printed numbers repeat across rounds.
    try_no = 1;
    for params in itertools.product(seq_length_list,dropout_keep_list,hidden_dims_list):
        # The shared train_params dict is overwritten in place for each combination.
        train_params['seq_length'] = params[0]
        train_params['dropout_keep'] = params[1]
        train_params['hidden_dims'] = params[2]

        no = 0;
        sum_rmse_val = 0.0
        sum_direction_error = 0.0
        for idx, row in stock_corps.iterrows():
            comp_code = row['종목코드']
            data = get_stock_datail(comp_code)
            data_params = get_train_test(data, train_params)
            _, rmse_val, direction_error_val, train_cnt = let_leaning(data_params, train_params, comp_code)
            sum_rmse_val += rmse_val
            sum_direction_error += direction_error_val
            #now_money = let_invest(row, train_params, data_params, train_cnt)
            #print(sum_rmse_val, sum_direction_error)
            # Stop once use_cnt companies have been trained.
            if no > use_cnt-2 :
                break
            no += 1;
        # Averages divide by use_cnt, which assumes at least use_cnt companies were iterated.
        rmse = sum_rmse_val/use_cnt
        direction_error = sum_direction_error/use_cnt
        print(try_no, train_params['seq_length'], train_params['hidden_dims'], 
                            train_params['dropout_keep'], rmse, direction_error)
        hyper_rmses.append([train_params['seq_length'], train_params['hidden_dims'], 
                            train_params['dropout_keep'], rmse, direction_error])
        try_no += 1

# Result: seq_length = 5, dropout_keep = 0.8, hidden_dims = [128, 32] was selected

no seq_length hidden_dims dropout_keep rmse direction_error


  if sys.path[0] == '':


1 5 [96] 0.8 0.030787284175554912 0.5186915894349416
2 5 [128, 32] 0.8 0.03276223627229532 0.5054517090320587
3 5 [128, 128, 64] 0.8 0.03636598959565163 0.514018694559733
1 5 [96] 0.8 0.03931254396835963 0.5081775784492493
2 5 [96, 32] 0.8 0.036602544908722244 0.5132398704687754
3 5 [128, 128, 32] 0.8 0.03413493558764458 0.5144080917040507
1 5 [96] 0.8 0.030777699624498684 0.5105140209197998
2 5 [64, 64] 0.8 0.0422152200092872 0.5198598106702169
3 5 [128, 32, 32] 0.8 0.03628784790635109 0.5116822322209676
1 5 [96] 0.8 0.0405144194761912 0.5085669755935669
2 5 [32, 64] 0.8 0.03771666002770265 0.506619930267334
3 5 [128, 96, 64] 0.8 0.035910896956920624 0.5050623019536337
1 5 [64] 0.8 0.027002083758513134 0.5093457897504171
2 5 [128, 64] 0.8 0.03963865153491497 0.5050623118877411
3 5 [64, 96, 128] 0.8 0.03741348721086979 0.5031152566274008
1 5 [64] 0.8 0.034928091491262116 0.5120716591676077
2 5 [32, 64] 0.8 0.03796032629907131 0.5085669855276743
3 5 [32, 32, 96] 0.8 0.040620225171248116

In [25]:
# Save the hyperparameter search results (repeated hidden_dims draws) to an Excel file.
df_hyper_rmses = pd.DataFrame(hyper_rmses, columns=['seq_length', 'hidden_dims', 'dropout_keep', 'rmse', 'direction_error'])    
save_excel(df_hyper_rmses, 'hyper_rmses_select_hidden_dims.xlsx')  

In [27]:
# Hyperparameter search, final pass: compare a fixed list of hidden_dims
# candidates at seq_length 5 and dropout_keep 0.8.
hyper_rmses = []
print('no', 'seq_length', 'hidden_dims', 'dropout_keep', 'rmse', 'direction_error')
use_cnt = 3  # number of companies trained (and averaged over) per combination

seq_length_list = [5]
dropout_keep_list = [0.8]
# Fixed candidate architectures (no random draw in this pass).
hidden_dims_list = [[64],[128,32],[96, 64], [64, 96, 128], [96], [128, 96, 64], [32]]

try_no = 1;
for params in itertools.product(seq_length_list,dropout_keep_list,hidden_dims_list):
    # The shared train_params dict is overwritten in place for each combination.
    train_params['seq_length'] = params[0]
    train_params['dropout_keep'] = params[1]
    train_params['hidden_dims'] = params[2]

    no = 0;
    sum_rmse_val = 0.0
    sum_direction_error = 0.0
    for idx, row in stock_corps.iterrows():
        comp_code = row['종목코드']
        data = get_stock_datail(comp_code)
        data_params = get_train_test(data, train_params)
        _, rmse_val, direction_error_val, train_cnt = let_leaning(data_params, train_params, comp_code)
        sum_rmse_val += rmse_val
        sum_direction_error += direction_error_val
        #now_money = let_invest(row, train_params, data_params, train_cnt)
        #print(sum_rmse_val, sum_direction_error)
        # Stop once use_cnt companies have been trained.
        if no > use_cnt-2 :
            break
        no += 1;
    # Averages divide by use_cnt, which assumes at least use_cnt companies were iterated.
    rmse = sum_rmse_val/use_cnt
    direction_error = sum_direction_error/use_cnt
    print(try_no, train_params['seq_length'], train_params['hidden_dims'], 
                        train_params['dropout_keep'], rmse, direction_error)
    hyper_rmses.append([train_params['seq_length'], train_params['hidden_dims'], 
                        train_params['dropout_keep'], rmse, direction_error])
    try_no += 1
    
# Result: seq_length = 5, dropout_keep = 0.8, hidden_dims = [128, 96, 64] was selected

no seq_length hidden_dims dropout_keep rmse direction_error


  if sys.path[0] == '':


1 5 [64] 0.8 0.028680693358182907 0.5144081115722656
2 5 [128, 32] 0.8 0.03138316671053568 0.5085669755935669
3 5 [96, 64] 0.8 0.030631134907404583 0.5101246138413748
4 5 [64, 96, 128] 0.8 0.03665653429925442 0.504672904809316
5 5 [96] 0.8 0.03152791472772757 0.508956382671992
6 5 [128, 96, 64] 0.8 0.031166815509398777 0.5081775784492493
7 5 [32] 0.8 0.03075985920925935 0.5101246138413748


In [28]:
# Save the hyperparameter search results (final candidates) to an Excel file.
df_hyper_rmses = pd.DataFrame(hyper_rmses, columns=['seq_length', 'hidden_dims', 'dropout_keep', 'rmse', 'direction_error'])    
save_excel(df_hyper_rmses, 'hyper_rmses_select_hidden_dims_last.xlsx')  