In [None]:
import zipfile

In [None]:
fantasy_zip = zipfile.ZipFile('./Stock_Dataset(2017_07_06).zip')
fantasy_zip.extractall('./data')

In [1]:
import pandas as pd
from pandas import Series, DataFrame
from sklearn import preprocessing
import numpy as np
import tensorflow as tf
import math
#import matplotlib.pyplot as plt
#import itertools

In [2]:
!pip install xlrd

Collecting xlrd
  Using cached https://files.pythonhosted.org/packages/07/e6/e95c4eec6221bfd8528bcc4ea252a850bffcc4be88ebc367e23a1a84b0bb/xlrd-1.1.0-py2.py3-none-any.whl
[31mmxnet-cu80 1.1.0 has requirement numpy<=1.13.3, but you'll have numpy 1.14.3 which is incompatible.[0m
Installing collected packages: xlrd
Successfully installed xlrd-1.1.0


In [3]:
!pip install openpyxl

Collecting openpyxl
Collecting jdcal (from openpyxl)
  Using cached https://files.pythonhosted.org/packages/a0/38/dcf83532480f25284f3ef13f8ed63e03c58a65c9d3ba2a6a894ed9497207/jdcal-1.4-py2.py3-none-any.whl
Collecting et-xmlfile (from openpyxl)
[31mmxnet-cu80 1.1.0 has requirement numpy<=1.13.3, but you'll have numpy 1.14.3 which is incompatible.[0m
Installing collected packages: jdcal, et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.0.1 jdcal-1.4 openpyxl-2.5.4


In [127]:
### 메소드 정의 
# 상세 데이터를 가져온다.
def get_stock_datail(comp_code) :
    code = format(comp_code, "06d");
    return pd.read_csv('./data/' + code + '.csv')

# matrix 데이터로 변경한다.
def to_ndarray(cols_data) :
    if isinstance(cols_data, Series):
        return np.reshape(list(cols_data), (-1,1))
    elif isinstance(cols_data, DataFrame):
        return cols_data.as_matrix()

# 컬럼을 스케일링을 시킨다.
def get_scaled_cols(data, column_name) :
    scale_data = to_ndarray(data[column_name])
    scaler = preprocessing.MinMaxScaler()
    return scaler.fit_transform(scale_data);

# 데이터를 스케일링 시킨다.
def get_scaled_data(data) :
    scaled_data = data.copy()
    scaled_data['Close'] = get_scaled_cols(scaled_data, 'Close')
    scaled_data['Open'] = get_scaled_cols(scaled_data, 'Open')
    scaled_data['High'] = get_scaled_cols(scaled_data, 'High')
    scaled_data['Low'] = get_scaled_cols(scaled_data, 'Low')
    scaled_data['Volume'] = get_scaled_cols(scaled_data, 'Volume')
    return scaled_data;

# RNN을 위한 데이터로 만든다. 
def get_dataXY(data, train_params) :
    x = to_ndarray(data[['Open', 'High', 'Low', 'Volume', 'Close']])
    y = to_ndarray(data['Close'])
    
    dataX = []
    dataY = []
    seq_length = train_params['seq_length']
    for i in range(0, len(y) - seq_length):
        _x = x[i:i + seq_length]
        _y = y[i + seq_length] # Next close price
        #print(_x, "->", _y)
        dataX.append(_x)
        dataY.append(_y)
    return dataX, dataY, y

# train 및 test 데이터로 나눈다.
def split_train_test(dataX, dataY, train_params, data, y) :
    invest_count = train_params['invest_count']
    seq_length = train_params['seq_length']
    data_count = len(dataY);
    train_size = int(data_count * train_params['train_percent'] / 100)
    train_last = data_count-invest_count;
    
    trainX = np.array(dataX[0:train_size])
    testX = np.array(dataX[train_size:train_last])
    investX = np.array(dataX[train_last:data_count])
    
    trainY = np.array(dataY[0:train_size])
    testY = np.array(dataY[train_size:train_last])
    investY = np.array(dataY[train_last:data_count])
    
    trainCloses = np.array( y[seq_length-1:train_size+seq_length-1])
    testCloses = np.array(dataY[train_size-1:train_last-1])
    investCloses = np.array(dataY[train_last-1:data_count-1])
    investRealCloses = np.array(data['Close'][train_last-1+seq_length:data_count-1+seq_length].values)
    
    return {
        'trainX': trainX, 'trainY': trainY, 'trainCloses': trainCloses,
        'testX': testX, 'testY': testY, 'testCloses' : testCloses,
        'investX': investX,'investY': investY, 'investCloses': investCloses, 'investRealCloses': investRealCloses
    }

# train, test데이터로 만든다.
def get_train_test(data, train_params) :
    scaled_data = get_scaled_data(data)
    dataX, dataY, y = get_dataXY(scaled_data, train_params)
    return split_train_test(dataX, dataY, train_params, data, y)

# 텐스플로우 변수관계 그래프롤 그린다.
def draw_graph(train_params) :
    seq_length = train_params['seq_length']
    data_dim = train_params['data_dim']
    hidden_dims = train_params['hidden_dims']
    
    tf.reset_default_graph()
    X = tf.placeholder(tf.float32, [None, seq_length, data_dim])
    X_closes = tf.placeholder(tf.float32, [None, 1])
    Y = tf.placeholder(tf.float32, [None, 1])
    output_keep_prob = tf.placeholder(tf.float32)
    
    cells = []
    for n in hidden_dims :
        cell = tf.contrib.rnn.BasicLSTMCell(num_units=n, activation=tf.tanh)
        dropout_cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=output_keep_prob)
        cells.append(dropout_cell)
    stacked_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(cells)
    outputs, _states = tf.nn.dynamic_rnn(stacked_rnn_cell, X, dtype=tf.float32) 
    Y_pred = tf.contrib.layers.fully_connected(
        outputs[:, -1], train_params['output_dim'], activation_fn=None)  # We use the last cell's output

    # cost/loss
    #not_equal = tf.cast(tf.not_equal(tf.sign(X_closes-Y), tf.sign(X_closes-Y_pred)), tf.float32)
    #loss = tf.reduce_sum(tf.square(Y_pred - Y) + not_equal)
    loss = tf.reduce_sum(tf.square(1-(1+Y_pred-X_closes)/(1+Y-X_closes)))
        
    optimizer = tf.train.AdamOptimizer(train_params['learning_rate'])
    train = optimizer.minimize(loss)

    # RMSE
    targets = tf.placeholder(tf.float32, [None, 1])
    predictions = tf.placeholder(tf.float32, [None, 1])
    #rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
    rmse = tf.sqrt(tf.reduce_mean(tf.square(1-(1+predictions-X_closes)/(1+targets-X_closes))))
    #direction_error = tf.reduce_mean(
    #    tf.cast(tf.not_equal(tf.sign(X_closes-targets), tf.sign(X_closes-predictions)), tf.float32))
    
    return {
        'X': X,
        'Y': Y,
        'output_keep_prob': output_keep_prob,
        'train': train,
        'loss' : loss,
        'Y_pred': Y_pred,
        'targets': targets,
        'rmse' : rmse,
        'predictions': predictions,
        'X_closes' : X_closes,
        #'direction_error' : direction_error
    }

def draw_plot(rmse_vals, test_predict, testY, train_params, comp_name) :
    
    print('rmse_vals : ', rmse_vals[len(rmse_vals)-1])
    plt.figure(1)
    plt.plot(rmse_vals, 'gold')
    plt.xlabel('Epoch(x100)')
    plt.ylabel('Root Mean Square Error')
    plt.title(comp_name)

    plt.figure(2)
    plt.plot(testY, 'r')
    plt.plot(test_predict, 'b')
    plt.xlabel('Time Period')
    plt.ylabel('Stock Price')
    plt.title(comp_name)
    plt.show()

# 학습을 시킨다.
def let_training(data_params, train_params, graph_params, comp_code, comp_name) :
    X = graph_params['X']
    Y = graph_params['Y']
    output_keep_prob = graph_params['output_keep_prob']
    train = graph_params['train']
    loss = graph_params['loss']
    trainX = data_params['trainX']
    trainY = data_params['trainY']
    testX = data_params['testX']
    testY = data_params['testY']
    trainCloses = data_params['trainCloses']
    testCloses = data_params['testCloses']
    
    Y_pred = graph_params['Y_pred']
    targets = graph_params['targets']
    rmse = graph_params['rmse']
    predictions = graph_params['predictions']
    X_closes = graph_params['X_closes']
    loss_up_count = train_params['loss_up_count']
    dropout_keep = train_params['dropout_keep']
    iterations = train_params['iterations']
    rmse_max = train_params['rmse_max']
    
    saver = tf.train.Saver()
    
    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)

        # Training step
        min_rmse_val = 999999
        less_cnt = 0
        train_count = 0
        #rmse_vals = []
        
        for i in range(iterations[1]):
            _, step_loss = sess.run([train, loss], feed_dict={X: trainX, Y: trainY, X_closes: trainCloses, output_keep_prob: dropout_keep})
            if (i < iterations[0]) :
                continue
            
            test_predict = sess.run(Y_pred, feed_dict={X: testX, output_keep_prob: 1.0})
            rmse_val = sess.run(rmse, feed_dict={targets: testY, predictions: test_predict, X_closes: testCloses}) 
            #rmse_vals.append(rmse_val)
            if rmse_val < min_rmse_val :
                tf.add_to_collection("X", X)
                tf.add_to_collection("X_closes", X_closes)
                tf.add_to_collection("Y", Y)
                tf.add_to_collection("train", train)
                tf.add_to_collection("Y_pred", Y_pred)
                tf.add_to_collection("output_keep_prob", output_keep_prob)
                saver.save(sess, "./sessions/" + str(comp_code) + ".ckpt")
                less_cnt = 0
                train_count = i;
                max_test_predict, min_rmse_val, = test_predict, rmse_val
            else :
                less_cnt += 1
            if less_cnt > loss_up_count and rmse_max > min_rmse_val:
                #print(less_cnt, rmse_val, train_count, i)
                break
        #draw_plot(rmse_vals, max_test_predict, testY, train_params, comp_name) 
        return max_test_predict, min_rmse_val, train_count 

# 그래프를 그리고 학습을 시킨다.    
def let_leaning(data_params, train_params, comp_code, comp_name):
    graph_params = draw_graph(train_params)
    return let_training(data_params, train_params, graph_params, comp_code, comp_name)

def to_dataFrame(data, columns) :
    return pd.DataFrame(data, columns=columns)

# excel로 저장한다.
def save_excel(df_data, file_name):
    writer = pd.ExcelWriter(file_name)
    df_data.to_excel(writer,'Sheet1', index=False)
    writer.save()

# 예측 값에 따라 매수 매도를 실행한다.    
def let_invest_money(invest_predict, now_scaled_close, now_close, train_params, now_money, now_stock_cnt) :
    seq_length = train_params['seq_length']
    data_dim = train_params['data_dim']
    fee_percent = train_params['fee_percent']
    invest_min_percent = train_params['invest_min_percent']
    
    ratio = (invest_predict - now_scaled_close) /now_scaled_close * 100
    
    if ratio > invest_min_percent :
        cnt = math.floor(now_money/now_close)
        if cnt > 0 :
            fee = now_close * fee_percent/100
            now_money -= (now_close + fee) * cnt
            now_stock_cnt += cnt
    elif ratio < -invest_min_percent :
        if now_stock_cnt > 0 :
            now_money += to_money(now_close, now_stock_cnt, train_params)
            now_stock_cnt = 0
    #print(now_money, now_stock_cnt, now_scaled_close, invest_predict, data_params['testY'])
    return now_money, now_stock_cnt

# 주식매도를 해서 돈으로 바꾼다.
def to_money(now_stock_cnt, now_close, train_params) :
    money = 0
    if now_stock_cnt > 0 :
        fee_percent = train_params['fee_percent'] 
        tax_percent = train_params['tax_percent']
        
        fee = now_close * fee_percent/100
        tax = now_close * tax_percent/100
        money = (now_close - (fee + tax)) * now_stock_cnt
    return money
    
# 학습 후 모의 주식 거래를 한다.
def let_invest(row, train_params, data_params, train_cnt):
    comp_code = row['종목코드']
    invest_count = train_params['invest_count']
    invest_money = train_params['invest_money']
    dropout_keep = train_params['dropout_keep']
    
    investX = data_params['investX']
    investCloses = data_params['investCloses']
    investRealCloses = data_params['investRealCloses']
    investX = data_params['investX']
    investY = data_params['investY']
    testX = data_params['testX']
    testY = data_params['testY']
    testCloses = data_params['testCloses']
    #print(investRealCloses)
    
    now_stock_cnt = 0
    saver = tf.train.Saver()
    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        
        saver.restore(sess, "./sessions/" + str(comp_code) + ".ckpt") 
        X = tf.get_collection('X')[0]
        X_closes = tf.get_collection('X_closes')[0]
        Y = tf.get_collection('Y')[0]
        train = tf.get_collection('train')[0]
        Y_pred = tf.get_collection('Y_pred')[0]
        output_keep_prob = tf.get_collection('output_keep_prob')[0]
        
        for i in range(int(train_cnt/10)):
            sess.run(train, feed_dict={X: testX, Y: testY, X_closes: testCloses, output_keep_prob: dropout_keep})
        
        for i in range(invest_count) :
            np.array([1, 2, 3], ndmin=2)
            invest_predicts = sess.run(Y_pred, feed_dict={X: investX[i:i+1], output_keep_prob: 1.0})
            
            invest_predict = invest_predicts[0][0];
            now_scaled_close = investCloses[0][0]
            now_close = investRealCloses[i]
            #print(invest_predict, now_scaled_close, now_close)
            invest_money, now_stock_cnt = let_invest_money(invest_predict, now_scaled_close, now_close,
                                                           train_params, invest_money, now_stock_cnt)
            for i in range(int(train_cnt/100)):
                sess.run(train, feed_dict={X: investX[i:i+1], Y: investY[i:i+1], X_closes: investCloses[i:i+1], 
                                           output_keep_prob: dropout_keep})
            #break
        invest_money += to_money(now_stock_cnt, now_close, train_params)
    #print(now_money)
    return invest_money

In [129]:
# 파라미터 정의 
# train Parameters
train_params = {
    'seq_length' : 5, # 시퀀스 갯수
    'data_dim' : 5,    # 입력 데이터 갯수
    'hidden_dims' : [128, 96, 64],  # 히든 레이어 갯수 
    'dropout_keep' : 0.8, # dropout 
    'output_dim' : 1,  # 출력 데이터 갯수
    'learning_rate' : 0.0001, 
    'iterations' : [30, 120],  # 최소, 최대 훈련 반복횟수
    'rmse_max' : 0.049,
    'train_percent' : 70.0, # 훈련 데이터 퍼센트
    'loss_up_count' : 12, # early stopping
    'invest_count' : 50,  # 투자 횟수
    'invest_money' : 1000000, # 각 주식에 모의투자할 금액
    'fee_percent' : 0.015, # 투자시 발생하는 수수료
    'tax_percent' : 0.5,   # 매도시 발생하는 세금
    'invest_min_percent' : 0.6 # 투자를 하는 최소 간격 퍼센트 
};


In [6]:
# 주식회사 데이터
corporations = pd.read_excel('./corporations.xlsx')

In [7]:
stock_corps = corporations.query("상장일<'2005-01-01'  ")[['회사명', '종목코드']]
print(len(stock_corps))

1068


In [None]:
# 주식 종목들을 가져와서 학습을 시킨다.
comp_rmses = []
no = 1;
print('no', 'code', 'name', 'rmse', 'invest_result', 'train_cnt')
for idx, row in stock_corps.iterrows():
    #if no < 15 :
    #    no += 1
    #    continue
    comp_code = row['종목코드']
    data = get_stock_datail(comp_code)
    data_params = get_train_test(data, train_params)
    _, rmse_val, train_cnt = let_leaning(data_params, train_params, comp_code, row['회사명'])
    
    now_money = let_invest(row, train_params, data_params, train_cnt)
    print(no, comp_code, row['회사명'], rmse_val, now_money, train_cnt)
    comp_rmses.append([no, comp_code, row['회사명'], rmse_val, now_money, train_cnt])
    no += 1
    

no code name rmse invest_result train_cnt


  if sys.path[0] == '':


INFO:tensorflow:Restoring parameters from ./sessions/1460.ckpt
1 1460 BYC 0.1255814 991087.8500000001 119
INFO:tensorflow:Restoring parameters from ./sessions/79160.ckpt
2 79160 CJ CGV 0.05155008 1000000 109
INFO:tensorflow:Restoring parameters from ./sessions/5830.ckpt
3 5830 DB손해보험 0.026662242 1000000 77
INFO:tensorflow:Restoring parameters from ./sessions/69730.ckpt
4 69730 DSR제강 0.052121233 865171.8550000001 119
INFO:tensorflow:Restoring parameters from ./sessions/9440.ckpt
5 9440 KC그린홀딩스 0.022932425 883278.92 47
INFO:tensorflow:Restoring parameters from ./sessions/1940.ckpt
6 1940 KISCO홀딩스 0.03029946 1187526.775 64
INFO:tensorflow:Restoring parameters from ./sessions/23150.ckpt
7 23150 MH에탄올 0.020541461 1000000 66
INFO:tensorflow:Restoring parameters from ./sessions/34310.ckpt
8 34310 NICE 0.035440963 907234.5350000003 45
INFO:tensorflow:Restoring parameters from ./sessions/36530.ckpt
9 36530 S&T홀딩스 0.030738382 1079336.3725 63
INFO:tensorflow:Restoring parameters from ./sessions/1

78 40610 SG&G 0.067750886 987220.8369999999 119
INFO:tensorflow:Restoring parameters from ./sessions/36490.ckpt
79 36490 SK머티리얼즈 0.037641343 1042932.45 119
INFO:tensorflow:Restoring parameters from ./sessions/38530.ckpt
80 38530 골드퍼시픽 0.049444996 10986577.580000002 47
INFO:tensorflow:Restoring parameters from ./sessions/14200.ckpt
81 14200 광림 0.06648371 1014002.9454999999 116
INFO:tensorflow:Restoring parameters from ./sessions/53270.ckpt
82 53270 구영테크 0.040669665 1000000 77
INFO:tensorflow:Restoring parameters from ./sessions/6050.ckpt
83 6050 국영지앤엠 0.039105885 1074092.21 50
INFO:tensorflow:Restoring parameters from ./sessions/64260.ckpt
84 64260 다날 0.03573241 918506.1124999999 61
INFO:tensorflow:Restoring parameters from ./sessions/39560.ckpt
85 39560 다산네트웍스 0.042353347 921048.0150000001 49
INFO:tensorflow:Restoring parameters from ./sessions/4780.ckpt
86 4780 대륙제관 0.026022281 986413.4450000001 62
INFO:tensorflow:Restoring parameters from ./sessions/3310.ckpt
87 3310 대주산업 0.025301723

In [131]:
# 엑셀파일로 저장한다.
df_comp_rmses = pd.DataFrame(comp_rmses, columns=['no', 'code', 'name', 'rmse', 'invest_result', 'train_cnt'])    
save_excel(df_comp_rmses, 'training_invest_result_try13.xlsx')