In [1]:
import  pandas as  pd
from pandas import Series, DataFrame
import numpy as np
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
# Load the tab-separated lotto history file and order the rows by the 'no'
# column (presumably the draw number -- confirm against the data file).
lotto_data = pd.read_csv('lotto_data.data', delimiter='\t')
lotto_data = lotto_data.sort_values('no')

In [16]:
# Display only the '1th'..'6th' columns (presumably the six drawn numbers of each row).
lotto_data[['1th', '2th', '3th', '4th', '5th', '6th']]

Unnamed: 0,1th,2th,3th,4th,5th,6th
816,10,23,29,33,37,40
815,9,13,21,25,32,42
814,11,16,19,21,27,31
813,14,27,30,31,40,42
812,16,24,29,40,41,42
811,14,15,26,27,40,42
810,2,9,16,25,26,40
809,8,19,25,34,37,39
808,2,4,16,17,36,39
807,9,25,30,33,41,44


In [56]:
# Convert pandas data into a NumPy matrix.
def to_ndarray(cols_data):
    """Convert a pandas ``Series`` or ``DataFrame`` into a 2-D NumPy array.

    Args:
        cols_data: A ``Series`` (converted to a column vector of shape
            ``(n, 1)``) or a ``DataFrame`` (converted to an array of shape
            ``(rows, columns)``).

    Returns:
        A ``numpy.ndarray`` holding the values, or ``None`` when
        ``cols_data`` is neither a ``Series`` nor a ``DataFrame``.
    """
    if isinstance(cols_data, Series):
        return np.reshape(list(cols_data), (-1, 1))
    if isinstance(cols_data, DataFrame):
        # ``DataFrame.as_matrix()`` was deprecated and later removed from
        # pandas; ``.values`` returns the same array on old and new versions.
        return cols_data.values
    # Unsupported input: keep the historical behaviour of returning None.
    return None

def get_lotto_data(path='lotto_data.data'):
    """Load the lotto history and return it ordered by the ``'no'`` column.

    Args:
        path: Location of the tab-separated data file. Defaults to
            ``'lotto_data.data'`` in the current working directory, which is
            what this function always read before the parameter existed.

    Returns:
        A ``DataFrame`` with the file's contents, sorted ascending by ``'no'``.
    """
    history = pd.read_csv(path, delimiter='\t')
    return history.sort_values('no')

# Build the windowed samples used to train the RNN.
def get_dataXY(data, seq_length=None):
    """Turn the lotto table into (input window, next row) training samples.

    Each sample pairs ``seq_length`` consecutive rows with the single row
    that immediately follows them.

    Args:
        data: ``DataFrame`` containing the columns ``'1th'`` .. ``'6th'``.
        seq_length: Number of consecutive rows per input window. When
            omitted, the global ``params['seq_length']`` is used.

    Returns:
        A ``(dataX, dataY)`` tuple of equally long lists. ``dataX[i]`` is an
        array of shape ``(seq_length, 6)`` and ``dataY[i]`` is the following
        row, an array of shape ``(6,)``. Both lists are empty when ``data``
        has no more than ``seq_length`` rows.
    """
    if seq_length is None:
        seq_length = params['seq_length']

    # Scale the values by 45 (presumably the largest lotto number -- confirm).
    data = data[['1th', '2th', '3th', '4th', '5th', '6th']] / 45
    # NOTE(review): this reorders the *columns* by the values found in the
    # row whose index label is 1 (and raises KeyError if no such label
    # exists). If the intent was to sort every row ascending, this does not
    # do that -- confirm before changing.
    data = data.sort_values(by=1, axis=1)
    # ``.values`` is available on every pandas version (``as_matrix`` is not).
    x = data.values

    # A window starting at i needs row i + seq_length as its target, so the
    # valid starts are 0 .. len(x) - seq_length - 1 inclusive. The previous
    # bound (len(x) - seq_length - 1, exclusive) silently dropped the last
    # window and therefore the most recent row as a target.
    window_count = len(x) - seq_length
    dataX = [x[i:i + seq_length] for i in range(window_count)]
    dataY = [x[i + seq_length] for i in range(window_count)]
    return dataX, dataY

# Split the samples into training and test sets.
def split_train_test(dataX, dataY, train_percent=None):
    """Split the samples chronologically into a training and a test part.

    The first ``train_percent`` percent of the samples (rounded down) become
    the training set; the remainder becomes the test set. No shuffling is
    performed.

    Args:
        dataX: Sequence of input windows.
        dataY: Sequence of targets, aligned with ``dataX``.
        train_percent: Percentage (0-100) of samples used for training. When
            omitted, the global ``params['train_percent']`` is used.

    Returns:
        A dict with the NumPy arrays ``'trainX'``, ``'trainY'``, ``'testX'``
        and ``'testY'``.
    """
    if train_percent is None:
        train_percent = params['train_percent']

    train_size = int(len(dataY) * train_percent / 100)

    return {
        'trainX': np.array(dataX[:train_size]),
        'trainY': np.array(dataY[:train_size]),
        'testX': np.array(dataX[train_size:]),
        'testY': np.array(dataY[train_size:]),
    }

# Build the TensorFlow computation graph.
def draw_graph():
    """Build the stacked-LSTM regression graph described by ``params``.

    Resets the default graph, then creates the input placeholders, one
    dropout-wrapped LSTM cell per entry of ``params['hidden_dims']``, a
    linear output layer on the last time step, a sum-of-squares loss with an
    Adam training op, and a separate RMSE op fed through its own
    placeholders.

    Reads the global ``params`` keys ``'seq_length'``, ``'data_dim'``,
    ``'hidden_dims'``, ``'output_dim'`` and ``'learning_rate'``.

    Returns:
        A dict with the graph nodes ``'X'``, ``'Y'``, ``'output_keep_prob'``,
        ``'train'``, ``'loss'``, ``'Y_pred'``, ``'targets'``, ``'rmse'`` and
        ``'predictions'``.
    """
    seq_length = params['seq_length']
    data_dim = params['data_dim']
    hidden_dims = params['hidden_dims']
    output_dim = params['output_dim']

    tf.reset_default_graph()
    X = tf.placeholder(tf.float32, [None, seq_length, data_dim])
    Y = tf.placeholder(tf.float32, [None, output_dim])
    output_keep_prob = tf.placeholder(tf.float32)

    cells = []
    for n in hidden_dims:
        cell = tf.contrib.rnn.BasicLSTMCell(num_units=n, activation=tf.tanh)
        dropout_cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=output_keep_prob)
        cells.append(dropout_cell)
    stacked_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(cells)
    outputs, _states = tf.nn.dynamic_rnn(stacked_rnn_cell, X, dtype=tf.float32)

    # dynamic_rnn returns [batch, seq_length, hidden] by default. Only the
    # final time step feeds the output layer, so Y_pred is [batch, output_dim]
    # and matches both Y and the `predictions` placeholder. (Previously the
    # whole sequence was passed to the dense layer, giving a rank-3 Y_pred.)
    last = outputs[:, -1]

    Y_pred = tf.contrib.layers.fully_connected(last, output_dim, activation_fn=None)

    # cost/loss
    loss = tf.reduce_sum(tf.square(Y_pred - Y))

    optimizer = tf.train.AdamOptimizer(params['learning_rate'])
    train = optimizer.minimize(loss)

    # RMSE: the error must be squared before averaging; the square root of a
    # mean of signed differences is NaN whenever that mean is negative.
    targets = tf.placeholder(tf.float32, [None, output_dim])
    predictions = tf.placeholder(tf.float32, [None, output_dim])
    rmse = tf.sqrt(tf.reduce_mean(tf.square(predictions - targets)))

    return {
        'X': X,
        'Y': Y,
        'output_keep_prob': output_keep_prob,
        'train': train,
        'loss' : loss,
        'Y_pred': Y_pred,
        'targets': targets,
        'rmse' : rmse,
        'predictions': predictions,
    }

def draw_plot(rmse_vals, test_predict, invest_predicts, data_params):
    """Plot the RMSE history and the actual-versus-predicted series.

    Figure 1 shows ``rmse_vals``. Figure 2 overlays the flattened actual
    values (``data_params['testY']`` followed by ``data_params['investY']``)
    in blue and the flattened predictions (``test_predict`` followed by
    ``invest_predicts``) in red.

    Args:
        rmse_vals: Sequence of RMSE values to plot.
        test_predict: Predictions for the test set.
        invest_predicts: Predictions appended after ``test_predict``.
        data_params: Dict that must contain ``'testY'`` and ``'investY'``.

    NOTE(review): ``mpl``, ``fm`` and ``plt`` are not imported in the visible
    cells -- presumably ``matplotlib``, ``matplotlib.font_manager`` and
    ``matplotlib.pyplot``; confirm they are in scope before calling.
    """
    actual = np.append(data_params['testY'], data_params['investY'])
    predicted = np.append(test_predict, invest_predicts)

    # Use the font configured in params and keep minus signs renderable.
    mpl.rcParams['axes.unicode_minus'] = False
    configured_font = fm.FontProperties(fname=params['kor_font_path'], size=50)
    plt.rc('font', family=configured_font.get_name())

    # Figure 1: RMSE history.
    plt.figure(1)
    plt.plot(rmse_vals, 'gold')
    plt.xlabel('Epoch')
    plt.ylabel('Root Mean Square Error')

    # Figure 2: actual values (blue) against predictions (red).
    plt.figure(2)
    for series, colour in ((actual, 'b'), (predicted, 'r')):
        plt.plot(series, colour)
    plt.xlabel('Time Period')
    plt.ylabel('Stock Price')
    plt.show()

def save_learning_image(sess, saver, graph_params):
    """Register the key graph nodes in collections and write a checkpoint.

    The nodes ``'X'``, ``'Y'``, ``'train'``, ``'Y_pred'`` and
    ``'output_keep_prob'`` are added to graph collections of the same names,
    then the session is saved to ``./sessions/lotto.ckpt``.

    Args:
        sess: The session whose variables are saved.
        saver: The ``tf.train.Saver`` used to write the checkpoint.
        graph_params: Dict of graph nodes containing the five keys above.
    """
    import os  # local import: only needed to prepare the checkpoint folder

    checkpoint_path = "./sessions/lotto.ckpt"

    # Look up every node first so a missing key fails before anything is
    # registered.
    entries = [
        (name, graph_params[name])
        for name in ("X", "Y", "train", "Y_pred", "output_keep_prob")
    ]
    for name, node in entries:
        # This function is called on every improvement; without this guard
        # the same node is appended to its collection again on each call.
        already_registered = any(
            existing is node for existing in tf.get_collection(name)
        )
        if not already_registered:
            tf.add_to_collection(name, node)

    # Saver.save raises when the parent directory does not exist.
    checkpoint_dir = os.path.dirname(checkpoint_path)
    if not os.path.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    saver.save(sess, checkpoint_path)
    
# Run the training loop.
def let_training(graph_params, data_params):
    """Train the graph and keep the state with the lowest test RMSE.

    Each iteration runs one training step on the full training set,
    predicts the test set with dropout disabled (keep probability 1.0) and
    evaluates the RMSE op. Whenever the RMSE improves on the best seen so
    far, a checkpoint is written through ``save_learning_image``.

    The loop runs at most ``params['iterations'][1]`` times and stops early
    once the iteration index exceeds ``params['iterations'][0]``, more than
    ``params['loss_up_count']`` consecutive iterations brought no
    improvement, and the best RMSE is below ``params['rmse_max']``.

    Args:
        graph_params: Dict of graph nodes as returned by ``draw_graph``.
        data_params: Dict with ``'trainX'``, ``'trainY'``, ``'testX'`` and
            ``'testY'``.

    Returns:
        A tuple ``(best_rmse, best_iteration, rmse_history,
        best_test_prediction)``.
    """
    X, Y, output_keep_prob, train, loss = (
        graph_params[key] for key in ('X', 'Y', 'output_keep_prob', 'train', 'loss')
    )
    trainX, trainY, testX, testY = (
        data_params[key] for key in ('trainX', 'trainY', 'testX', 'testY')
    )
    Y_pred, targets, rmse, predictions = (
        graph_params[key] for key in ('Y_pred', 'targets', 'rmse', 'predictions')
    )

    patience = params['loss_up_count']
    keep_prob = params['dropout_keep']
    iteration_bounds = params['iterations']  # [minimum, maximum]
    rmse_ceiling = params['rmse_max']

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        best_predict = []
        best_rmse = 999999
        best_step = 0
        stale_steps = 0
        rmse_history = []

        train_feed = {X: trainX, Y: trainY, output_keep_prob: keep_prob}
        test_feed = {X: testX, output_keep_prob: 1.0}

        for step in range(iteration_bounds[1]):
            # One optimisation step; the loss is fetched but not used here.
            sess.run([train, loss], feed_dict=train_feed)

            current_predict = sess.run(Y_pred, feed_dict=test_feed)
            current_rmse = sess.run(
                rmse, feed_dict={targets: testY, predictions: current_predict}
            )
            rmse_history.append(current_rmse)

            if current_rmse < best_rmse:
                # New best: checkpoint it and reset the patience counter.
                save_learning_image(sess, saver, graph_params)
                best_predict, best_rmse = current_predict, current_rmse
                best_step = step
                stale_steps = 0
            else:
                stale_steps += 1

            may_stop = (
                step > iteration_bounds[0]
                and stale_steps > patience
                and rmse_ceiling > best_rmse
            )
            if may_stop:
                break

        return best_rmse, best_step, rmse_history, best_predict


# Build the graph, then train it.
def let_leaning(data_params):
    """Create a fresh graph and run the training loop on ``data_params``.

    Args:
        data_params: Dict of train/test arrays, passed on to ``let_training``.

    Returns:
        Whatever ``let_training`` returns for the newly built graph.
    """
    return let_training(draw_graph(), data_params)

# Build the windowed samples from the loaded table, split them into
# train/test sets, and train the model.
dataX, dataY = get_dataXY(lotto_data)
#print(dataX,dataY)
data_params = split_train_test(dataX, dataY)
# Last expression of the cell: the tuple returned by let_leaning is shown as the cell output.
let_leaning(data_params)

  


Tensor("strided_slice:0", shape=(?, 5, 64), dtype=float32)


ValueError: Only call `softmax_cross_entropy_with_logits` with named arguments (labels=..., logits=..., ...)

In [29]:
# Settings read through the global `params` by the functions in this notebook.
# They look it up at call time, so this cell must have been executed before
# any of them is called.
params = {
    'seq_length' : 5, # number of consecutive rows in each input sequence
    'data_dim' : 6,    # number of input values per row
    'hidden_dims' : [128, 96, 64],  # units of each stacked LSTM cell, one entry per layer
    'dropout_keep' : 0.8, # dropout keep probability fed during training
    'output_dim' : 6,  # number of output values
    'learning_rate' : 0.0001, # passed to the Adam optimizer
    'iterations' : [10, 200],  # [minimum, maximum] number of training iterations
    'rmse_max' : 0.045, # early stopping is only allowed once the best RMSE is below this
    'train_percent' : 80.0, # percentage of samples used for training
    'loss_up_count' : 12, # early stopping: non-improving iterations tolerated
    # NOTE(review): absolute, Windows-only font path; used by draw_plot.
    'kor_font_path' : 'C:\\WINDOWS\\Fonts\\H2GTRM.TTF'
}