In [1]:
def batch_generator(batch_size, sequence_length):
    """
    Generator function for creating random batches of training-data.
    """

    # Infinite loop.
    while True:
        # Allocate a new array for the batch of input-signals.
        x_shape = (batch_size, sequence_length, num_x_signals)
        x_batch = np.zeros(shape=x_shape, dtype=np.float16)

        # Allocate a new array for the batch of output-signals.
        y_shape = (batch_size, sequence_length, num_y_signals)
        y_batch = np.zeros(shape=y_shape, dtype=np.float16)

        # Fill the batch with random sequences of data.
        for i in range(batch_size):
            # Get a random start-index.
            # This points somewhere into the training-data.
            idx = np.random.randint(num_train - sequence_length)
            
            # Copy the sequences of data starting at this index.
            x_batch[i] = x_train_scaled[idx:idx+sequence_length]
            y_batch[i] = y_train_scaled[idx:idx+sequence_length]
        
        yield (x_batch, y_batch)

def loss_mse_warmup(y_true, y_pred):
    """
    Calculate the Mean Squared Error between y_true and y_pred,
    but ignore the beginning "warmup" part of the sequences.
    
    y_true is the desired output.
    y_pred is the model's output.
    """

    # The shape of both input tensors are:
    # [batch_size, sequence_length, num_y_signals].

    # Ignore the "warmup" parts of the sequences
    # by taking slices of the tensors.
    y_true_slice = y_true[:, warmup_steps:, :]
    y_pred_slice = y_pred[:, warmup_steps:, :]

    # These sliced tensors both have this shape:
    # [batch_size, sequence_length - warmup_steps, num_y_signals]

    # Calculat the Mean Squared Error and use it as loss.
    mse = mean(square(y_true_slice - y_pred_slice))
    
    return mse

def init_model():
    model = Sequential()

    model.add(GRU(units=512,
                  return_sequences=True,
                  input_shape=(None, num_x_signals,)))

    model.add(Dense(num_y_signals, activation='sigmoid'))

    optimizer = RMSprop(lr=1e-3)

    model.compile(loss=loss_mse_warmup, optimizer=optimizer)

    model.summary()

    return model

def pred(model, pred_x_data, y_true):
    '''
    수익률 계산을 위한 실제 주식값, 예측 주식값 생성
    '''
#     pred_x_data = df.values[shift_steps:-1]
#     y_true = np.array(df_targets.values[shift_steps:-1], dtype=np.int)# .reshape(-1,1)

    #print(y_true[-10:])

    pred_x_train_scaled = x_scaler.fit_transform(pred_x_data)
    # y_true_scaled = y_scaler.transform(y_true)

    pred_x = np.expand_dims(pred_x_train_scaled, axis=0)

    pred = model.predict(pred_x)

    pred_rescaled = y_scaler.inverse_transform(pred[0])

    pred = pred_rescaled[:, 0].astype(np.int)

    return y_true, pred

In [2]:
%matplotlib inline
import datetime as dt
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import MinMaxScaler
from datetime import timedelta
from dateutil.parser import parse

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, GRU, Embedding
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from tensorflow.keras.backend import square, mean

In [3]:
print(tf.__version__)

print(tf.keras.__version__)

print(pd.__version__)
# GPU 확인
tf.test.is_gpu_available( cuda_only=False, min_cuda_compute_capability=None )

2.0.0
2.2.4-tf
1.0.1


True

In [4]:
data = pd.read_csv('result.csv', encoding='cp949').set_index('date')
print(data.shape)
data.head(2)

(1501, 27)


Unnamed: 0_level_0,stock_Asia,stock_Jobi,stock_Hyosung,stock_Farming,stock_Namhae_Chemical,stock_KGChemical,stock_Nongwoo_Bio,stock_Sungbo_Chemical,stock_Asia_Tech,stock_Eastern_Agro,...,cabbage,cabbage1,onion,carrot,green_onion,cucumber,price_egg,price_milk,exchangerate,kospi
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-08-01,3300.0,12000.0,17300.0,5780.0,9260.0,13700.0,20650.0,42350.0,3455.0,6380.0,...,5388.0,5534.0,593.0,39369.0,1156.0,62722.0,5987.0,2549.0,1037.75,2073.1
2014-08-04,3300.0,12000.0,17350.0,5900.0,9280.0,13700.0,21100.0,43050.0,3465.0,6370.0,...,9059.0,4286.0,600.0,41218.0,1510.0,43128.0,5987.0,2549.0,1033.35,2080.42


In [5]:
data.loc[str(parse(data.index[-1]).date() + timedelta(days=1))] = [np.nan for _ in range(data.shape[1])]

data.tail(2)

Unnamed: 0_level_0,stock_Asia,stock_Jobi,stock_Hyosung,stock_Farming,stock_Namhae_Chemical,stock_KGChemical,stock_Nongwoo_Bio,stock_Sungbo_Chemical,stock_Asia_Tech,stock_Eastern_Agro,...,cabbage,cabbage1,onion,carrot,green_onion,cucumber,price_egg,price_milk,exchangerate,kospi
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-03-31,3750.0,12350.0,5400.0,9580.0,6990.0,10850.0,8800.0,3300.0,3590.0,5400.0,...,7887.0,12311.0,1279.0,68390.0,844.0,43002.0,5311.0,2623.0,1218.8,1754.64
2020-04-01,,,,,,,,,,,...,,,,,,,,,,


In [6]:
target_name = 'stock_Jobi'
target_names = [target_name,'onion']
#df = data[]
shift_steps = 1

df = data[target_names].shift(1)

df.tail(3)

Unnamed: 0_level_0,stock_Jobi,onion
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-03-30,9360.0,1180.0
2020-03-31,9500.0,1407.0
2020-04-01,12350.0,1279.0


In [7]:
df_targets = df[target_name].shift(-shift_steps)
# df_targets = df.shift(-shift_steps)

df_targets.tail(3)

date
2020-03-30     9500.0
2020-03-31    12350.0
2020-04-01        NaN
Name: stock_Jobi, dtype: float64

In [8]:
x_data = df.values[shift_steps:-23] # 3월 예측

print(type(x_data))
print("Shape:", x_data.shape)
y_data = df_targets.values[shift_steps:-23].reshape(-1,1)

print(type(y_data))
print("Shape:", y_data.shape)

<class 'numpy.ndarray'>
Shape: (1478, 2)
<class 'numpy.ndarray'>
Shape: (1478, 1)


In [9]:
num_data = len(x_data)
print(num_data)
train_split = 0.9

num_train = int(train_split * num_data)
print(num_train)

num_test = num_data - num_train
print(num_test)

x_train = x_data[0:num_train]
x_test = x_data[num_train:]
print(len(x_train) + len(x_test))

y_train = y_data[0:num_train]
y_test = y_data[num_train:]
print(len(y_train) + len(y_test))

1478
1330
148
1478
1478


In [10]:
# input 갯수
num_x_signals = x_data.shape[1]
# output 갯수
num_y_signals = y_data.shape[1]

print('input ouput 갯수 :',num_x_signals, num_y_signals)

input ouput 갯수 : 2 1


In [11]:
# Scaled Data
x_scaler = MinMaxScaler()

x_train_scaled = x_scaler.fit_transform(x_train)
x_test_scaled = x_scaler.transform(x_test)

y_scaler = MinMaxScaler()
y_train_scaled = y_scaler.fit_transform(y_train)
y_test_scaled = y_scaler.transform(y_test)
# print("Min:", np.min(x_train_scaled))
# print("Max:", np.max(x_train_scaled))

In [12]:
batch_size = 256
sequence_length = 365
warmup_steps = 100

generator = batch_generator(batch_size=batch_size, sequence_length=sequence_length)
x_batch, y_batch = next(generator)

print(x_batch.shape)
print(y_batch.shape)

(256, 365, 2)
(256, 365, 1)


### Model 불러오기

In [13]:
model = init_model()
# [_stock_Jobi_, _onion_].h5
model.load_weights('model/[\'stock_Jobi\', \'onion\'].h5')# Re-evaluate the model 

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, None, 512)         792576    
_________________________________________________________________
dense (Dense)                (None, None, 1)           513       
Total params: 793,089
Trainable params: 793,089
Non-trainable params: 0
_________________________________________________________________


In [14]:
t,p = pred(model, df.values[shift_steps:], np.array(df_targets.values[shift_steps:], dtype=np.float))

In [15]:
# date
date = np.array(df.reset_index()['date'][:-1]).reshape(-1,1)

# com_name
com_name = np.array([target_name for _ in range(len(date))]).reshape(-1,1)

# tod_price
tod_price = np.array(data[target_name][:-1]).reshape(-1,1)


# tod_status
tod_status = np.array([0 for _ in range(len(date))]).reshape(-1,1)
sub = tod_price[1:] - tod_price[:-1]
for idx, value in enumerate(sub):
    if value > 0: tod_status[idx+1] = 1
    elif value < 0: tod_status[idx+1] = -1

# tom_price
tom_price = np.array(data[target_name][:-1])
tom_price[1:] += p[1:] - p[:-1]
tom_price = tom_price.reshape(-1,1)

# tom_status
# 오늘 종가로부터 상향, 하향, 유지
tom_status = np.array([0 for _ in range(len(date))]).reshape(-1,1)
for idx, value in enumerate(tom_price - tod_price):
    if value > 0: tom_status[idx] = 1
    elif value < 0: tom_status[idx] = -1
        
# match_status
# 작일 예측 여부 확인
match_status = np.array([True for _ in range(len(date))]).reshape(-1,1)
for idx, value in enumerate(tom_status[:-1] == tod_status[1:]):
    match_status[idx+1] = value

# price_error
# 작일 예측값 - 금일 종가의 절댓값
price_error = np.array([0 for _ in range(len(date))]).reshape(-1,1)
for idx, value in enumerate(tom_price[:-1] - tod_price[1:]):
    price_error[idx + 1] = abs(value)
    
# return
# 금일 수익률
# 작일 tom_status > 0 => (금일 tod_price) - (작일 tod_price) 만큼 수익 발생
# 작일 tom_status <= 0 => 수익 없음
returns = np.array([1.0 for _ in range(len(date))]).reshape(-1,1)

for idx, value in enumerate(tom_status[:-1]):
    if value == 1:
        returns[idx] += (tod_price[idx+1] - tod_price[idx]) / tod_price[idx]
returns = np.round(returns,3)


nan = np.array([np.nan for _ in range(len(date))]).reshape(-1,1)

In [16]:
returns.shape

(1501, 1)

In [17]:
DB_data = pd.DataFrame(np.concatenate([com_name,date,tod_price,tod_status,tom_price,tom_status,match_status,price_error,returns], axis = 1),columns = ['com_name','date','tod_price','tod_status','tom_price','tom_status','match_status','price_error','return'])
# com_name,date,tod_price,tod_status,tom_price,tom_status,match_status,price_error,returns
DB_data

Unnamed: 0,com_name,date,tod_price,tod_status,tom_price,tom_status,match_status,price_error,return
0,stock_Jobi,2014-08-01,12000,0,12000,0,True,0,1
1,stock_Jobi,2014-08-04,12000,0,9173,-1,True,0,1
2,stock_Jobi,2014-08-05,11850,-1,10727,-1,True,2677,1
3,stock_Jobi,2014-08-06,11900,1,11407,-1,False,1173,1
4,stock_Jobi,2014-08-07,11750,-1,11423,-1,True,343,1
...,...,...,...,...,...,...,...,...,...
1496,stock_Jobi,2020-03-25,8910,1,9083,1,False,1167,1.017
1497,stock_Jobi,2020-03-26,9060,1,9170,1,True,23,1.033
1498,stock_Jobi,2020-03-27,9360,1,9424,1,True,190,1.015
1499,stock_Jobi,2020-03-30,9500,1,9552,1,True,76,1.3


In [18]:
# 2020-01-01 이후 수익률
np.prod(returns[-65:-1])

1.4035769468706698

In [19]:
# returns_lst = pd.DataFrame(columns = ['name','error','returns','stock_name']).set_index('name')

returns_lst = pd.read_csv('returns_lst.csv', encoding='cp949').set_index('name')

returns_lst

Unnamed: 0_level_0,error,returns,stock_name
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"['stock_Jobi', 'price_milk']",0.00038,1.444,stock_Jobi
"['stock_Jobi', 'price_egg']",0.00028,1.406,stock_Jobi
"['stock_Asia', 'green_pepper']",0.00024,1.228,stock_Asia
['stock_Jobi'],0.00029,1.219,stock_Jobi
"['stock_Asia', 'kospi']",0.00027,1.214,stock_Asia
"['stock_Asia', 'price_egg']",0.00026,1.114,stock_Asia
"['stock_Jobi', 'exchangerate']",0.00024,1.109,stock_Jobi
['stock_Asia'],0.00029,1.096,stock_Asia
"['stock_Asia', 'cabbage1']",0.00027,1.086,stock_Asia
"['stock_Asia', 'exchangerate']",0.00038,1.07,stock_Asia


In [20]:
result = model.evaluate(x=np.expand_dims(x_test_scaled, axis=0),
                        y=np.expand_dims(y_test_scaled, axis=0))

print("loss (test-set):", result)

returns_lst.loc[str([str(i) for i in df.columns])] = [round(result,5), round(np.prod(returns[-65:-1]),3), target_name]
# result_lst.append([str([str(i) for i in df.columns]), (round(result,5))])
returns_lst[returns_lst['stock_name'] == target_name].sort_values(by = ['returns','error'], ascending=[False, True])

loss (test-set): 0.00024581197067163885


Unnamed: 0_level_0,error,returns,stock_name
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"['stock_Jobi', 'price_milk']",0.00038,1.444,stock_Jobi
"['stock_Jobi', 'price_egg']",0.00028,1.406,stock_Jobi
"['stock_Jobi', 'onion']",0.00025,1.404,stock_Jobi
['stock_Jobi'],0.00029,1.219,stock_Jobi
"['stock_Jobi', 'exchangerate']",0.00024,1.109,stock_Jobi
"['stock_Jobi', 'carrot']",0.0003,1.038,stock_Jobi
"['stock_Jobi', 'kospi']",0.00051,0.98,stock_Jobi


In [21]:
result = returns_lst.sort_values(by = ['returns','error'], ascending=[False, True])#.set_index('name')
result.to_csv('returns_lst.csv')
result

Unnamed: 0_level_0,error,returns,stock_name
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"['stock_Jobi', 'price_milk']",0.00038,1.444,stock_Jobi
"['stock_Jobi', 'price_egg']",0.00028,1.406,stock_Jobi
"['stock_Jobi', 'onion']",0.00025,1.404,stock_Jobi
"['stock_Asia', 'green_pepper']",0.00024,1.228,stock_Asia
['stock_Jobi'],0.00029,1.219,stock_Jobi
"['stock_Asia', 'kospi']",0.00027,1.214,stock_Asia
"['stock_Asia', 'price_egg']",0.00026,1.114,stock_Asia
"['stock_Jobi', 'exchangerate']",0.00024,1.109,stock_Jobi
['stock_Asia'],0.00029,1.096,stock_Asia
"['stock_Asia', 'cabbage1']",0.00027,1.086,stock_Asia


In [22]:
result['stock_name'].unique()

array(['stock_Jobi', 'stock_Asia'], dtype=object)

In [23]:
result[result['stock_name'] == target_name]

Unnamed: 0_level_0,error,returns,stock_name
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"['stock_Jobi', 'price_milk']",0.00038,1.444,stock_Jobi
"['stock_Jobi', 'price_egg']",0.00028,1.406,stock_Jobi
"['stock_Jobi', 'onion']",0.00025,1.404,stock_Jobi
['stock_Jobi'],0.00029,1.219,stock_Jobi
"['stock_Jobi', 'exchangerate']",0.00024,1.109,stock_Jobi
"['stock_Jobi', 'carrot']",0.0003,1.038,stock_Jobi
"['stock_Jobi', 'kospi']",0.00051,0.98,stock_Jobi
