In [46]:
import pandas as pd
import codecs
import glob
import os
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import math
import matplotlib.dates as mdates
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from pandas.tools.plotting import scatter_matrix
import operator as op
import tensorflow as tf
import sys
from collections import namedtuple

import matplotlib.font_manager as fm
#fm.findSystemFonts()

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [47]:
# フォルダ内の絶対ファイルパス返す
def get_filepath(path):
    return [os.path.abspath(p) for p in glob.glob(path)]

In [48]:
# ファイル読み込み（全年結合）
count = 1
for path in get_filepath('./data/1321/*.csv'):
    with codecs.open(path, "r", "Shift-JIS", "ignore") as file:
        if count == 1:
            df = pd.read_table(file, delimiter=",", header = 1, index_col='日付', parse_dates=['日付'])
        else:
            df = pd.concat([df, pd.read_table(file, delimiter=",", header = 1, index_col='日付', parse_dates=['日付'])])
        
        count += 1     

In [49]:
NYDow = pd.read_table('./data/NYdow/DJI.csv', delimiter=",", index_col='Date', parse_dates=['Date'])
NYDow = NYDow.drop(columns = ['Open', 'High', 'Low', 'Adj Close', 'Volume'])

In [50]:
YenDoll = pd.read_table('./data/為替/quote.csv', delimiter=",", header = 2)
YenDoll = YenDoll.drop(columns = ['GBP', 'EUR', 'CAD', 'CHF', 'SEK', 'DKK', 'NOK',
       'AUD', 'NZD', 'ZAR', 'BHD', 'IDR(100)', 'CNY', 'HKD', 'INR', 'MYR',
       'PHP', 'SGD', 'KRW(100)', 'THB', 'KWD', 'SAR', 'AED', 'MXN', 'PGK',
       'HUF', 'CZK', 'PLN', 'RUB', 'TRY', 'Unnamed: 32', 'IDR(100).1', 'CNY.1',
       'MYR.1', 'KRW(100).1', 'TWD'])
YenDoll['Unnamed: 0'] = pd.to_datetime(YenDoll['Unnamed: 0'])
YenDoll.set_index('Unnamed: 0', inplace=True)

In [51]:
EURONEXT = pd.read_table('./data/^N100.csv', delimiter=",", index_col='Date', parse_dates=['Date'])
EURONEXT = EURONEXT.drop(columns = ['High', 'Low', 'Adj Close', 'Volume'])
EURONEXT = EURONEXT.rename(columns={'Open': 'Open_EURONEXT', 'Close': 'Close_EURONEXT'})

In [52]:
ALLORDS = pd.read_table('./data/^AORD.csv', delimiter=",", index_col='Date', parse_dates=['Date'])
ALLORDS = ALLORDS.drop(columns = ['High', 'Low', 'Adj Close', 'Volume'])
ALLORDS = ALLORDS.rename(columns={'Open': 'Open_ALLORDS', 'Close': 'Close_ALLORDS'})

In [53]:
HANGSENG = pd.read_table('./data/^HSI.csv', delimiter=",", index_col='Date', parse_dates=['Date'])
HANGSENG = HANGSENG.drop(columns = ['High', 'Low', 'Adj Close', 'Volume'])
HANGSENG = HANGSENG.rename(columns={'Open': 'Open_HANGSENG', 'Close': 'Close_HANGSENG'})

In [54]:
DAX = pd.read_table('./data/^GDAXI.csv', delimiter=",", index_col='Date', parse_dates=['Date'])
DAX = DAX.drop(columns = ['High', 'Low', 'Adj Close', 'Volume'])
DAX = DAX.rename(columns={'Open': 'Open_DAX', 'Close': 'Close_DAX'})

In [55]:
NYSE = pd.read_table('./data/^NYA.csv', delimiter=",", index_col='Date', parse_dates=['Date'])
NYSE = NYSE.drop(columns = ['High', 'Low', 'Adj Close', 'Volume'])
NYSE = NYSE.rename(columns={'Open': 'Open_NYSE', 'Close': 'Close_NYSE'})

In [56]:
SP500 = pd.read_table('./data/^GSPC.csv', delimiter=",", index_col='Date', parse_dates=['Date'])
SP500 = SP500.drop(columns = ['High', 'Low', 'Adj Close', 'Volume'])
SP500 = SP500.rename(columns={'Open': 'Open_SP500', 'Close': 'Close_SP500'})

In [57]:
data = pd.DataFrame()
data = df.merge(NYDow, how = "outer", left_index = True, right_index=True)
data = data.merge(YenDoll, how = "outer", left_index = True, right_index=True)
data = data.merge(EURONEXT, how = "outer", left_index = True, right_index=True)
data = data.merge(ALLORDS, how = "outer", left_index = True, right_index=True)
data = data.merge(HANGSENG, how = "outer", left_index = True, right_index=True)
data = data.merge(DAX, how = "outer", left_index = True, right_index=True)
data = data.merge(NYSE, how = "outer", left_index = True, right_index=True)
data = data.merge(SP500, how = "outer", left_index = True, right_index=True)

In [58]:
data = data[data.index >= '2002-04-02']
data = data.fillna(method='ffill')

In [59]:
data = data.drop(columns = ['終値調整値'])

In [60]:
data.info() # 日経のデータは2018-08-24 まで

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4652 entries, 2002-04-02 to 2018-09-03
Data columns (total 19 columns):
始値                4652 non-null float64
高値                4652 non-null float64
安値                4652 non-null float64
終値                4652 non-null float64
出来高               4652 non-null float64
Close             4652 non-null float64
USD               4652 non-null float64
Open_EURONEXT     4652 non-null float64
Close_EURONEXT    4652 non-null float64
Open_ALLORDS      4652 non-null float64
Close_ALLORDS     4652 non-null float64
Open_HANGSENG     4652 non-null float64
Close_HANGSENG    4652 non-null float64
Open_DAX          4652 non-null float64
Close_DAX         4652 non-null float64
Open_NYSE         4652 non-null float64
Close_NYSE        4652 non-null float64
Open_SP500        4652 non-null float64
Close_SP500       4652 non-null float64
dtypes: float64(19)
memory usage: 726.9 KB


In [61]:
#### 前日差+自然対数を取る
log_data = pd.DataFrame()

#log_data['nikkei_diff_open_close'] = data['終値'] - data['始値']
log_data['nikkei_diff_open_close_pos'] = (data['終値'] >= data['始値']) * 1 # target 
log_data['nikkei_diff_open_close_neg'] = (data['終値'] <  data['始値']) * 1# target
log_data['nikkei_open_log'] = np.log(data['始値'] / data['始値'].shift())
log_data['nikkei_high_log'] = np.log(data['高値'] / data['高値'].shift())
log_data['nikkei_low_log'] = np.log(data['安値'] / data['安値'].shift())
log_data['nikkei_close_log'] = np.log(data['終値'] / data['終値'].shift())
log_data['nydow_close_log'] = np.log(data['Close'] / data['Close'].shift())
log_data['nikkei_volume'] = np.log(data['出来高'] / data['出来高'].shift())
log_data['exchange'] = np.log(data['USD'] / data['USD'].shift())
log_data['Close_EURONEXT_log'] = np.log(data['Close_EURONEXT'] / data['Close_EURONEXT'].shift())
log_data['Close_ALLORDS_log'] = np.log(data['Close_ALLORDS'] / data['Close_ALLORDS'].shift())
log_data['Close_HANGSENG_log'] = np.log(data['Close_HANGSENG'] / data['Close_HANGSENG'].shift())
log_data['Close_DAX_log'] = np.log(data['Close_DAX'] / data['Close_DAX'].shift())
log_data['Close_NYSE_log'] = np.log(data['Close_NYSE'] / data['Close_NYSE'].shift())
log_data['Close_SP500_log'] = np.log(data['Close_SP500'] / data['Close_SP500'].shift())

log_data['nikkei_diff_open_close_log'] = np.log(data['終値'] / data['始値'])
log_data['Close_EURONEXT_diff_log'] = np.log(data['Close_EURONEXT'] / data['Open_EURONEXT'])
log_data['Close_ALLORDS_diff_log'] = np.log(data['Close_ALLORDS'] / data['Open_ALLORDS'])
log_data['Close_HANGSENG_diff_log'] = np.log(data['Close_HANGSENG'] / data['Open_HANGSENG'])
log_data['Close_DAX_diff_log'] = np.log(data['Close_DAX'] / data['Open_DAX'])
log_data['Close_NYSE_diff_log'] = np.log(data['Close_NYSE'] / data['Open_NYSE'])
log_data['Close_SP500_diff_log'] = np.log(data['Close_SP500'] / data['Open_SP500'])

In [62]:
log_data = log_data.drop(log_data.index[log_data.index == log_data.index[0]])

In [18]:
# 各変数を正規化する 不要
mms = MinMaxScaler()
log_data['nikkei_open_log'] = mms.fit_transform(log_data['nikkei_open_log'].values.reshape(-1, 1))
log_data['nikkei_high_log'] = mms.fit_transform(log_data['nikkei_high_log'].values.reshape(-1, 1))
log_data['nikkei_low_log'] = mms.fit_transform(log_data['nikkei_low_log'].values.reshape(-1, 1))
log_data['nikkei_close_log'] = mms.fit_transform(log_data['nikkei_close_log'].values.reshape(-1, 1))
log_data['nydow_close_log'] = mms.fit_transform(log_data['nydow_close_log'].values.reshape(-1, 1))
log_data['nikkei_volume'] = mms.fit_transform(log_data['nikkei_volume'].values.reshape(-1, 1))
log_data['exchange'] = mms.fit_transform(log_data['exchange'].values.reshape(-1, 1))
log_data['Close_EURONEXT_log'] = mms.fit_transform(log_data['Close_EURONEXT_log'].values.reshape(-1, 1))
log_data['Close_ALLORDS_log'] = mms.fit_transform(log_data['Close_ALLORDS_log'].values.reshape(-1, 1))
log_data['Close_HANGSENG_log'] = mms.fit_transform(log_data['Close_HANGSENG_log'].values.reshape(-1, 1))
log_data['Close_DAX_log'] = mms.fit_transform(log_data['Close_DAX_log'].values.reshape(-1, 1))
log_data['Close_NYSE_log'] = mms.fit_transform(log_data['Close_NYSE_log'].values.reshape(-1, 1))
log_data['Close_SP500_log'] = mms.fit_transform(log_data['Close_SP500_log'].values.reshape(-1, 1))

log_data['nikkei_diff_open_close_log'] = mms.fit_transform(log_data['nikkei_diff_open_close_log'].values.reshape(-1, 1))
log_data['Close_EURONEXT_diff_log'] = mms.fit_transform(log_data['Close_EURONEXT_diff_log'].values.reshape(-1, 1))
log_data['Close_ALLORDS_diff_log'] = mms.fit_transform(log_data['Close_ALLORDS_diff_log'].values.reshape(-1, 1))
log_data['Close_HANGSENG_diff_log'] = mms.fit_transform(log_data['Close_HANGSENG_diff_log'].values.reshape(-1, 1))
log_data['Close_DAX_diff_log'] = mms.fit_transform(log_data['Close_DAX_diff_log'].values.reshape(-1, 1))
log_data['Close_NYSE_diff_log'] = mms.fit_transform(log_data['Close_NYSE_diff_log'].values.reshape(-1, 1))
log_data['Close_SP500_diff_log'] = mms.fit_transform(log_data['Close_SP500_diff_log'].values.reshape(-1, 1))

In [63]:
# 各変数を標準化する（X-μ/σ)
sc = StandardScaler()
log_data['nikkei_open_log'] = sc.fit_transform(log_data['nikkei_open_log'].values.reshape(-1, 1))
log_data['nikkei_high_log'] = sc.fit_transform(log_data['nikkei_high_log'].values.reshape(-1, 1))
log_data['nikkei_low_log'] = sc.fit_transform(log_data['nikkei_low_log'].values.reshape(-1, 1))
log_data['nikkei_close_log'] = sc.fit_transform(log_data['nikkei_close_log'].values.reshape(-1, 1))
log_data['nydow_close_log'] = sc.fit_transform(log_data['nydow_close_log'].values.reshape(-1, 1))
log_data['nikkei_volume'] = sc.fit_transform(log_data['nikkei_volume'].values.reshape(-1, 1))
log_data['exchange'] = sc.fit_transform(log_data['exchange'].values.reshape(-1, 1))
log_data['Close_EURONEXT_log'] = sc.fit_transform(log_data['Close_EURONEXT_log'].values.reshape(-1, 1))
log_data['Close_ALLORDS_log'] = sc.fit_transform(log_data['Close_ALLORDS_log'].values.reshape(-1, 1))
log_data['Close_HANGSENG_log'] = sc.fit_transform(log_data['Close_HANGSENG_log'].values.reshape(-1, 1))
log_data['Close_DAX_log'] = sc.fit_transform(log_data['Close_DAX_log'].values.reshape(-1, 1))
log_data['Close_NYSE_log'] = sc.fit_transform(log_data['Close_NYSE_log'].values.reshape(-1, 1))
log_data['Close_SP500_log'] = sc.fit_transform(log_data['Close_SP500_log'].values.reshape(-1, 1))

log_data['nikkei_diff_open_close_log'] = sc.fit_transform(log_data['nikkei_diff_open_close_log'].values.reshape(-1, 1))
log_data['Close_EURONEXT_diff_log'] = sc.fit_transform(log_data['Close_EURONEXT_diff_log'].values.reshape(-1, 1))
log_data['Close_ALLORDS_diff_log'] = sc.fit_transform(log_data['Close_ALLORDS_diff_log'].values.reshape(-1, 1))
log_data['Close_HANGSENG_diff_log'] = sc.fit_transform(log_data['Close_HANGSENG_diff_log'].values.reshape(-1, 1))
log_data['Close_DAX_diff_log'] = sc.fit_transform(log_data['Close_DAX_diff_log'].values.reshape(-1, 1))
log_data['Close_NYSE_diff_log'] = sc.fit_transform(log_data['Close_NYSE_diff_log'].values.reshape(-1, 1))
log_data['Close_SP500_diff_log'] = sc.fit_transform(log_data['Close_SP500_diff_log'].values.reshape(-1, 1))

In [64]:
logdata_columns = log_data.columns
VARIABLE_LABEL = []
logged_time = 3
for column in logdata_columns:
    for i in range(1, logged_time + 1):
        VARIABLE_LABEL.append('{}_{}'.format(column, i))

In [65]:
VARIABLE_LABEL

['nikkei_diff_open_close_pos_1',
 'nikkei_diff_open_close_pos_2',
 'nikkei_diff_open_close_pos_3',
 'nikkei_diff_open_close_neg_1',
 'nikkei_diff_open_close_neg_2',
 'nikkei_diff_open_close_neg_3',
 'nikkei_open_log_1',
 'nikkei_open_log_2',
 'nikkei_open_log_3',
 'nikkei_high_log_1',
 'nikkei_high_log_2',
 'nikkei_high_log_3',
 'nikkei_low_log_1',
 'nikkei_low_log_2',
 'nikkei_low_log_3',
 'nikkei_close_log_1',
 'nikkei_close_log_2',
 'nikkei_close_log_3',
 'nydow_close_log_1',
 'nydow_close_log_2',
 'nydow_close_log_3',
 'nikkei_volume_1',
 'nikkei_volume_2',
 'nikkei_volume_3',
 'exchange_1',
 'exchange_2',
 'exchange_3',
 'Close_EURONEXT_log_1',
 'Close_EURONEXT_log_2',
 'Close_EURONEXT_log_3',
 'Close_ALLORDS_log_1',
 'Close_ALLORDS_log_2',
 'Close_ALLORDS_log_3',
 'Close_HANGSENG_log_1',
 'Close_HANGSENG_log_2',
 'Close_HANGSENG_log_3',
 'Close_DAX_log_1',
 'Close_DAX_log_2',
 'Close_DAX_log_3',
 'Close_NYSE_log_1',
 'Close_NYSE_log_2',
 'Close_NYSE_log_3',
 'Close_SP500_log_1',


In [66]:
training_test_data = pd.DataFrame()
training_test_data['nikkei_diff_open_close_pos'] = log_data['nikkei_diff_open_close_pos']
training_test_data['nikkei_diff_open_close_neg'] = log_data['nikkei_diff_open_close_neg']

training_test_data['nikkei_open_log_1'] = log_data['nikkei_open_log'].shift(1)
training_test_data['nikkei_open_log_2'] = log_data['nikkei_open_log'].shift(2)
training_test_data['nikkei_open_log_3'] = log_data['nikkei_open_log'].shift(3)

training_test_data['nikkei_high_log_1'] = log_data['nikkei_high_log'].shift(1)
training_test_data['nikkei_high_log_2'] = log_data['nikkei_high_log'].shift(2)
training_test_data['nikkei_high_log_3'] = log_data['nikkei_high_log'].shift(3)

training_test_data['nikkei_low_log_1'] = log_data['nikkei_low_log'].shift(1)
training_test_data['nikkei_low_log_2'] = log_data['nikkei_low_log'].shift(2)
training_test_data['nikkei_low_log_3'] = log_data['nikkei_low_log'].shift(3)

training_test_data['nikkei_close_log_1'] = log_data['nikkei_close_log'].shift(1)
training_test_data['nikkei_close_log_2'] = log_data['nikkei_close_log'].shift(2)
training_test_data['nikkei_close_log_3'] = log_data['nikkei_close_log'].shift(3)

training_test_data['nydow_close_log_1'] = log_data['nydow_close_log'].shift(1)
training_test_data['nydow_close_log_2'] = log_data['nydow_close_log'].shift(2)
training_test_data['nydow_close_log_3'] = log_data['nydow_close_log'].shift(3)

training_test_data['nikkei_volume_1'] = log_data['nikkei_volume'].shift(1)
training_test_data['nikkei_volume_2'] = log_data['nikkei_volume'].shift(2)
training_test_data['nikkei_volume_3'] = log_data['nikkei_volume'].shift(3)

training_test_data['exchange_1'] = log_data['exchange'].shift(1)
training_test_data['exchange_2'] = log_data['exchange'].shift(2)
training_test_data['exchange_3'] = log_data['exchange'].shift(3)

training_test_data['Close_EURONEXT_log_1'] = log_data['Close_EURONEXT_log'].shift(1)
training_test_data['Close_EURONEXT_log_2'] = log_data['Close_EURONEXT_log'].shift(2)
training_test_data['Close_EURONEXT_log_3'] = log_data['Close_EURONEXT_log'].shift(3)

training_test_data['Close_ALLORDS_log_1'] = log_data['Close_ALLORDS_log'].shift(1)
training_test_data['Close_ALLORDS_log_2'] = log_data['Close_ALLORDS_log'].shift(2)
training_test_data['Close_ALLORDS_log_3'] = log_data['Close_ALLORDS_log'].shift(3)

training_test_data['Close_HANGSENG_log_1'] = log_data['Close_HANGSENG_log'].shift(1)
training_test_data['Close_HANGSENG_log_2'] = log_data['Close_HANGSENG_log'].shift(2)
training_test_data['Close_HANGSENG_log_3'] = log_data['Close_HANGSENG_log'].shift(3)

training_test_data['Close_DAX_log_1'] = log_data['Close_DAX_log'].shift(1)
training_test_data['Close_DAX_log_2'] = log_data['Close_DAX_log'].shift(2)
training_test_data['Close_DAX_log_3'] = log_data['Close_DAX_log'].shift(3)

training_test_data['Close_NYSE_log_1'] = log_data['Close_NYSE_log'].shift(1)
training_test_data['Close_NYSE_log_2'] = log_data['Close_NYSE_log'].shift(2)
training_test_data['Close_NYSE_log_3'] = log_data['Close_NYSE_log'].shift(3)

training_test_data['Close_SP500_log_1'] = log_data['Close_SP500_log'].shift(1)
training_test_data['Close_SP500_log_2'] = log_data['Close_SP500_log'].shift(2)
training_test_data['Close_SP500_log_3'] = log_data['Close_SP500_log'].shift(3)

training_test_data['nikkei_diff_open_close_log_1'] = log_data['nikkei_diff_open_close_log'].shift(1)
training_test_data['nikkei_diff_open_close_log_2'] = log_data['nikkei_diff_open_close_log'].shift(2)
training_test_data['nikkei_diff_open_close_log_3'] = log_data['nikkei_diff_open_close_log'].shift(3)

training_test_data['Close_EURONEXT_diff_log_1'] = log_data['Close_EURONEXT_diff_log'].shift(1)
training_test_data['Close_EURONEXT_diff_log_2'] = log_data['Close_EURONEXT_diff_log'].shift(2)
training_test_data['Close_EURONEXT_diff_log_3'] = log_data['Close_EURONEXT_diff_log'].shift(3)

training_test_data['Close_ALLORDS_diff_log_1'] = log_data['Close_ALLORDS_diff_log'].shift(1)
training_test_data['Close_ALLORDS_diff_log_2'] = log_data['Close_ALLORDS_diff_log'].shift(2)
training_test_data['Close_ALLORDS_diff_log_3'] = log_data['Close_ALLORDS_diff_log'].shift(3)

training_test_data['Close_HANGSENG_diff_log_1'] = log_data['Close_HANGSENG_diff_log'].shift(1)
training_test_data['Close_HANGSENG_diff_log_2'] = log_data['Close_HANGSENG_diff_log'].shift(2)
training_test_data['Close_HANGSENG_diff_log_3'] = log_data['Close_HANGSENG_diff_log'].shift(3)

training_test_data['Close_DAX_diff_log_1'] = log_data['Close_DAX_diff_log'].shift(1)
training_test_data['Close_DAX_diff_log_2'] = log_data['Close_DAX_diff_log'].shift(2)
training_test_data['Close_DAX_diff_log_3'] = log_data['Close_DAX_diff_log'].shift(3)

training_test_data['Close_NYSE_diff_log_1'] = log_data['Close_NYSE_diff_log'].shift(1)
training_test_data['Close_NYSE_diff_log_2'] = log_data['Close_NYSE_diff_log'].shift(2)
training_test_data['Close_NYSE_diff_log_3'] = log_data['Close_NYSE_diff_log'].shift(3)

training_test_data['Close_SP500_diff_log_1'] = log_data['Close_SP500_diff_log'].shift(1)
training_test_data['Close_SP500_diff_log_2'] = log_data['Close_SP500_diff_log'].shift(2)
training_test_data['Close_SP500_diff_log_3'] = log_data['Close_SP500_diff_log'].shift(3)

In [67]:
training_test_data = training_test_data.drop(training_test_data.index[training_test_data.index <= '2002-04-15'])
training_test_data = training_test_data.drop(training_test_data.index[training_test_data.index >= '2018-08-24'])

In [68]:
predictor_vars = training_test_data[training_test_data.columns[2:]]
explained_vars = training_test_data[training_test_data.columns[:2]]

In [69]:
# 学習用データは0.8 検証用データは0.1 テストは0.1
len_index = len(training_test_data)
training_data_size = int(len_index * 0.8)
val_data_size = int(len_index * 0.1)
test_data_size = int(len_index * 0.1)

training_predictor_vars = predictor_vars[:training_data_size]
training_explained_vars = explained_vars[:training_data_size]

val_predictor_vars = predictor_vars[training_data_size: training_data_size + val_data_size]
val_explained_vars = explained_vars[training_data_size: training_data_size + val_data_size]

test_predictor_vars = predictor_vars[training_data_size + val_data_size:]
test_explained_vars = explained_vars[training_data_size + val_data_size:]

training_explained_vars = training_explained_vars.drop(columns = ['nikkei_diff_open_close_neg'])
val_explained_vars = val_explained_vars.drop(columns = ['nikkei_diff_open_close_neg'])
test_explained_vars = test_explained_vars.drop(columns = ['nikkei_diff_open_close_neg'])

In [198]:
from functools import partial

reset_graph()

sess = tf.Session()

num_predictors = len(training_predictor_vars.columns)
num_explained = len(training_explained_vars.columns)

neurons = [420, 360, 300, 240, 180, 120] 
he_init = tf.variance_scaling_initializer()
batch_norm_momentum = 0.087
#batch_norm_momentum = 0.9

n_outputs = 1
#scale = 0.009
#scale = 0.00057
#scale = 0.0009
#scale = 0.0007
scale = 0.0008

X = tf.placeholder(tf.float32, shape = (None, num_predictors), name = "X")
y = tf.placeholder(tf.float32, shape = (None, n_outputs), name = "y")
training = tf.placeholder_with_default(False, shape = (), name = "training")

#dropout_rate = 0.31
#dropout_rate = 0.35
#dropout_rate = 0.33
dropout_rate = 0.34
X_drop = tf.layers.dropout(X, dropout_rate, training = training)

with tf.name_scope("DNN"):
    my_batch_norm_layer = partial(tf.layers.batch_normalization, 
                                  training = training, 
                                  momentum = batch_norm_momentum)

    my_dense_layer = partial(tf.layers.dense, 
                             kernel_initializer = he_init, 
                             kernel_regularizer = tf.contrib.layers.l1_regularizer(scale))

    hidden1 = my_dense_layer(X_drop, neurons[len(neurons) - 5], name = "hidden1")
    bn1 = tf.nn.selu(my_batch_norm_layer(hidden1))
    bn1_drop = tf.layers.dropout(bn1, dropout_rate, training = training)
    
    hidden2 = my_dense_layer(bn1_drop, neurons[len(neurons) - 4], name = "hidden2")
    bn2 = tf.nn.selu(my_batch_norm_layer(hidden2))
    bn2_drop = tf.layers.dropout(bn2, dropout_rate, training = training)
    
    hidden3 = my_dense_layer(bn2_drop, neurons[len(neurons) - 3], name = "hidden3")
    bn3 = tf.nn.selu(my_batch_norm_layer(hidden3))
    bn3_drop = tf.layers.dropout(bn3, dropout_rate, training = training)
    
    hidden4 = my_dense_layer(bn3_drop, neurons[len(neurons) - 2], name = "hidden4")
    bn4 = tf.nn.selu(my_batch_norm_layer(hidden4))
    bn4_drop = tf.layers.dropout(bn4, dropout_rate, training = training)
    
    hidden5 = my_dense_layer(bn4_drop, neurons[len(neurons) - 1], name = "hidden5")
    bn5 = tf.nn.selu(my_batch_norm_layer(hidden5))
    bn5_drop = tf.layers.dropout(bn5, dropout_rate, training = training)
    
    logits_bn = my_dense_layer(bn5_drop, n_outputs, name = "output")
    logits = my_batch_norm_layer(logits_bn)

with tf.name_scope("loss"):
    xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels = y, logits = logits)
    base_loss = tf.reduce_mean(xentropy)
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = tf.add_n([base_loss] + reg_losses, name = "loss")
    #loss = -tf.reduce_sum(X * tf.log(logits))
    
#cost = -tf.reduce_sum(explained_data * tf.log(logits))

optimizer = tf.contrib.opt.NadamOptimizer(learning_rate = 0.000010)
#optimizer = tf.contrib.opt.NadamOptimizer(learning_rate = 0.00016)
training_op = optimizer.minimize(loss)

#correct_prediction = tf.equal(tf.argmax(logits, axis=row), tf.argmax(y, axis=row))
#correct = tf.nn.in_top_k(logits, y, 1)
predicted = tf.nn.sigmoid(logits)
correct_pred = tf.equal(tf.round(predicted), y)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [199]:
from datetime import datetime

now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)
loss_summary = tf.summary.scalar('loss', loss)
train_accuracy_summary = tf.summary.scalar('train_accuracy', accuracy)
val_accuracy_summary = tf.summary.scalar('val_accuracy', accuracy)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [200]:
#n_epochs = 1000
n_epochs = 5000
batch_size = 103

extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

min_loss = 100
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        X_batches = np.array_split(training_predictor_vars.values, len(training_predictor_vars.values) // batch_size)
        y_batches = np.array_split(training_explained_vars.values, len(training_explained_vars.values) // batch_size)
        
        for batch_index, (X_batch, y_batch) in enumerate(zip(X_batches, y_batches)):
            sess.run([training_op, extra_update_ops],
                     feed_dict = {training: True, X: X_batch, y: y_batch })
            
        
        n_batches = len(training_predictor_vars.values) // batch_size
        if epoch % 20 == 0:
            step = epoch * n_batches + batch_index
            loss_summary_str = loss_summary.eval(feed_dict = {X: X_batch, y: y_batch})
            train_acc_summary_str = train_accuracy_summary.eval(feed_dict = {X: X_batch, y: y_batch})
            val_acc_summary_str = val_accuracy_summary.eval(feed_dict = {X: val_predictor_vars.values, y: val_explained_vars.values})
            file_writer.add_summary(loss_summary_str, step)
            file_writer.add_summary(train_acc_summary_str, step)
            file_writer.add_summary(val_acc_summary_str, step)
            
            val_loss = loss.eval(feed_dict = {X: val_predictor_vars.values, y: val_explained_vars.values})
            if min_loss > val_loss:
                saver_path = saver.save(sess, "./tmp/my_model_final.ckpt")
                min_loss = val_loss
                print(step, ':', val_loss)
                
        
        if epoch % 100 == 0:
            print(epoch) 
        
        """if epoch % 100 == 0:
            accuracy_train = accuracy.eval(feed_dict = {X: training_predictor_vars.values, y: training_explained_vars.values.reshape(-1)})
            print(epoch, "train accuracy:", accuracy_train)
            
            accuracy_val = accuracy.eval(feed_dict = {X: val_predictor_vars.values, y: val_explained_vars.values.reshape(-1)})
            print(epoch, "Validation accuracy:", accuracy_val)"""
            
    
    #accuracy_test = accuracy.eval(feed_dict = {X: test_predictor_vars.values, y: test_explained_vars.values.reshape(-1)})
    #print(epoch, "test accuracy:", accuracy_test)      
    
    #saver_path = saver.save(sess, "./tmp/my_model_final.ckpt")

35 : 10.549761
0
755 : 10.233537
1475 : 9.921441
2195 : 9.588928
2915 : 9.221858
3635 : 8.833202
100
4355 : 8.457102
5075 : 8.077936
5795 : 7.6886163
6515 : 7.312763
7235 : 6.938542
200
7955 : 6.576211
8675 : 6.231027
9395 : 5.918155
10115 : 5.621533
10835 : 5.3145394
300
11555 : 5.0272665
12275 : 4.779239
12995 : 4.532042
13715 : 4.321802
14435 : 4.1043687
400
15155 : 3.9129245
15875 : 3.7305048
16595 : 3.5544958
17315 : 3.4027998
18035 : 3.2500775
500
18755 : 3.1215456
19475 : 2.983039
20195 : 2.8890426
20915 : 2.7699919
21635 : 2.6677728
600
22355 : 2.550409
23075 : 2.4619558
23795 : 2.3830948
24515 : 2.3004043
25235 : 2.2123003
700
25955 : 2.1436603
26675 : 2.088919
27395 : 2.0040042
28115 : 1.9443882
28835 : 1.8949102
800
29555 : 1.8347569
30275 : 1.7843673
30995 : 1.7451179
31715 : 1.6856321
32435 : 1.6516347
900
33155 : 1.6060877
33875 : 1.5752015
34595 : 1.5329047
35315 : 1.4971151
36035 : 1.4595821
1000
36755 : 1.427588
37475 : 1.389687
38195 : 1.3674291
38915 : 1.3348825
3963

In [188]:
with tf.Session() as sess:
    saver.restore(sess, "./test 0.57758623/my_model_final.ckpt")
    tf.
    #accuracy_test = accuracy.eval(feed_dict = {X: test_predictor_vars.values, y: test_explained_vars.values})
    #print("test accuracy:", accuracy_test)      

SyntaxError: invalid syntax (<ipython-input-188-0b4a916cac44>, line 3)

In [94]:
n_batches

34

In [108]:
len(training_predictor_vars.values)

3708

In [254]:
with tf.Session() as sess:
    saver.restore(sess, "./tmp2/my_model_final.ckpt")
    accuracy_train = accuracy.eval(feed_dict = {X: training_predictor_vars.values, y: training_explained_vars.values})
    print("train accuracy:", accuracy_train)  
    accuracy_val = accuracy.eval(feed_dict = {X: val_predictor_vars.values, y: val_explained_vars.values})
    print("val accuracy:", accuracy_val)       
    accuracy_test = accuracy.eval(feed_dict = {X: test_predictor_vars.values, y: test_explained_vars.values})
    print("test accuracy:", accuracy_test)      

INFO:tensorflow:Restoring parameters from ./tmp2/my_model_final.ckpt
train accuracy: 0.6113808
val accuracy: 0.57451403
test accuracy: 0.57974136


In [230]:
a = test_predictor_vars.values[1].reshape(-1, 60)
b = test_explained_vars.values[1].reshape(-1, 1)

In [232]:
with tf.Session() as sess:
    saver.restore(sess, "./tmp2/my_model_final.ckpt")
    accuracy_train = predicted.eval(feed_dict = {X: test_predictor_vars.values, y: test_explained_vars.values})
    
    print("train accuracy:", accuracy_train)

INFO:tensorflow:Restoring parameters from ./tmp2/my_model_final.ckpt
train accuracy: [[0.7105289 ]
 [0.46206582]
 [0.5935268 ]
 [0.44990826]
 [0.4568636 ]
 [0.648838  ]
 [0.49487066]
 [0.5059515 ]
 [0.4270607 ]
 [0.39251238]
 [0.5014732 ]
 [0.65321183]
 [0.3961435 ]
 [0.44267926]
 [0.5569222 ]
 [0.56273717]
 [0.49060428]
 [0.44831595]
 [0.55090106]
 [0.46272847]
 [0.55211663]
 [0.5610099 ]
 [0.52601373]
 [0.35516518]
 [0.40085968]
 [0.51715416]
 [0.50112605]
 [0.46359706]
 [0.409473  ]
 [0.53900266]
 [0.48535162]
 [0.5839814 ]
 [0.56843907]
 [0.5401746 ]
 [0.60830986]
 [0.59355706]
 [0.44510347]
 [0.46985865]
 [0.45742333]
 [0.47035578]
 [0.47850406]
 [0.52365893]
 [0.52059245]
 [0.5566388 ]
 [0.4685053 ]
 [0.36317578]
 [0.42728487]
 [0.48753652]
 [0.4871611 ]
 [0.485416  ]
 [0.6316857 ]
 [0.49658597]
 [0.44248736]
 [0.48003238]
 [0.5257623 ]
 [0.56121933]
 [0.5180145 ]
 [0.5619594 ]
 [0.5840343 ]
 [0.48466137]
 [0.4963322 ]
 [0.5045027 ]
 [0.44531035]
 [0.53694564]
 [0.58918947]
 [0.5

In [238]:
accuracy_train2 = np.round(accuracy_train)

In [250]:
tmp = 0
for pre, re in zip(accuracy_train2, test_explained_vars.values):
    if pre[0] == re[0]:
        tmp += 1

In [253]:
tmp/len(accuracy_train2)

0.5797413793103449

464