In [15]:
#Importing Dependencies

import os
import numpy as np
import pandas as pd
import random
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import keras_tuner as kt

In [16]:
# Set random seed

seed = 29
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [17]:
# Get Dataset

df = pd.read_csv("../input/jpx-tokyo-stock-exchange-prediction/train_files/stock_prices.csv")
df

Unnamed: 0,RowId,Date,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target
0,20170104_1301,2017-01-04,1301,2734.0,2755.0,2730.0,2742.0,31400,1.0,,False,0.000730
1,20170104_1332,2017-01-04,1332,568.0,576.0,563.0,571.0,2798500,1.0,,False,0.012324
2,20170104_1333,2017-01-04,1333,3150.0,3210.0,3140.0,3210.0,270800,1.0,,False,0.006154
3,20170104_1376,2017-01-04,1376,1510.0,1550.0,1510.0,1550.0,11300,1.0,,False,0.011053
4,20170104_1377,2017-01-04,1377,3270.0,3350.0,3270.0,3330.0,150800,1.0,,False,0.003026
...,...,...,...,...,...,...,...,...,...,...,...,...
2332526,20211203_9990,2021-12-03,9990,514.0,528.0,513.0,528.0,44200,1.0,,False,0.034816
2332527,20211203_9991,2021-12-03,9991,782.0,794.0,782.0,794.0,35900,1.0,,False,0.025478
2332528,20211203_9993,2021-12-03,9993,1690.0,1690.0,1645.0,1645.0,7200,1.0,,False,-0.004302
2332529,20211203_9994,2021-12-03,9994,2388.0,2396.0,2380.0,2389.0,6500,1.0,,False,0.009098


In [18]:
# Spliting the data into Training and Validation Set
# Everything from 2021 will be used for validation
# Everything older will be used for training

df_train = df[df['Date'] < '2021-01-01'].copy()
df_train.shape

(1880531, 12)

In [19]:
df_valid = df[df['Date'] >= '2021-01-01'].copy()
df_valid.shape

(452000, 12)

In [20]:
# Using about 20% for validation

print("Precent validation: {}".format(df_valid.shape[0] / df.shape[0] * 100))

Precent validation: 19.37809186673189


In [21]:
num_features = ['Open', 'High', 'Low', 'Close', 'Volume']
target = ['Target']
df_train = df_train[num_features + target].reset_index(drop=True).copy()
df_valid = df_valid[num_features + target].reset_index(drop=True).copy()
df_train.dropna(subset=num_features + target, axis=0, inplace=True)
df_valid.dropna(subset=num_features + target, axis=0, inplace=True)
df_valid.head()

Unnamed: 0,Open,High,Low,Close,Volume,Target
0,2951.0,2951.0,2913.0,2950.0,9700,0.011502
1,428.0,429.0,416.0,421.0,1780500,0.019093
2,2229.0,2231.0,2179.0,2202.0,112400,0.015075
3,1701.0,1701.0,1672.0,1674.0,67900,-0.003503
4,1597.0,1597.0,1577.0,1588.0,4500,-0.012033


In [22]:
# Define encoding function for numerical features
def encode_numerical_feature(feature, name, dataset):
    # Create a Normalization layer for our feature
    normalizer = layers.Normalization()

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the statistics of the data
    normalizer.adapt(feature_ds)

    # Normalize the input feature
    encoded_feature = normalizer(feature)
    return encoded_feature

In [23]:
# Generate tensorflow dataset
def dataframe_to_dataset(dataframe):
    dataframe = dataframe.copy()
    labels = dataframe.pop("Target")
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    ds = ds.shuffle(buffer_size=len(dataframe))
    return ds

train_ds = dataframe_to_dataset(df_train)
valid_ds = dataframe_to_dataset(df_valid)

In [24]:
# Batch the dataset
train_ds = train_ds.batch(1024)
valid_ds = valid_ds.batch(1024)

In [25]:
%%time
# Raw numerical features
Open = keras.Input(shape=(1,), name="Open")
High = keras.Input(shape=(1,), name="High")
Low = keras.Input(shape=(1,), name="Low")
Close = keras.Input(shape=(1,), name="Close")
Volume = keras.Input(shape=(1,), name="Volume")

all_inputs = [Open, High, Low, Close, Volume]

# Encode numerical features
open_encoded = encode_numerical_feature(Open, "Open", train_ds)
high_encoded = encode_numerical_feature(High, "High", train_ds)
low_encoded = encode_numerical_feature(Low, "Low", train_ds)
close_encoded = encode_numerical_feature(Close, "Close", train_ds)
volume_encoded = encode_numerical_feature(Volume, "Volume", train_ds)

CPU times: total: 1min 7s
Wall time: 59.5 s


In [12]:
# building a model using a model builder function
def model_builder(hp):
    """
    Build model for hyperparameters tuning
    
    hp: HyperParameters class instance
    """
    
    # defining a set of hyperparametrs for tuning and a range of values for each

    
    
    # building a model
    inputs = tf.keras.Input(shape=(None,5))
    x = layers.Dense(units=hp.Int("units", min_value=32, max_value=512, step=32),activation="relu",)(inputs)
    outputs = layers.Dense(1, activation="linear")(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    # model compilation
    model.compile(
            optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"],
    )
    
    return model

In [13]:
# tuner initialization
tuner = kt.Hyperband(hypermodel = model_builder,
                     objective = kt.Objective("val_f1", direction="max"),
                     max_epochs = 20,
					 project_name='hyperband_tuner')

INFO:tensorflow:Reloading Oracle from existing project .\hyperband_tuner\oracle.json


In [14]:
tuner.search(list(train_ds.values()), epochs=2, validation_data=valid_ds)

AttributeError: 'BatchDataset' object has no attribute 'values'

In [26]:
# Concat all features of input layer
all_features = layers.concatenate(
    [
        open_encoded,
        high_encoded,
        low_encoded,
        close_encoded,
        volume_encoded,
    ]
)

# Add several hidden layers with batch_norm and dropout
x = layers.Dense(256, activation="relu")(all_features)


# Output layer for regression task
output = layers.Dense(1, activation="linear")(x)

# Create our NN model
model = keras.Model(all_inputs, output)
model.compile("adam", "mse", metrics=[tf.keras.metrics.RootMeanSquaredError()])

In [None]:
model.fit(train_ds, epochs=50, validation_data=valid_ds)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50

In [None]:
# Save model
model.save("model.h5")