In [7]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.externals import joblib

df = pd.read_csv("../data/dow_jones_stocks/sentiments/dataset_1/many_sentiments/AAPL.csv")

# "Open_before" holds the "Open" value of the preceding row
# (presumably the previous trading day - TODO confirm the CSV is sorted oldest first).
df["Open_before"] = df["Open"].shift(1)

# Relative change of the open value with respect to the preceding row.
df["Open_changes"] = (df["Open"] / df["Open_before"]) - 1

# dropna() removes every row that contains a NaN in any column. That always
# includes the first row, whose "Open_before" is NaN because of the shift.
df = df.dropna()
df = df.drop(columns=['Unnamed: 0'])

# Reverse the row order and only then renumber the rows 0..n-1.
# Previously the index was shifted to start at -1 and left in descending order
# after the reversal, so label-based lookups such as sentiments[i] returned a
# different row than positional slices such as changes[:n]. With a fresh
# ascending index, label i and position i refer to the same row.
df = df[::-1].reset_index(drop=True)

changes = df["Open_changes"]
sentiments = df["Sentiment"]

In [8]:
# position that separates training data (first 80 %) from test data (rest)
split_border = int(len(changes) * 0.8)

# both parts as column vectors of shape (n, 1), the layout the scaler works on
train = np.array(changes[:split_border]).reshape(-1, 1)
test = np.array(changes[split_border:]).reshape(-1, 1)

print(train.shape)
print(train)

scaler = MinMaxScaler()

# The training data is min-max scaled segment by segment: full windows of
# `smoothing_window_size` rows up to row 5000, then one last segment with all
# remaining rows (train[5000:]). The scaler is re-fitted for every segment, so
# after the loop it holds the fit of the last segment only.
smoothing_window_size = 2500
segments = [
    (start, start + smoothing_window_size)
    for start in range(0, 5000, smoothing_window_size)
]
segments.append((segments[-1][1], None))

for lower, upper in segments:
    block = train[lower:upper, :]
    scaler.fit(block)
    train[lower:upper, :] = scaler.transform(block)

# the test data is scaled with the scaler as fitted on the last training segment
test = scaler.transform(test)

# back to flat 1-D arrays
train = train.reshape(-1)
test = test.reshape(-1)

# exponential moving average over the training data, written back in place;
# each value becomes gamma * value + (1 - gamma) * previous average
EMA = 0.0
gamma = 0.1
for pos in range(len(train)):
    EMA = gamma * train[pos] + (1 - gamma) * EMA
    train[pos] = EMA

# persist the scaler (last-segment fit) for later evaluation
joblib.dump(scaler, '../models/normalized_sentiments_scaler.save')

(7811, 1)
[[ 0.00966983]
 [ 0.01732329]
 [ 0.00949477]
 ...
 [ 0.05806439]
 [ 0.01307253]
 [-0.01923098]]


['../models/normalized_sentiments_scaler.save']

In [9]:
train_len = len(train)
test_len = len(test)

# number of consecutive series values that make up one input sample
window = 20

# Plain array of the sentiment column so that every lookup below is positional.
# sentiments[i] on the Series looks a row up by its index label, which only
# matches the positional windows when the labels happen to equal the positions.
sentiment_values = np.array(sentiments)


def _build_inputs(series, series_sentiments):
    """Build the model inputs for one data split.

    One sample is created per i in range(len(series) - window). Sample i holds
    the `window` values series[i+1 : i+1+window] followed by
    series_sentiments[i]; the matching target is series[i].

    series            -- 1-D array with the values of this split
    series_sentiments -- sentiments of the same rows, in the same order

    Returns an array of shape (number_of_samples, window + 1, 1).
    """
    # No try/except here on purpose: an error must surface, because silently
    # skipping a sample would leave inputs and targets with different lengths.
    samples = [
        series[i + 1:i + 1 + window].tolist() + [series_sentiments[i]]
        for i in range(len(series) - window)
    ]
    return np.array(samples).reshape(-1, window + 1, 1)


# targets: the value in front of each window
Y_train = np.array(train[:train_len - window])
Y_test = np.array(test[:test_len - window])

# The test rows follow the training rows in the frame, so their sentiments
# start at position train_len (not at 0 again).
X_train = _build_inputs(train, sentiment_values[:train_len])
X_test = _build_inputs(test, sentiment_values[train_len:])

In [10]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Two stacked LSTM layers followed by a small dense head.
# Each input sample has 21 time steps with one feature each. The first LSTM
# returns its full output sequence so the second LSTM can consume it.
# NOTE(review): the input_shape on the second LSTM looks redundant, since the
# shape follows from the previous layer - confirm before removing.
model = Sequential([
    LSTM(128, input_shape=(21, 1), return_sequences=True),
    LSTM(64, input_shape=(21,1)),
    Dense(64, activation="relu"),
    # single output squashed into (0, 1) by the sigmoid
    Dense(1, activation="sigmoid"),
])

# mean squared error regression, trained for 10 epochs in batches of 32
model.compile(loss="mse", optimizer="rmsprop")
model.fit(X_train, Y_train, epochs=10, batch_size=32)

# store the trained model for later evaluation
model.save('../models/normalized_sentiments.h5')

W1110 08:21:22.638935 14180 deprecation.py:323] From C:\Users\Daniel\.conda\envs\python36\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W1110 08:21:25.341904 14180 deprecation_wrapper.py:119] From C:\Users\Daniel\.conda\envs\python36\lib\site-packages\keras\backend\tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.

W1110 08:21:25.364586 14180 deprecation_wrapper.py:119] From C:\Users\Daniel\.conda\envs\python36\lib\site-packages\keras\backend\tensorflow_backend.py:973: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
