In [1]:
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
import tensorflow as tf
from time import time
from keras.models import Sequential, load_model
from keras.layers.core import Dense, Activation, Dropout, Flatten
from keras.layers import LSTM
from tensorflow.keras.optimizers import Adam
from time import time
from keras.callbacks import EarlyStopping
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler()

df = pd.read_csv("twitter_training.csv",parse_dates=True,index_col="Date")
# df = df[1563:]
print(df.corr()['Close'])
df = df[['Open', 'High', 'Low','Close','sentiment_score']]
print("---dataframe head---")
print(df.head())

print("--scaling data---")
data = sc.fit_transform(df) 
train_ind = int(0.6*len(df))
val_ind = train_ind + int(0.2*len(df))

train = data[:train_ind]
val = data[train_ind:val_ind]
test = data[val_ind:]

print("--shapes--")
print("train,test,val",train.shape, test.shape, val.shape)

xtrain,ytrain,xval,yval,xtest,ytest = train[:,:5],train[:,3],val[:,:5],val[:,3],test[:,:5],test[:,3]

lookback = 60
n_features = 5
train_len = len(xtrain) - lookback
test_len = len(xtest) - lookback
val_len = len(xval) - lookback

x_train = np.zeros((train_len, lookback, n_features))
y_train = np.zeros((train_len))
for i in range(train_len):
    ytemp = i+lookback
    x_train[i] = xtrain[i:ytemp]
    y_train[i] = ytrain[ytemp]
print("x_train", x_train.shape)
print("y_train", y_train.shape)

x_test = np.zeros((test_len, lookback, n_features))
y_test = np.zeros((test_len))
for i in range(test_len):
    ytemp = i+lookback
    x_test[i] = xtest[i:ytemp]
    y_test[i] = ytest[ytemp]
print("x_test", x_test.shape)
print("y_test", y_test.shape)

x_val = np.zeros((val_len, lookback, n_features))
y_val = np.zeros((val_len))
for i in range(val_len):
    ytemp = i+lookback
    x_val[i] = xval[i:ytemp]
    y_val[i] = yval[ytemp]
print("x_val", x_val.shape)
print("y_val", y_val.shape)

model = Sequential()  
model.add(LSTM(600,input_shape = (lookback, n_features), return_sequences=True))
model.add(LSTM(700))
model.add(Dropout(0.15))
model.add(Dense(1))
print(model.summary())

model.compile(loss = 'mse', optimizer = 'adam')
earlystop = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=80,  verbose=1, mode='min')

start = time()
print("start:",0)
history = model.fit(x_train,y_train, epochs = 400, batch_size=30, 
          validation_data=(x_val,y_val),verbose = 1, 
          shuffle = False, callbacks=[earlystop])
print("endtime:",time()-start)

model.save("./models/model_vader7.h5")
loss = history.history
plt.plot(loss['loss'])
plt.plot(loss['val_loss'])
plt.savefig("./plots/loss_vader7.jpg")

model.save("./models/model_vader6.h5")
loss = history.history
plt.plot(loss['loss'])
plt.plot(loss['val_loss'])
plt.savefig("./plots/loss_vader6.jpg")
plt.show()
# model = load_model("./models/model_vader1.h5")
y_pred = model.predict(x_test)
# print(model.summary())


plt.figure(figsize=(20,10))
plt.plot( y_test, '.-', color='red', label='Real values', alpha=0.5)
plt.plot( y_pred, '.-', color='blue', label='Predicted values', alpha=1)
plt.savefig("./plots/result_vader7.jpg")
plt.savefig("./plots/result_vader6.jpg")
plt.show()

print("r2_score:",r2_score(y_pred,y_test))


sentiment_score   -0.120703
Open               0.991182
High               0.996195
Low                0.995533
Close              1.000000
Out               -0.103699
Name: Close, dtype: float64
---dataframe head---
                  Open        High         Low       Close  sentiment_score
Date                                                                       
2020-11-01  394.000000  406.980000  392.300000  400.500000         0.305093
2020-11-02  394.000000  406.980011  392.299988  400.510010         0.061635
2020-11-03  409.730011  427.769989  406.690002  423.899994         0.180670
2020-11-04  430.619995  435.399994  417.100006  420.980011         0.534486
2020-11-05  428.299988  440.000000  424.000000  438.089996         0.279628
--scaling data---
--shapes--
train,test,val (237, 5) (79, 5) (79, 5)
x_train (177, 60, 5)
y_train (177,)
x_test (19, 60, 5)
y_test (19,)
x_val (19, 60, 5)
y_val (19,)


2021-12-06 21:10:31.088613: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory
2021-12-06 21:10:31.088636: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1850] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2021-12-06 21:10:31.088869: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 60, 600)           1454400   
                                                                 
 lstm_1 (LSTM)               (None, 700)               3642800   
                                                                 
 dropout (Dropout)           (None, 700)               0         
                                                                 
 dense (Dense)               (None, 1)                 701       
                                                                 
Total params: 5,097,901
Trainable params: 5,097,901
Non-trainable params: 0
_________________________________________________________________
None
start: 0
Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoc

Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78/400
Epoch 79/400
Epoch 80/400
Epoch 81/400
Epoch 82/400
Epoch 83/400
Epoch 84/400
Epoch 85/400
Epoch 86/400
Epoch 87/400
Epoch 88/400
Epoch 89/400
Epoch 90/400
Epoch 91/400
Epoch 92/400
Epoch 93/400
Epoch 94/400
Epoch 95/400
Epoch 96/400
Epoch 97/400
Epoch 98/400
Epoch 99/400
Epoch 100/400
Epoch 101/400
Epoch 102/400
Epoch 103/400
Epoch 104/400
Epoch 105/400
Epoch 106/400
Epoch 00106: early stopping
endtime: 310.97559213638306
r2_score: -2.806721944213503


In [2]:
y_pred3 = model.predict(x_train)


In [4]:
print(y_pred3)

[[0.31742042]
 [0.33421808]
 [0.35309458]
 [0.37066546]
 [0.36695448]
 [0.37783983]
 [0.3885431 ]
 [0.39514163]
 [0.42937177]
 [0.4796895 ]
 [0.52030575]
 [0.5540923 ]
 [0.5349953 ]
 [0.53298104]
 [0.5292028 ]
 [0.52582407]
 [0.51204866]
 [0.49777865]
 [0.49247304]
 [0.4887007 ]
 [0.50710636]
 [0.50794363]
 [0.50584584]
 [0.5105157 ]
 [0.51085544]
 [0.5089895 ]
 [0.5159109 ]
 [0.5190889 ]
 [0.5137817 ]
 [0.5096271 ]
 [0.48684877]
 [0.46030325]
 [0.44942153]
 [0.46424833]
 [0.4920578 ]
 [0.49938673]
 [0.4916763 ]
 [0.51518637]
 [0.52112204]
 [0.5176707 ]
 [0.5112549 ]
 [0.49521303]
 [0.47761145]
 [0.46143222]
 [0.4722612 ]
 [0.4662333 ]
 [0.46289754]
 [0.4548308 ]
 [0.44303182]
 [0.43738368]
 [0.43244267]
 [0.42498234]
 [0.41935176]
 [0.41241542]
 [0.3942393 ]
 [0.371534  ]
 [0.3725206 ]
 [0.35088637]
 [0.32992068]
 [0.31057063]
 [0.30827516]
 [0.3273695 ]
 [0.3336131 ]
 [0.31723312]
 [0.2902079 ]
 [0.25688523]
 [0.24043038]
 [0.23176308]
 [0.21848054]
 [0.2428351 ]
 [0.27089915]
 [0.30