In [92]:
# import toolkit
# import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from datetime import date
import pandas as pd

In [93]:
# import sentimental data
amazon_df = pd.read_csv('amazon_df.csv')
gme_df = pd.read_csv('gme_df.csv')
kodak_df = pd.read_csv('kodak_df.csv')
facebook_df = pd.read_csv('fb_df.csv')

In [94]:
# renaming columns
amazon_df.rename({'0':'Date', '1':'Positive', '2':'Neutral', '3':'Negative', '4':'Sentimental Score'}, axis=1, inplace=True)
gme_df.rename({'0':'Date', '1':'Positive', '2':'Neutral', '3':'Negative', '4':'Sentimental Score'}, axis=1, inplace=True)
kodak_df.rename({'0':'Date', '1':'Positive', '2':'Neutral', '3':'Negative', '4':'Sentimental Score'}, axis=1, inplace=True)
facebook_df.rename({'0':'Date', '1':'Positive', '2':'Neutral', '3':'Negative', '4':'Sentimental Score'}, axis=1, inplace=True)

# dropping columns

amazon_df.drop(columns=['Unnamed: 0'], inplace=True)
gme_df.drop(columns=['Unnamed: 0'], inplace=True)
kodak_df.drop(columns=['Unnamed: 0'], inplace=True)
facebook_df.drop(columns=['Unnamed: 0'], inplace=True)


In [95]:
# import price data
amazon_price_df = pd.read_csv('price/AMZN_1.csv')
gme_price_df = pd.read_csv('price/GME_1.csv')
kodak_price_df = pd.read_csv('price/KODK_1.csv')
facebook_price_df = pd.read_csv('price/FB_1.csv')

In [96]:
# merging DB

amazon_df = pd.merge(amazon_df, amazon_price_df, how="outer", on=["Date", "Date"])
gme_df = pd.merge(gme_df, gme_price_df, how="outer", on=["Date", "Date"] )
kodak_df = pd.merge(kodak_df, kodak_price_df, how="outer", on=["Date", "Date"] )
facebook_df = pd.merge(facebook_df, facebook_price_df, how="outer", on=["Date", "Date"] )


print(amazon_df.shape)
print(kodak_df.shape)
print(gme_df.shape)
print(facebook_df.shape)

(862, 11)
(653, 11)
(726, 11)
(855, 11)


In [97]:
facebook_df.drop(columns=['Date'],inplace=True)
kodak_df.drop(columns=['Date'],inplace=True)
gme_df.drop(columns=['Date'],inplace=True)
amazon_df.drop(columns=['Date'],inplace=True)

In [98]:
facebook_df['retorno'] = facebook_df['Close'].pct_change()
kodak_df['retorno'] = kodak_df['Close'].pct_change()
gme_df['retorno'] = gme_df['Close'].pct_change()
amazon_df['retorno'] = amazon_df['Close'].pct_change()

In [105]:
from sklearn.impute import KNNImputer, SimpleImputer

In [106]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
preprocessor = ColumnTransformer([('imputer',  KNNImputer(n_neighbors=1),['Open','Low','Close','Adj Close', 'Volume'])], remainder='passthrough')
filling_na = SimpleImputer(strategy="constant", fill_value=-1000)
my_pipeline = Pipeline(steps=[('preprocessor', preprocessor),('fill_na', filling_na)])

In [69]:
facebook_df.isna().sum()

Positive               8
Neutral                8
Negative               8
Sentimental Score      8
Open                 219
High                 219
Low                  219
Close                219
Adj Close            219
Volume               219
retorno                2
dtype: int64

In [115]:
#facebook_df = facebook_df[['Open','Low','Close','Adj Close', 'Volume']].apply(lambda x:preprocessor.fit_transform(x))
gme_np = my_pipeline.fit_transform(gme_df) 
gme_np

array([[ 1.60000000e+02,  1.58009995e+02,  1.66820007e+02, ...,
        -6.67000000e-01,  1.71990005e+02, -1.00000000e+03],
       [ 1.80490005e+02,  1.65070007e+02,  1.67619995e+02, ...,
         5.00000000e-01,  1.82380005e+02,  4.79551592e-03],
       [ 1.87679993e+02,  1.79000000e+02,  1.80059998e+02, ...,
         1.30000000e+00,  1.88789993e+02,  7.42155075e-02],
       ...,
       [ 1.85300003e+02,  1.76149994e+02,  1.78850006e+02, ...,
        -1.00000000e+03,  1.87690002e+02, -3.74575753e-02],
       [ 1.81000000e+02,  1.73839996e+02,  1.80360001e+02, ...,
        -1.00000000e+03,  1.81600006e+02,  8.44280095e-03],
       [ 1.80360001e+02,  1.78759995e+02,  1.83940002e+02, ...,
        -1.00000000e+03,  1.86039993e+02,  1.98491959e-02]])

In [116]:
gme_np = my_pipeline.fit_transform(gme_df) 
kodak_np = my_pipeline.fit_transform(kodak_df)
facebook_np = my_pipeline.fit_transform(facebook_df)
amazon_np = my_pipeline.fit_transform(amazon_df)

In [117]:
news_stock_np = np.array([gme_np,kodak_np,amazon_np,facebook_np])

  news_stock_np = np.array([gme_np,kodak_np,amazon_np,facebook_np])


In [118]:
#reddit_day.fillna(-1000,inplace=True)
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [119]:
news_stock_np.shape

(4,)

In [120]:
news_stock_np_pad = pad_sequences(news_stock_np,value=-1000, dtype='float32') # int32 by default
news_stock_np_pad

array([[[-1.0000000e+03, -1.0000000e+03, -1.0000000e+03, ...,
         -1.0000000e+03, -1.0000000e+03, -1.0000000e+03],
        [-1.0000000e+03, -1.0000000e+03, -1.0000000e+03, ...,
         -1.0000000e+03, -1.0000000e+03, -1.0000000e+03],
        [-1.0000000e+03, -1.0000000e+03, -1.0000000e+03, ...,
         -1.0000000e+03, -1.0000000e+03, -1.0000000e+03],
        ...,
        [ 1.8530000e+02,  1.7614999e+02,  1.7885001e+02, ...,
         -1.0000000e+03,  1.8769000e+02, -3.7457574e-02],
        [ 1.8100000e+02,  1.7384000e+02,  1.8036000e+02, ...,
         -1.0000000e+03,  1.8160001e+02,  8.4428005e-03],
        [ 1.8036000e+02,  1.7875999e+02,  1.8394000e+02, ...,
         -1.0000000e+03,  1.8603999e+02,  1.9849196e-02]],

       [[-1.0000000e+03, -1.0000000e+03, -1.0000000e+03, ...,
         -1.0000000e+03, -1.0000000e+03, -1.0000000e+03],
        [-1.0000000e+03, -1.0000000e+03, -1.0000000e+03, ...,
         -1.0000000e+03, -1.0000000e+03, -1.0000000e+03],
        [-1.0000000e+03, 

In [121]:
news_stock_np_pad.shape

(4, 862, 11)

In [122]:
x=news_stock_np_pad[:,:-1,:]
x.shape

(4, 861, 11)

In [123]:
y=news_stock_np_pad[:,-1,-1]
y.shape

(4,)

In [124]:
y_train =y[0:3]
y_test = y[-1:]

In [125]:
x_train = x[0:3,:,:]
x_test = x[-1:,:,:]

In [126]:
y

array([0.0198492 , 0.00137552, 0.0118087 , 1.225369  ], dtype=float32)

In [127]:
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.layers.experimental.preprocessing import Normalization
def init_model():
    #normalizar o x train
    normalizer = Normalization()
    model = models.Sequential()
    model.add(layers.Masking(mask_value=-1000.))
    normalizer.adapt(x_train)
    model.add(layers.LSTM(20, return_sequences=True, activation='tanh'))
    model.add(layers.LSTM(10, return_sequences=False, activation='tanh'))
    model.add(layers.Dense(5, activation='relu'))
    model.add(layers.Dense(10, activation='linear'))
    
  #  model.compile(loss='mse', 
  #                optimizer='rmsprop', 
  #                metrics=['mae'])
    model.compile(loss='mse', 
                  optimizer=RMSprop(learning_rate=0.0003), 
                  metrics=['mae'])
    
    return model

#init_model().summary()

In [128]:
model=init_model()

2021-07-31 11:17:47.448022: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-07-31 11:17:47.650083: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)


In [132]:
model.fit(x_train,y_train,epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x15bb06520>

In [130]:
model.predict(x_test)

array([[ 0.19145155, -0.04493175,  0.1359333 , -0.1457363 ,  0.17241699,
        -0.04693845, -0.16149047,  0.0534225 ,  0.09431215,  0.01208889]],
      dtype=float32)