<a href="https://colab.research.google.com/github/nyac-1/trading-financial-announcements/blob/main/model_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab setup: install the packages the import cell below relies on.
# tensorflow-text is pinned to the 2.8 series and tf-models-official (the
# source of `official.nlp.optimization`) to 2.7.0.
!pip install -q -U "tensorflow-text==2.8.*"
!pip install -q -U tf-models-official==2.7.0
# NOTE(review): tfds-nightly is not version-pinned, so this line installs
# whatever nightly build is current at run time — TODO pin for reproducibility.
!pip install -U tfds-nightly

In [2]:
import os
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
import tensorflow_text as text  # A dependency of the preprocessing model
import tensorflow_addons as tfa
from official.nlp import optimization
import numpy as np

# Only surface TensorFlow Python log records at ERROR level.
tf.get_logger().setLevel('ERROR')

# Must be set before the hub.load / hub.KerasLayer calls in later cells.
# NOTE(review): presumably makes TF Hub read the model uncompressed rather
# than downloading a compressed archive — confirm against the TF Hub docs.
os.environ["TFHUB_MODEL_LOAD_FORMAT"] = "UNCOMPRESSED"

# Guard clause: refuse to continue when no GPU is visible to TensorFlow.
if not tf.config.list_physical_devices('GPU'):
  raise ValueError('Running on CPU is not recommended.')

# NOTE(review): `strategy` is created here but no other cell visible in this
# notebook builds or compiles the model under `strategy.scope()`.
strategy = tf.distribute.MirroredStrategy()
print('Using GPU')

Using GPU


In [3]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Mount Google Drive (Colab only) and make the "Colab Notebooks" folder the
# working directory; relative paths used later (e.g. the CSV written by the
# last cell) resolve against it.
from google.colab import drive; drive.mount('/content/gdrive', force_remount=True)
%cd '/content/gdrive/My Drive/Colab Notebooks'

import pandas as pd
import glob
import numpy as np

Mounted at /content/gdrive
/content/gdrive/My Drive/Colab Notebooks


In [4]:
# Folder on the mounted Drive that holds the announcement dataset.
# NOTE(review): the name `dir` shadows Python's builtin `dir()` for the rest
# of the kernel session; kept as-is because it is a module-level binding.
dir = "/content/gdrive/My Drive/nlp"
df = pd.read_csv(f"{dir}/latest-3.csv")

# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,name,sent,text,dates,return,sd
0,19-May-2021 50 2.pdf,0,export xmlall applicable parts notification fo...,19-May-2021,11.661808,0.096951
1,19-May-2021 37 2.pdf,0,export xmlall applicable parts notification fo...,19-May-2021,11.661808,0.096951
2,17-May-2021 61 2.pdf,0,export xmlall applicable parts notification fo...,17-May-2021,9.912536,0.095899
3,19-Jun-2020 10 2.pdf,0,export xml please read explanatory notes caref...,19-Jun-2020,19.218241,0.233739
4,16-Jun-2020 96 2.pdf,0,export xml please read explanatory notes caref...,16-Jun-2020,11.146497,0.208791


In [5]:
# Pull model inputs and targets out of the frame as NumPy arrays:
#   texts     -> announcement text as str
#   returns   -> float32 targets, columns ordered ('return', 'sd')
#   sentiment -> float32 values from the 'sent' column
texts = df['text'].to_numpy().astype('str')
returns = df[['return', 'sd']].to_numpy().astype('float32')
sentiment = df['sent'].to_numpy().astype('float32')

# Trim each document in place, keeping the space-separated tokens between the
# 7% and 95% positions (i.e. dropping the leading 7% and trailing 5%).
for idx in range(len(texts)):
    words = texts[idx].split(" ")
    start, stop = int(len(words) * 0.07), int(len(words) * 0.95)
    texts[idx] = " ".join(words[start:stop])

In [6]:
# TF Hub handles used by the later cells: the preprocessing model is loaded in
# make_bert_preprocess_model and the encoder in build_model.
tfhub_handle_preprocess = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
tfhub_handle_encoder = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3"

# Echo both handles so the run records which models were used.
print(f'BERT model selected           : {tfhub_handle_encoder}')
print(f'Preprocess model auto-selected: {tfhub_handle_preprocess}')

BERT model selected           : https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3
Preprocess model auto-selected: https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3


In [7]:
def make_bert_preprocess_model(sentence_features, seq_length = 128):
  """Build a Keras model that turns raw strings into packed BERT inputs.

  Args:
    sentence_features: iterable of names; one scalar string
      `tf.keras.layers.Input` is created per name.
    seq_length: sequence length handed to the preprocessing model's
      `bert_pack_inputs` function. Previously this argument was overwritten
      with a hard-coded 512 inside the function and therefore ignored; it is
      now honoured.

  Returns:
    A `tf.keras.Model` mapping the string input(s) to the output of
    `bert_pack_inputs`.
  """
  input_segments = [
      tf.keras.layers.Input(shape=(), dtype=tf.string, name=ft)
      for ft in sentence_features]

  # Tokenize every input segment with the tokenizer shipped in the TF Hub
  # preprocessing model.
  bert_preprocess = hub.load(tfhub_handle_preprocess)
  tokenizer = hub.KerasLayer(bert_preprocess.tokenize, name='tokenizer')
  segments = [tokenizer(s) for s in input_segments]

  # Pack the tokenized segments into encoder inputs of length `seq_length`.
  packer = hub.KerasLayer(bert_preprocess.bert_pack_inputs,
                          arguments=dict(seq_length=seq_length),
                          name='packer')
  model_inputs = packer(segments)

  return tf.keras.Model(input_segments, model_inputs)

# 512 is the length the original cell always used (via the in-function
# override); it is now requested explicitly so the resulting model is the same.
preprocessor = make_bert_preprocess_model(['input'], seq_length=512)

In [15]:
def build_model():
    """Assemble the two-branch regression model.

    Text branch: string input 'text' -> global `preprocessor` -> frozen BERT
    encoder (pooled output) -> dropout -> ReLU dense layers of 128/32/16/4
    units.
    Sentiment branch: input 'sent' of shape (1,) -> ReLU dense layers of
    32/8/4 units.
    The two 4-unit outputs are concatenated, passed through an 8-unit ReLU
    layer ('merge') and a 2-unit linear layer ('op').

    Returns:
        A `tf.keras.Model` taking `[text, sent]` and producing 2 values per row.
    """
    dense = tf.keras.layers.Dense

    # ---- text branch ----
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
    packed_inputs = preprocessor(text_input)
    bert_layer = hub.KerasLayer(tfhub_handle_encoder, trainable=False, name='BERT_encoder')
    text_features = bert_layer(packed_inputs)['pooled_output']

    text_features = tf.keras.layers.Dropout(0.1)(text_features)
    for units, layer_name in ((128, 'classifierOne'), (32, 'mid'), (16, 'classifierTwo')):
        text_features = dense(units, activation="relu", name=layer_name, trainable=True)(text_features)
    text_features = dense(4, activation="relu")(text_features)
    text_branch = tf.keras.Model(text_input, text_features)

    # ---- sentiment branch ----
    # NOTE(review): the input is declared int32 while the training cell feeds a
    # float32 `sentiment` array — confirm that sentiment values are integral.
    sent_input = tf.keras.layers.Input(shape=(1,), dtype=tf.int32, name="sent")
    sent_features = sent_input
    for units, layer_name in ((32, 'one'), (8, 'two'), (4, 'three')):
        sent_features = dense(units, activation="relu", name=layer_name, trainable=True)(sent_features)
    sent_branch = tf.keras.Model(sent_input, sent_features)

    # ---- merge both branches and regress two outputs ----
    merged = tf.keras.layers.concatenate([text_branch.output, sent_branch.output])
    merged = dense(8, activation="relu", name="merge")(merged)
    prediction = dense(2, activation="linear", name="op")(merged)

    return tf.keras.Model(inputs=[text_branch.input, sent_branch.input], outputs=prediction)


In [16]:
# Instantiate the two-input model (text + sentiment) defined by build_model().
classifier_model = build_model()

In [17]:
# Train with mean absolute error and a default-configured Adam optimizer.
classifier_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='mean_absolute_error',
)

# Fit on the full dataset: inputs are [texts, sentiment], targets are the
# two-column `returns` array. No validation data is passed.
history = classifier_model.fit(
    x=[texts, sentiment],
    y=returns,
    epochs=200,
    verbose=1,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [19]:
# Predict on the same [texts, sentiment] arrays the model was fitted on, so
# these are in-sample predictions rather than held-out estimates.
predictions = classifier_model.predict([texts, sentiment])

In [20]:
# Display the prediction array; its two columns follow the target order used
# for training, i.e. ('return', 'sd').
predictions 

array([[-0.30245554,  0.2000103 ],
       [ 3.0178504 ,  0.15717717],
       [ 6.761103  ,  0.08700785],
       ...,
       [ 2.782552  ,  0.0918714 ],
       [ 0.9398836 ,  0.17615774],
       [ 2.6525452 ,  0.15254143]], dtype=float32)

In [21]:
# Start an empty output table with the three columns that will be exported.
new_columns = ['date', 'return', 'sd']
new_df = pd.DataFrame(data=None, columns=new_columns)

In [22]:
# Copy the announcement dates from the source frame; because new_df starts
# empty, this assignment also gives it one row per row of df.
new_df['date'] = df['dates']

In [23]:

# Preview: only 'date' is populated so far; 'return' and 'sd' are still NaN.
new_df.head()

Unnamed: 0,date,return,sd
0,19-May-2021,,
1,19-May-2021,,
2,17-May-2021,,
3,19-Jun-2020,,
4,16-Jun-2020,,


In [25]:
# Attach the model output positionally: prediction column 0 -> 'return',
# column 1 -> 'sd'. Assumes `predictions` has one row per row of new_df.
new_df[['return','sd']] = predictions

In [26]:
# Preview the table after the predicted 'return' and 'sd' values are filled in.
new_df.head()

Unnamed: 0,date,return,sd
0,19-May-2021,-0.302456,0.20001
1,19-May-2021,3.01785,0.157177
2,17-May-2021,6.761103,0.087008
3,19-Jun-2020,6.581489,0.084075
4,16-Jun-2020,6.581489,0.084075


In [27]:
# Write the predictions without the index column. The path is relative, so the
# file lands in the current working directory (set by the earlier %cd cell).
new_df.to_csv("./final_prediction.csv", index = False)