<a href="https://colab.research.google.com/github/toonzzzrock/project/blob/main/TCN_CONvLSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# import

In [None]:
!pip install -q keras-tcn --no-dependencies
from tcn import TCN, tcn_full_summary

In [None]:
!pip install time-series-generator

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting time-series-generator
  Downloading time_series_generator-0.2.8-py3-none-any.whl (8.1 kB)
Installing collected packages: time-series-generator
Successfully installed time-series-generator-0.2.8


In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras import Model, Input, Sequential
from tensorflow.keras.layers import Dense, Masking, LSTM, Embedding, Dropout, ConvLSTM2D, MaxPooling3D, BatchNormalization, TimeDistributed, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.activations import gelu
from tensorflow.keras.metrics import RootMeanSquaredError as rmse
from tensorflow.keras.utils import plot_model

from sklearn.model_selection import train_test_split

In [None]:
from time_series_generator import TimeseriesGenerator as datagen

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer

In [None]:
import pandas as pd
import numpy as np

from datetime import datetime, timedelta
from tqdm import tqdm
import time

import re
import requests
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup

In [None]:
# Colab-only: mount Google Drive so the parquet files under /content/gdrive are readable.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Load the two raw inputs from the mounted Drive folder:
#   Stock_data - stock table (has a 'date' column and a 'Close' column, both used below)
#   News_data  - news table (its 'all_text' column is tokenized below)
Stock_data = pd.read_parquet(r'/content/gdrive/MyDrive/Fund/X_train_stock.parquet.gzip')  
News_data = pd.read_parquet(r'/content/gdrive/MyDrive/Fund/News_data_clean.parquet.gzip')  

# Clean data

In [None]:
# Vocabulary cap for the Tokenizer; also the Embedding input dimension in the model.
num_words = 20000
# Embedding output dimension. The spelling is kept as-is because the model cell reads this name.
embedding_vecor_length = 256
#max_review_length = 800
# Passed as `length` / `length_output` to the time-series generator below.
input_length = 30
output_length = 5

News

In [None]:
# Word-level tokenizer with its vocabulary capped via `num_words`.
# Characters listed in `filters` are stripped, text is lower-cased,
# and words are split on single spaces.
tokenizer = Tokenizer(
    num_words=num_words,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    lower=True,
    split=' ',
)

# Build the word index from the news corpus.
tokenizer.fit_on_texts(News_data['all_text'])

In [None]:
# Map every news text to a list of integer token ids (lists have different lengths).
X_News_token = tokenizer.texts_to_sequences(News_data['all_text'])

In [None]:
# Pad the token lists into one rectangular integer array.
# No `maxlen` is passed, so nothing is truncated: every row is padded to the
# length of the longest sequence.
# NOTE(review): the model below is built with max_review_length=1024 - confirm
# the padded width matches, or pass an explicit `maxlen`.
X_News_train = sequence.pad_sequences(X_News_token)

In [None]:
# Inspect the dimensions of the padded token matrix.
X_News_train.shape

Stock

In [None]:
# Replace missing values with 1 before the log-ratio is computed below.
# NOTE(review): a placeholder of 1 next to a real value yields a large artificial
# log-ratio - confirm this is intended rather than e.g. forward-filling.
Stock_data = Stock_data.fillna(1)

In [None]:
# Remove the non-numeric 'date' column so the frame can go through np.log below.
# errors='ignore' keeps this cell idempotent: re-running it after the column is
# already gone no longer raises KeyError.
Stock_data = Stock_data.drop(columns = ['date'], errors = 'ignore')

In [None]:
# Per-column log-ratio of each row to the previous row.
# The first row has no predecessor (shift(1) yields NaN), so it comes out as NaN here.
Stock_data2 = np.log(Stock_data / Stock_data.shift(1))

In [None]:
# Neutralise undefined log-ratios.
# Zeros in the source data make the ratio 0 or a division by zero, which np.log
# turns into -inf / +inf. fillna() alone leaves those in place, and they would
# propagate into the features and targets, so map them to NaN first and then
# fill everything (including the first row) with 0.
Stock_data2 = Stock_data2.replace([np.inf, -np.inf], np.nan).fillna(0)

Data generator

In [None]:
# Join news tokens and stock features column-wise into one feature matrix.
# The padded array X_News_train is used here: X_News_token is a ragged Python
# list, which pd.concat rejects (only Series/DataFrame objects are accepted).
# Rows are paired by position; np.concatenate raises if the row counts differ.
# NOTE(review): assumes news row i and stock row i describe the same period - confirm.
X_train_all = np.concatenate([X_News_train, Stock_data2.to_numpy()], axis = 1)

In [None]:
# Sliding-window batches over the combined feature matrix, targeting 'Close'.
# The feature matrix built in the previous cell is X_train_all; `X_train` is
# never defined in this notebook and raised NameError on a fresh kernel.
X_dataset = datagen(X_train_all, Stock_data2['Close'], length=input_length, length_output=output_length, batch_size=128, sampling_rate = 5)

# model

In [None]:
def My_ConvLSTM_Model(frames, channels, stock_indicator, max_review_length, predict_frame):
    """Build the two-branch (news text + stock indicators) forecasting model.

    The news branch embeds token ids, runs three ConvLSTM2D/MaxPooling3D stages
    and a stack of dense layers. The stock branch runs two stacked TCN layers.
    Each branch ends in a 64-unit vector; the two vectors are joined along the
    feature axis and mapped to `predict_frame` linear outputs.

    Reads the module-level `num_words` and `embedding_vecor_length` to size the
    embedding, so the configuration cell must have been executed first.

    Args:
        frames: Number of time steps per sample (shared by both inputs).
        channels: Size of the channel axis of the news input.
        stock_indicator: Number of stock features per time step.
        max_review_length: Number of token ids per news item.
        predict_frame: Number of units of the final Dense layer.

    Returns:
        An uncompiled Keras `Model` taking the inputs `[News, Stock movement]`
        and producing a `(batch, predict_frame)` tensor.
    """
    # NOTE(review): 'Stock movement' and the model name 'Model ' contain spaces;
    # some TensorFlow versions reject such names as scope names - confirm.
    news_input = Input(shape=(frames, channels, max_review_length), name='News')
    stock_input = Input(shape=(frames, stock_indicator), name='Stock movement')
    model_inputs = [news_input, stock_input]

    # ---------------- News branch ----------------
    # The embedding appends one axis of size `embedding_vecor_length`.
    # NOTE(review): trainable=False freezes the initial embedding weights -
    # confirm that pretrained weights are loaded elsewhere.
    news_embedded = Embedding(num_words,
                              embedding_vecor_length,
                              input_length=max_review_length,
                              trainable=False,
                              mask_zero=True,
                              name='News_embedding_layer'
    )(news_input)

    # NOTE(review): the mask produced here is per token, while recurrent layers
    # consume a per-time-step mask - confirm ConvLSTM2D handles it as intended.
    news_masked = Masking(mask_value=0.0, name='Masking_News_layer')(news_embedded)

    # Stage 1: ConvLSTM over the (tokens x embedding) plane of every frame.
    conv1 = ConvLSTM2D(filters=20, kernel_size=(16, 16),
                       data_format='channels_first',
                       recurrent_activation='hard_sigmoid',
                       activation='tanh',
                       padding='same', return_sequences=True)(news_masked)
    conv1_norm = BatchNormalization()(conv1)
    # pool_size starts with 1 so the filter axis is left untouched and only the
    # two spatial axes are down-sampled.
    conv1_pooled = MaxPooling3D(pool_size=(1, 8, 4), padding='same',
                                data_format='channels_first')(conv1_norm)

    # Stage 2: same pattern with fewer filters.
    conv2 = ConvLSTM2D(filters=10, kernel_size=(16, 16),
                       data_format='channels_first',
                       recurrent_activation='hard_sigmoid',
                       activation='tanh',
                       padding='same', return_sequences=True)(conv1_pooled)
    conv2_norm = BatchNormalization()(conv2)
    conv2_pooled = MaxPooling3D(pool_size=(1, 4, 4), padding='same',
                                data_format='channels_first')(conv2_norm)

    # Stage 3: smaller kernel, default activations, no batch normalization.
    conv3 = ConvLSTM2D(filters=5, kernel_size=(8, 8),
                       data_format='channels_first',
                       stateful=False,
                       kernel_initializer='random_uniform',
                       padding='same', return_sequences=True)(conv2_pooled)
    conv3_pooled = MaxPooling3D(pool_size=(1, 4, 2), padding='same',
                                data_format='channels_first')(conv3)

    # Per-frame projection: flatten each frame, then shrink it to 16 features.
    # These Dense layers carry no activation, so they are purely linear.
    per_frame = TimeDistributed(Flatten())(conv3_pooled)
    per_frame = TimeDistributed(Dense(256))(per_frame)
    per_frame = TimeDistributed(Dense(64))(per_frame)
    per_frame = TimeDistributed(Dense(16))(per_frame)

    # Collapse the time axis and run the dense head (BN -> Dropout -> Dense).
    news_flat = Flatten()(per_frame)
    news_head = BatchNormalization()(news_flat)
    news_head = Dropout(0.25)(news_head)

    news_head = Dense(256, activation=gelu)(news_head)
    news_head = BatchNormalization()(news_head)
    news_head = Dropout(0.4)(news_head)

    news_head = Dense(64, activation=gelu)(news_head)
    news_head = BatchNormalization()(news_head)
    News_output = Dropout(0.4)(news_head)

    # ---------------- Stock branch ----------------
    stock_tcn = TCN(input_shape=(frames, stock_indicator), nb_filters=128,
                    return_sequences=True, dilations=[1, 2, 4, 8, 16, 32])(stock_input)
    stock_tcn = TCN(nb_filters=64, return_sequences=True,
                    dilations=[1, 2, 4, 8, 16])(stock_tcn)

    stock_flat = Flatten()(stock_tcn)
    stock_head = Dense(64, activation=gelu)(stock_flat)
    stock_head = BatchNormalization()(stock_head)
    Stock_output = Dropout(0.4)(stock_head)

    # ---------------- Fusion ----------------
    # Join the branches along the feature axis. Concatenating on axis 0 (as the
    # previous tf.concat call did) stacks them along the batch axis, which
    # doubles the number of output rows and never lets the final Dense layer
    # see news and stock features of the same sample together.
    merged = tf.keras.layers.Concatenate(axis=-1, name='News_Stock_concat')(
        [News_output, Stock_output])
    outputs = Dense(predict_frame, activation='linear')(merged)

    return Model(inputs=model_inputs, outputs=outputs, name='Model ')


In [None]:
# Instantiate the network with fixed sizes.
# NOTE(review): frames=60 and predict_frame=5 are hard-coded here while the config
# cell defines input_length=30 / output_length=5 for the generator (which also
# uses sampling_rate=5) - confirm the window sizes agree with what the generator yields.
model = My_ConvLSTM_Model(frames = 60, channels = 1, stock_indicator = 10, max_review_length = 1024, predict_frame = 5)

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Draw the layer graph, annotated with tensor shapes.
plot_model(model, show_shapes=True)

# train