In [1]:
import tensorflow as tf

from tensorflow.keras.layers import LSTM,RepeatVector,Dense,TimeDistributed,Masking
from tensorflow.keras.models import Sequential

import numpy as np
import csv
from sklearn.preprocessing import MaxAbsScaler

In [2]:
with open("out.csv",'r') as dest_f:
    data_iter = csv.reader(dest_f,delimiter = ",")
    data = [np.array(data,dtype=float) for data in data_iter]
    # data = [x.reshape(x.shape[0],1,1) for x in data]
data_array = np.asarray(data, dtype = object)
data_array

array([array([-5.728300e+04,  3.977700e+04, -6.790000e+02,  6.890000e+02,
              -6.860000e+02,  3.580600e+04, -9.303100e+04,  2.492600e+04,
              -7.317200e+04,  6.364400e+04, -4.240000e+02,  1.578920e+05,
              -1.579700e+05,  3.025380e+05, -1.891900e+05,  4.849880e+05,
              -1.665630e+05,  6.406870e+05, -2.500000e+02,  5.879590e+05,
              -3.920000e+02,  6.813980e+05, -7.901100e+04,  7.381430e+05,
              -1.400000e+01,  7.754500e+05, -1.910900e+04,  8.193670e+05,
              -8.768000e+03,  8.273280e+05, -5.966000e+03,  8.513770e+05,
              -1.359450e+05,  9.164240e+05, -3.000000e+01,  8.258470e+05,
              -6.304000e+03,  9.435790e+05, -7.947500e+04,  1.016912e+06,
              -9.156000e+03,  9.909580e+05, -7.000000e+00,  1.010833e+06,
              -7.948200e+04,  1.114136e+06, -2.448800e+04,  1.115702e+06,
              -5.766200e+04,  1.109164e+06, -3.644900e+04,  1.117681e+06,
              -8.762800e+04,  1.246175

In [3]:
with open("out_authors.csv",'r') as dest_f:
    data_iter = csv.reader(dest_f,delimiter = ",")
    data = [np.array(data,dtype=str) for data in data_iter]
    # data = [x.reshape(x.shape[0],1,1) for x in data]
author_id = np.asarray(data, dtype = object)
author_id

array([['1000010400946118656'],
       ['1000042745380134912'],
       ['1000130222338002944'],
       ...,
       ['999978890058706944'],
       ['999983906026901504'],
       ['999993840982482944']], dtype=object)

In [5]:
padded_array = tf.keras.preprocessing.sequence.pad_sequences(data_array,padding="post")
# padded_array = np.transpose(padded_array)
padded_array

array([[ -57283,   39777,    -679, ...,       0,       0,       0],
       [ -21373,  138347,   -7167, ...,       0,       0,       0],
       [ -86517,   49365,  -93966, ...,       0,       0,       0],
       ...,
       [  -5947,     689,  -71657, ...,       0,       0,       0],
       [-413612,  413576,   -3811, ...,       0,       0,       0],
       [ -68050,   50895,   -3077, ...,       0,       0,       0]])

In [6]:
# reshape input into [samples, timesteps, features]
padded_array = padded_array.reshape((padded_array.shape[0],padded_array.shape[1],1))
padded_array.shape

(11117, 28092, 1)

In [7]:
scaler = MaxAbsScaler()
padded_array = scaler.fit_transform(padded_array.reshape(-1, padded_array.shape[-1])).reshape(padded_array.shape)

In [8]:
# masked_value = scaler.transform([[0]])[0][0]
masked_value = 0
masked_value

0

In [9]:
# padded_array = np.where(padded_array==scaler.transform([[0]]), 0, padded_array)

In [10]:
# padded_array

In [11]:
timesteps = padded_array.shape[1]  # Length of your sequences
features = padded_array.shape[2]
inter_dim = 64
latent_dim = 16

model = Sequential()
model.add(Masking(mask_value=masked_value, input_shape=(timesteps, features,)))
model.add(LSTM(inter_dim, activation='tanh',return_sequences=True))
model.add(LSTM(latent_dim, activation='tanh',return_sequences=False))
model.add(RepeatVector(timesteps))
model.add(LSTM(latent_dim, activation='tanh', return_sequences=True))
model.add(LSTM(inter_dim, activation='tanh', return_sequences=True))
model.add(TimeDistributed(Dense(1,activation="relu")))

# model.compile(optimizer='adam', loss='mse',metrics=['accuracy'])
model.compile(optimizer='adam', loss='binary_crossentropy',metrics=['accuracy'])
# model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',metrics=['accuracy'])

In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 masking (Masking)           (None, 28092, 1)          0         
                                                                 
 lstm (LSTM)                 (None, 28092, 64)         16896     
                                                                 
 lstm_1 (LSTM)               (None, 16)                5184      
                                                                 
 repeat_vector (RepeatVector  (None, 28092, 16)        0         
 )                                                               
                                                                 
 lstm_2 (LSTM)               (None, 28092, 16)         2112      
                                                                 
 lstm_3 (LSTM)               (None, 28092, 64)         20736     
                                                        

In [13]:
# checkpoint
callbacks_list = [
    tf.keras.callbacks.ModelCheckpoint("training_checkpoints/saved-model-{epoch:02d}-{accuracy:.2f}.hdf5", verbose=0, monitor='accuracy', save_best_only=False, save_weights_only=False, mode='auto', save_freq="epoch"),
    tf.keras.callbacks.ModelCheckpoint("saved-model-best.hdf5", monitor='accuracy', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', save_freq="epoch"),
    ]

In [14]:
model.fit(padded_array,padded_array,callbacks=callbacks_list, epochs=1 ,batch_size=16)



<keras.callbacks.History at 0x2375655c790>

In [15]:
model.save("trained_model.h5")