In [1]:
import os 
import random
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import seaborn as sns
import sklearn
from sklearn import preprocessing
from sklearn.model_selection import GroupShuffleSplit

import tensorflow as tf
from tensorflow import keras, convert_to_tensor, string
from tensorflow import math, matmul, reshape, shape, transpose, cast, float32
from tensorflow import linalg, ones, maximum, newaxis
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Layer, Embedding, MaxPooling1D
from tensorflow.keras.layers import LayerNormalization, ReLU, Dropout
from tensorflow.keras.layers import Activation, Flatten, Conv1D, BatchNormalization
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.optimizers.schedules import LearningRateSchedule
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping

from keras import backend as K
from keras.backend import softmax

%load_ext autoreload
%autoreload 2

%matplotlib inline

### Parameters for plotting model results ###
pd.set_option("display.max_colwidth",100)
sns.set(style="ticks", color_codes=True)
plt.rcParams['font.weight'] = 'normal'
plt.rcParams['axes.labelweight'] = 'normal'
plt.rcParams['axes.labelpad'] = 5
plt.rcParams['axes.linewidth']= 2
plt.rcParams['xtick.labelsize']= 14
plt.rcParams['ytick.labelsize']= 14
plt.rcParams['xtick.major.size'] = 6
plt.rcParams['ytick.major.size'] = 6
plt.rcParams['xtick.minor.size'] = 3
plt.rcParams['ytick.minor.size'] = 3
plt.rcParams['xtick.minor.width'] = 1
plt.rcParams['ytick.minor.width'] = 1
plt.rcParams['xtick.major.width'] = 2
plt.rcParams['ytick.major.width'] = 2
plt.rcParams['xtick.color'] = 'black'
plt.rcParams['ytick.color'] = 'black'
plt.rcParams['axes.labelcolor'] = 'black'
plt.rcParams['axes.edgecolor'] = 'black'

In [2]:
#! pip install focal-loss

In [3]:
from focal_loss import BinaryFocalLoss
from functions import LRScheduler, TransformerModel 

In [4]:
print('tensorflow version: ' + tf. __version__)
print(tf.config.list_physical_devices('CPU'))
print(tf.config.list_physical_devices('GPU'))

tensorflow version: 2.3.4
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [5]:
# basic random seed
DEFAULT_RANDOM_SEED = 2022

def seedBasic(seed=DEFAULT_RANDOM_SEED):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    
# tensorflow random seed 
def seedTF(seed=DEFAULT_RANDOM_SEED):
    tf.random.set_seed(seed)
    
# torch random seed
# import torch
# def seedTorch(seed=DEFAULT_RANDOM_SEED):
#     torch.manual_seed(seed)
#     torch.cuda.manual_seed(seed)
#     torch.backends.cudnn.deterministic = True
#     torch.backends.cudnn.benchmark = False
      
# basic + tensorflow + torch 
def seedEverything(seed=DEFAULT_RANDOM_SEED):
    seedBasic(seed)
    seedTF(seed)
    # seedTorch(seed)

seedEverything()

In [20]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
channels = [1, 4, 1, 1, 4, 1, 1, 1, 1, 1]

for i in range(10,11):
    model_name='model' + str(i)
    print(model_name)

    channel = channels[i-1]
    if channel == 4:
        padding_method = 'one_hot_encode_padding'
    if channel ==1:
        padding_method = 'encode_padding'

    max_input_length = 600
    dropout_rate = 0.1
    feature_pad_end= '3end'

    import models
    custom_model = getattr(models, model_name)
    import functions
    custom_padding = getattr(functions, padding_method)
    
    for fold in range(1,6):
        print('fold'+str(fold))
        
        data_all=pd.read_csv('data/fold'+str(fold)+'.csv')
        data_train=data_all[data_all['set']=='train']
        data_valid=data_all[data_all['set']=='valid']
    
        data_train = data_train.sample(frac=1).reset_index(drop=True)
        data_valid = data_valid.sample(frac=1).reset_index(drop=True)
    
        X_train=custom_padding(data_train, col='seq', seq_len=max_input_length, padding=feature_pad_end, channel=channel)
        y_train=data_train['label']
        X_valid=custom_padding(data_valid, col='seq', seq_len=max_input_length, padding=feature_pad_end, channel=channel)
        y_valid=data_valid['label']
    
        tf.keras.backend.clear_session()

        model=custom_model(seq_length=max_input_length, dropout_rate=dropout_rate)

        #adam = tf.keras.optimizers.Adam(LRScheduler(d_model), beta_1=0.9, beta_2=0.999, epsilon=1e-08) 
        adam = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

        METRICS = [
              keras.metrics.TruePositives(thresholds=0.5, name='tp'),
              keras.metrics.FalsePositives(thresholds=0.5,name='fp'),
              keras.metrics.TrueNegatives(thresholds=0.5,name='tn'),
              keras.metrics.FalseNegatives(thresholds=0.5,name='fn'), 
              keras.metrics.BinaryAccuracy(name='accuracy'),
              keras.metrics.Precision(name='precision'),
              keras.metrics.Recall(name='recall'),
              keras.metrics.AUC(name='auc'),
              keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
        ]

        model.compile(loss=BinaryFocalLoss(gamma=2), metrics=METRICS, optimizer=adam)

        model.summary()

        es = EarlyStopping(monitor='val_prc', mode='max', verbose=1, patience=3, restore_best_weights=True)

        model.fit(X_train, y_train, validation_data=(X_valid, y_valid),
                  batch_size=64, epochs=10, verbose=1, callbacks=[es], class_weight={0: 10, 1: 1})

        model.save('model/result/'+model_name+'_'+str(fold))
        with open('model/result/'+model_name+'_'+str(fold)+'.json', 'w') as fp:
            json.dump(model.history.history, fp)
        
        

model10
fold1
Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 600)]             0         
_________________________________________________________________
transformer_model (Transform (None, 600, 16)           15120     
_________________________________________________________________
flatten (Flatten)            (None, 9600)              0         
_________________________________________________________________
dense_7 (Dense)              (None, 32)                307232    
_________________________________________________________________
activation (Activation)      (None, 32)                0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 32) 

In [51]:
tp=[]
fp=[]
tn=[]
fn=[]
precision=[]
recall=[]
auc=[]
prc=[]
fold = []
model = []

for i in range(1,11):
    m='model' + str(i)
    #print(m)
    for j in range(1,6):
        #print(j)
        f = open('model/result/'+ m +'_'+str(j)+'.json')
        result = json.load(f)
        f.close()
        max_prc = max(result['val_prc'])
        index_max = result['val_prc'].index(max_prc)
        tp.append(result['val_tp'][index_max])
        fp.append(result['val_fp'][index_max])
        tn.append(result['val_tn'][index_max])
        fn.append(result['val_fp'][index_max])
        precision.append(result['val_precision'][index_max])
        recall.append(result['val_recall'][index_max])
        auc.append(result['val_auc'][index_max])
        prc.append(max_prc)
        fold.append(j)
        model.append(m)

result=pd.DataFrame({"tp":tp,
                     "fp":fp,
                     "tn":tn,
                     "fn":fn,
                     "precision":precision,
                     "recall":recall,
                     "auc":auc,
                     "prc":prc,
                     "fold":fold,
                     "model":model
                    })


In [52]:
result

Unnamed: 0,tp,fp,tn,fn,precision,recall,auc,prc,fold,model
0,30.0,69.0,7601.0,69.0,0.30303,0.454545,0.949154,0.434607,1,model1
1,178.0,278.0,12589.0,278.0,0.390351,0.419811,0.888406,0.439929,2,model1
2,134.0,22.0,6789.0,22.0,0.858974,0.227891,0.661308,0.382783,3,model1
3,144.0,4376.0,36052.0,4376.0,0.031858,0.503497,0.80534,0.057215,4,model1
4,184.0,10688.0,26452.0,10688.0,0.016924,0.39485,0.552928,0.027669,5,model1
5,20.0,32.0,7638.0,32.0,0.384615,0.30303,0.85118,0.320973,1,model2
6,61.0,44.0,12823.0,44.0,0.580952,0.143868,0.850701,0.298174,2,model2
7,64.0,14.0,6797.0,14.0,0.820513,0.108844,0.686723,0.281093,3,model2
8,138.0,423.0,40005.0,423.0,0.245989,0.482517,0.886486,0.386172,4,model2
9,113.0,2264.0,34876.0,2264.0,0.047539,0.242489,0.65841,0.055895,5,model2


In [55]:
result.groupby(['model']).mean()

Unnamed: 0_level_0,tp,fp,tn,fn,precision,recall,auc,prc,fold
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
model1,134.0,3086.6,17896.6,3086.6,0.320228,0.400119,0.771427,0.268441,3
model10,131.2,3106.8,17876.4,3106.8,0.256601,0.432637,0.783136,0.313343,3
model2,79.2,555.4,20427.8,555.4,0.415922,0.25615,0.7867,0.268462,3
model3,59.4,379.2,20604.0,379.2,0.500499,0.205998,0.675733,0.228969,3
model4,95.2,688.4,20294.8,688.4,0.283132,0.29072,0.712844,0.226519,3
model5,86.2,1202.0,19781.2,1202.0,0.546717,0.263498,0.787015,0.313233,3
model6,89.4,406.8,20576.4,406.8,0.406476,0.29473,0.774781,0.36025,3
model7,109.4,1905.2,19078.0,1905.2,0.287377,0.371988,0.730528,0.31151,3
model8,136.4,1275.4,19707.8,1275.4,0.304785,0.42717,0.765836,0.303166,3
model9,143.8,2185.2,18798.0,2185.2,0.325431,0.460506,0.797332,0.320199,3


In [56]:
result.groupby(['model']).median()

Unnamed: 0_level_0,tp,fp,tn,fn,precision,recall,auc,prc,fold
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
model1,144.0,278.0,12589.0,278.0,0.30303,0.419811,0.80534,0.382783,3
model10,132.0,318.0,12594.0,318.0,0.128767,0.399142,0.817457,0.363167,3
model2,64.0,44.0,12823.0,44.0,0.384615,0.242489,0.850701,0.298174,3
model3,52.0,46.0,12821.0,46.0,0.530612,0.122642,0.662228,0.299596,3
model4,117.0,311.0,12610.0,311.0,0.13522,0.275943,0.78904,0.24201,3
model5,89.0,20.0,12847.0,20.0,0.79798,0.274678,0.776106,0.345711,3
model6,88.0,92.0,12775.0,92.0,0.488889,0.244635,0.756629,0.36712,3
model7,126.0,292.0,12575.0,292.0,0.208531,0.29717,0.752071,0.309086,3
model8,128.0,357.0,12510.0,357.0,0.170213,0.402098,0.746887,0.391623,3
model9,126.0,526.0,12341.0,526.0,0.320413,0.440559,0.824784,0.380237,3


In [36]:
result.groupby(['model']).std()

Unnamed: 0_level_0,tp,fp,tn,fn,precision,recall,auc,prc,fold
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
model1,61.951594,4632.217374,12848.756683,4632.217374,0.343005,0.104555,0.16301,0.207775,1.581139
model2,46.547825,970.236466,15805.283284,970.236466,0.298535,0.148268,0.10567,0.12534,1.581139
model3,41.180092,571.105244,15992.394974,571.105244,0.385722,0.154167,0.135825,0.12203,1.581139
model4,43.922659,807.35172,15818.055576,807.35172,0.332899,0.108256,0.127282,0.108027,1.581139
model5,41.889139,2042.123894,14993.240284,2042.123894,0.452341,0.099284,0.12262,0.130564,1.581139
model6,57.981031,499.953198,15957.256384,499.953198,0.279642,0.192943,0.088815,0.117437,1.581139
model7,38.168049,2392.66059,14083.896744,2392.66059,0.34893,0.173424,0.095163,0.251741,1.581139


In [28]:
result.groupby(['model']).max()

Unnamed: 0_level_0,tp,fp,tn,fn,precision,recall,auc,prc,fold
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
model1,184.0,10688.0,36052.0,10688.0,0.858974,0.503497,0.949154,0.439929,5
model2,138.0,2264.0,40005.0,2264.0,0.820513,0.482517,0.886486,0.386172,5
model3,128.0,1329.0,39923.0,1329.0,0.947368,0.447552,0.888736,0.310905,5
model4,129.0,2033.0,39603.0,2033.0,0.849057,0.451049,0.822278,0.347924,5
model5,128.0,4727.0,39179.0,4727.0,0.952381,0.402098,0.913924,0.435514,5


In [None]:
# embedding + cnn

enc_vocab_size = 5 # Vocabulary size for the encoder
dec_vocab_size = enc_vocab_size # Vocabulary size for the decoder

enc_seq_length = 600  # Maximum length of the input sequence
dec_seq_length = enc_seq_length  # Maximum length of the target sequence

h = 8  # Number of self-attention heads
d_k = 64  # Dimensionality of the linearly projected queries and keys
d_v = 64  # Dimensionality of the linearly projected values
d_ff = 32  # Dimensionality of the inner fully connected layer
d_model = 16  # Dimensionality of the model sub-layers' outputs
n = 1  # Number of layers in the encoder stack
 
dropout_rate = 0.1  # Frequency of dropping the input units in the dropout layers

for fold in range(1,6):
    print(fold)
    data_all=pd.read_csv('data/fold'+str(fold)+'.csv')
    data_train=data_all[data_all['set']=='train']
    data_valid=data_all[data_all['set']=='valid']
    
    data_train = data_train.sample(frac=1).reset_index(drop=True)
    data_valid = data_valid.sample(frac=1).reset_index(drop=True)
    
    X_train=encode_padding(data_train, col='seq', seq_len=CFG.feature_seq_len, padding=CFG.feature_pad_end, channel=1)
    y_train=data_train['label']
    X_valid=encode_padding(data_valid, col='seq', seq_len=CFG.feature_seq_len, padding=CFG.feature_pad_end, channel=1)
    y_valid=data_valid['label']
    
    tf.keras.backend.clear_session()

    word_embedding_layer = Embedding(input_dim=enc_vocab_size, output_dim=d_model)
    #training_model = TransformerModel(enc_vocab_size, dec_vocab_size, enc_seq_length, dec_seq_length,
    #                                      h, d_k, d_v, d_model, d_ff, n, dropout_rate)

    inputs = tf.keras.layers.Input(shape=(enc_seq_length,))
    outputs = word_embedding_layer(inputs)
    outputs = Conv1D(activation="relu", input_shape=(enc_seq_length, d_model), filters=128, kernel_size=8)(outputs)
    outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
    outputs = MaxPooling1D()(outputs)
    outputs = Dropout(dropout_rate)(outputs)
    outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
    outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
    outputs = MaxPooling1D()(outputs)
    outputs = Dropout(dropout_rate)(outputs)
    outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
    outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
    outputs = MaxPooling1D()(outputs)
    outputs = Dropout(dropout_rate)(outputs)
    outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
    outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
    outputs = MaxPooling1D()(outputs)
    outputs = Dropout(dropout_rate)(outputs)
    outputs = Flatten()(outputs)
    outputs = Dense(32)(outputs)
    outputs = Activation('sigmoid')(outputs)
    outputs = Dropout(dropout_rate)(outputs)
    outputs = Dense(1)(outputs)
    outputs = Activation('sigmoid')(outputs)
    model = keras.Model(inputs=inputs, outputs=outputs)

    #adam = tf.keras.optimizers.Adam(LRScheduler(d_model), beta_1=0.9, beta_2=0.999, epsilon=1e-08) 
    adam = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

    METRICS = [
          keras.metrics.TruePositives(thresholds=0.5, name='tp'),
          keras.metrics.FalsePositives(thresholds=0.5,name='fp'),
          keras.metrics.TrueNegatives(thresholds=0.5,name='tn'),
          keras.metrics.FalseNegatives(thresholds=0.5,name='fn'), 
          keras.metrics.BinaryAccuracy(name='accuracy'),
          keras.metrics.Precision(name='precision'),
          keras.metrics.Recall(name='recall'),
          keras.metrics.AUC(name='auc'),
          keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
    ]

    model.compile(loss=BinaryFocalLoss(gamma=2), metrics=METRICS, optimizer=adam)

    model.summary()

    es = EarlyStopping(monitor='val_prc', mode='max', verbose=1, patience=3, restore_best_weights=True)

    model.fit(X_train, y_train, validation_data=(X_valid, y_valid),
              batch_size=256, epochs=10, verbose=1, callbacks=[es])

    model.save('model/embedding16_cnn_fold'+str(fold)+'.h5')
    with open('model/embedding16_cnn_fold'+str(fold)+'.json', 'w') as fp:
        json.dump(model.history.history, fp)

In [None]:
# transformer + cnn

enc_vocab_size = 5 # Vocabulary size for the encoder
dec_vocab_size = enc_vocab_size # Vocabulary size for the decoder

enc_seq_length = 600  # Maximum length of the input sequence
dec_seq_length = enc_seq_length  # Maximum length of the target sequence

h = 8  # Number of self-attention heads
d_k = 64  # Dimensionality of the linearly projected queries and keys
d_v = 64  # Dimensionality of the linearly projected values
d_ff = 32  # Dimensionality of the inner fully connected layer
d_model = 16  # Dimensionality of the model sub-layers' outputs
n = 1  # Number of layers in the encoder stack
 
dropout_rate = 0.1  # Frequency of dropping the input units in the dropout layers

for fold in range(1,6):
    print(fold)
    data_all=pd.read_csv('data/fold'+str(fold)+'.csv')
    data_train=data_all[data_all['set']=='train']
    data_valid=data_all[data_all['set']=='valid']
    
    data_train = data_train.sample(frac=1).reset_index(drop=True)
    data_valid = data_valid.sample(frac=1).reset_index(drop=True)
    
    X_train=encode_padding(data_train, col='seq', seq_len=CFG.feature_seq_len, padding=CFG.feature_pad_end, channel=1)
    y_train=data_train['label']
    X_valid=encode_padding(data_valid, col='seq', seq_len=CFG.feature_seq_len, padding=CFG.feature_pad_end, channel=1)
    y_valid=data_valid['label']
    
    tf.keras.backend.clear_session()

    word_embedding_layer = Embedding(input_dim=enc_vocab_size, output_dim=d_model)
    training_model = TransformerModel(enc_vocab_size, dec_vocab_size, enc_seq_length, dec_seq_length,
                                      h, d_k, d_v, d_model, d_ff, n, dropout_rate)

    inputs = tf.keras.layers.Input(shape=(enc_seq_length,))
    outputs = training_model(inputs, training=True)
    outputs = K.max(outputs,axis=-1)
    outputs = Flatten()(outputs)
    outputs = Dense(32)(outputs)
    outputs = Activation('sigmoid')(outputs)
    outputs = Dropout(dropout_rate)(outputs)
    outputs = Dense(1)(outputs)
    outputs = Activation('sigmoid')(outputs)
    model = keras.Model(inputs=inputs, outputs=outputs)

    #adam = tf.keras.optimizers.Adam(LRScheduler(d_model), beta_1=0.9, beta_2=0.999, epsilon=1e-08) 
    adam = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

    METRICS = [
          keras.metrics.TruePositives(thresholds=0.5, name='tp'),
          keras.metrics.FalsePositives(thresholds=0.5,name='fp'),
          keras.metrics.TrueNegatives(thresholds=0.5,name='tn'),
          keras.metrics.FalseNegatives(thresholds=0.5,name='fn'), 
          keras.metrics.BinaryAccuracy(name='accuracy'),
          keras.metrics.Precision(name='precision'),
          keras.metrics.Recall(name='recall'),
          keras.metrics.AUC(name='auc'),
          keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
    ]

    model.compile(loss=BinaryFocalLoss(gamma=2), metrics=METRICS, optimizer=adam)

    model.summary()

    es = EarlyStopping(monitor='val_prc', mode='max', verbose=1, patience=3, restore_best_weights=True)

    model.fit(X_train, y_train, validation_data=(X_valid, y_valid),
              batch_size=64, epochs=10, verbose=1, callbacks=[es])

    model.save('model/transformer16_fold'+str(fold)+'.h5')
    with open('model/transformer16_fold'+str(fold)+'.json', 'w') as fp:
        json.dump(model.history.history, fp)

In [None]:
tf.keras.models.save_model(model, 'model/transformer16_fold'+str(fold)+'.h5')

In [None]:
# transformer + cnn

enc_vocab_size = 5 # Vocabulary size for the encoder
dec_vocab_size = enc_vocab_size # Vocabulary size for the decoder

enc_seq_length = 600  # Maximum length of the input sequence
dec_seq_length = enc_seq_length  # Maximum length of the target sequence

h = 8  # Number of self-attention heads
d_k = 64  # Dimensionality of the linearly projected queries and keys
d_v = 64  # Dimensionality of the linearly projected values
d_ff = 32  # Dimensionality of the inner fully connected layer
d_model = 16  # Dimensionality of the model sub-layers' outputs
n = 1  # Number of layers in the encoder stack
 
dropout_rate = 0.1  # Frequency of dropping the input units in the dropout layers

for fold in range(1,6):
    print(fold)
    data_all=pd.read_csv('data/fold'+str(fold)+'.csv')
    data_train=data_all[data_all['set']=='train']
    data_valid=data_all[data_all['set']=='valid']
    
    data_train = data_train.sample(frac=1).reset_index(drop=True)
    data_valid = data_valid.sample(frac=1).reset_index(drop=True)
    
    X_train=encode_padding(data_train, col='seq', seq_len=CFG.feature_seq_len, padding=CFG.feature_pad_end, channel=1)
    y_train=data_train['label']
    X_valid=encode_padding(data_valid, col='seq', seq_len=CFG.feature_seq_len, padding=CFG.feature_pad_end, channel=1)
    y_valid=data_valid['label']
    
    tf.keras.backend.clear_session()

    word_embedding_layer = Embedding(input_dim=enc_vocab_size, output_dim=d_model)
    training_model = TransformerModel(enc_vocab_size, dec_vocab_size, enc_seq_length, dec_seq_length,
                                      h, d_k, d_v, d_model, d_ff, n, dropout_rate)

    inputs = tf.keras.layers.Input(shape=(enc_seq_length,))
    outputs = training_model(inputs, training=True)
    #outputs = K.max(outputs,axis=-1)
    outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
    outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
    outputs = MaxPooling1D()(outputs)
    outputs = Dropout(dropout_rate)(outputs)
    outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
    outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
    outputs = MaxPooling1D()(outputs)
    outputs = Dropout(dropout_rate)(outputs)
    outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
    outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
    outputs = MaxPooling1D()(outputs)
    outputs = Dropout(dropout_rate)(outputs)
    outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
    outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
    outputs = MaxPooling1D()(outputs)
    outputs = Dropout(dropout_rate)(outputs)
    outputs = Flatten()(outputs)
    outputs = Dense(32)(outputs)
    outputs = Activation('sigmoid')(outputs)
    outputs = Dropout(dropout_rate)(outputs)
    outputs = Dense(1)(outputs)
    outputs = Activation('sigmoid')(outputs)
    model = keras.Model(inputs=inputs, outputs=outputs)

    #adam = tf.keras.optimizers.Adam(LRScheduler(d_model), beta_1=0.9, beta_2=0.999, epsilon=1e-08) 
    adam = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

    METRICS = [
          keras.metrics.TruePositives(thresholds=0.5, name='tp'),
          keras.metrics.FalsePositives(thresholds=0.5,name='fp'),
          keras.metrics.TrueNegatives(thresholds=0.5,name='tn'),
          keras.metrics.FalseNegatives(thresholds=0.5,name='fn'), 
          keras.metrics.BinaryAccuracy(name='accuracy'),
          keras.metrics.Precision(name='precision'),
          keras.metrics.Recall(name='recall'),
          keras.metrics.AUC(name='auc'),
          keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
    ]

    model.compile(loss=BinaryFocalLoss(gamma=2), metrics=METRICS, optimizer=adam)

    model.summary()

    es = EarlyStopping(monitor='val_prc', mode='max', verbose=1, patience=3, restore_best_weights=True)

    model.fit(X_train, y_train, validation_data=(X_valid, y_valid),
              batch_size=64, epochs=10, verbose=1, callbacks=[es])

    model.save('model/transformer16_cnn_fold'+str(fold)+'.h5')
    with open('model/transformer16_cnn_fold'+str(fold)+'.json', 'w') as fp:
        json.dump(model.history.history, fp)

In [None]:
fold

In [None]:
enc_vocab_size = 5 # Vocabulary size for the encoder
dec_vocab_size = enc_vocab_size # Vocabulary size for the decoder

enc_seq_length = 600  # Maximum length of the input sequence
dec_seq_length = enc_seq_length  # Maximum length of the target sequence

fold=1
data_all=pd.read_csv('data/fold'+str(fold)+'.csv')
data_train=data_all[data_all['set']=='train']
data_valid=data_all[data_all['set']=='valid']

data_train = data_train.sample(frac=1).reset_index(drop=True)
data_valid = data_valid.sample(frac=1).reset_index(drop=True)

X_train=encode_padding(data_train, col='seq', seq_len=enc_seq_length, padding=CFG.feature_pad_end, channel=1)
y_train=data_train['label']
X_valid=encode_padding(data_valid, col='seq', seq_len=enc_seq_length, padding=CFG.feature_pad_end, channel=1)
y_valid=data_valid['label']


In [None]:
from functions import LRScheduler, TransformerModel, encode_padding

enc_vocab_size = 5 # Vocabulary size for the encoder
dec_vocab_size = enc_vocab_size # Vocabulary size for the decoder

h = 4  # Number of self-attention heads
d_k = 32  # Dimensionality of the linearly projected queries and keys
d_v = 32  # Dimensionality of the linearly projected values
d_ff = 32  # Dimensionality of the inner fully connected layer
d_model = 128  # Dimensionality of the model sub-layers' outputs
n = 3  # Number of layers in the encoder stack
dropout_rate = 0.1

training_model = TransformerModel(enc_vocab_size, dec_vocab_size, enc_seq_length, dec_seq_length,
                                      h, d_k, d_v, d_model, d_ff, n, dropout_rate)

inputs = tf.keras.layers.Input(shape=(enc_seq_length,))
outputs = training_model(inputs, training=True)
#outputs = K.max(outputs,axis=-1)
outputs = Conv1D(activation="relu", input_shape=(enc_seq_length, d_model), filters=128, kernel_size=8)(outputs)
outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
outputs = MaxPooling1D()(outputs)
outputs = Dropout(0.1)(outputs)
outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
outputs = MaxPooling1D()(outputs)
outputs = Dropout(0.1)(outputs)
outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
outputs = MaxPooling1D()(outputs)
outputs = Dropout(0.1)(outputs)
outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
outputs = MaxPooling1D()(outputs)
outputs = Dropout(0.1)(outputs)
outputs = Flatten()(outputs)
outputs = Dense(32)(outputs)
outputs = Activation('sigmoid')(outputs)
outputs = Dropout(0.1)(outputs)
outputs = Dense(1)(outputs)
outputs = Activation('sigmoid')(outputs)
model = keras.Model(inputs=inputs, outputs=outputs)

adam = tf.keras.optimizers.Adam(LRScheduler(d_model), beta_1=0.9, beta_2=0.999, epsilon=1e-08) 
adam = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

METRICS = [
          keras.metrics.TruePositives(thresholds=0.5, name='tp'),
          keras.metrics.FalsePositives(thresholds=0.5,name='fp'),
          keras.metrics.TrueNegatives(thresholds=0.5,name='tn'),
          keras.metrics.FalseNegatives(thresholds=0.5,name='fn'), 
          keras.metrics.BinaryAccuracy(name='accuracy'),
          keras.metrics.Precision(name='precision'),
          keras.metrics.Recall(name='recall'),
          keras.metrics.AUC(name='auc'),
          keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
    ]

model.compile(loss=BinaryFocalLoss(gamma=2), metrics=METRICS, optimizer=adam)

model.summary()

In [None]:
es = EarlyStopping(monitor='val_prc', mode='max', verbose=1, patience=3, restore_best_weights=True)
model.fit(X_train, y_train, validation_data=(X_valid, y_valid),
              batch_size=64, epochs=10, verbose=1, callbacks=[es])

In [None]:
model.history.history

In [None]:
from functions import LRScheduler, TransformerModel, encode_padding

enc_vocab_size = 5 # Vocabulary size for the encoder
dec_vocab_size = enc_vocab_size # Vocabulary size for the decoder

h = 8  # Number of self-attention heads
d_k = 64  # Dimensionality of the linearly projected queries and keys
d_v = 64  # Dimensionality of the linearly projected values
d_ff = 32  # Dimensionality of the inner fully connected layer
d_model = 64  # Dimensionality of the model sub-layers' outputs
n = 1  # Number of layers in the encoder stack
dropout_rate = 0.1

word_embedding_layer = Embedding(input_dim=enc_vocab_size, output_dim=d_model)
training_model = TransformerModel(enc_vocab_size, dec_vocab_size, enc_seq_length, dec_seq_length,
                                      h, d_k, d_v, d_model, d_ff, n, dropout_rate)

inputs = tf.keras.layers.Input(shape=(enc_seq_length,))
#outputs = training_model(inputs, training=True)
#outputs = K.max(outputs,axis=-1)
outputs = word_embedding_layer(inputs)
outputs = Conv1D(activation="relu", input_shape=(enc_seq_length, d_model), filters=128, kernel_size=8)(outputs)
outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
outputs = MaxPooling1D()(outputs)
outputs = Dropout(0.1)(outputs)
outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
outputs = MaxPooling1D()(outputs)
outputs = Dropout(0.1)(outputs)
outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
outputs = MaxPooling1D()(outputs)
outputs = Dropout(0.1)(outputs)
outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
outputs = Conv1D(activation="relu", filters=128, kernel_size=8)(outputs)
outputs = MaxPooling1D()(outputs)
outputs = Dropout(0.1)(outputs)
outputs = Flatten()(outputs)
outputs = Dense(32)(outputs)
outputs = Activation('sigmoid')(outputs)
outputs = Dropout(0.1)(outputs)
outputs = Dense(1)(outputs)
outputs = Activation('sigmoid')(outputs)
model = keras.Model(inputs=inputs, outputs=outputs)

adam = tf.keras.optimizers.Adam(LRScheduler(d_model), beta_1=0.9, beta_2=0.999, epsilon=1e-08) 
adam = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

METRICS = [
          keras.metrics.TruePositives(thresholds=0.5, name='tp'),
          keras.metrics.FalsePositives(thresholds=0.5,name='fp'),
          keras.metrics.TrueNegatives(thresholds=0.5,name='tn'),
          keras.metrics.FalseNegatives(thresholds=0.5,name='fn'), 
          keras.metrics.BinaryAccuracy(name='accuracy'),
          keras.metrics.Precision(name='precision'),
          keras.metrics.Recall(name='recall'),
          keras.metrics.AUC(name='auc'),
          keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
    ]

model.compile(loss=BinaryFocalLoss(gamma=2), metrics=METRICS, optimizer=adam)

model.summary()

In [None]:
training_model = TransformerModel(enc_vocab_size, dec_vocab_size, enc_seq_length, dec_seq_length,
                                      h, d_k, d_v, d_model, d_ff, n, dropout_rate)

inputs = tf.keras.layers.Input(shape=(enc_seq_length,))
outputs = training_model(inputs, training=True)
outputs = K.max(outputs,axis=-1)
outputs = Flatten()(outputs)
outputs = Dense(1)(outputs)
outputs = Activation('sigmoid')(outputs)
model = keras.Model(inputs=inputs, outputs=outputs)

adam = tf.keras.optimizers.Adam(LRScheduler(d_model), beta_1=0.9, beta_2=0.999, epsilon=1e-08) 

METRICS = [
          keras.metrics.TruePositives(thresholds=0.5, name='tp'),
          keras.metrics.FalsePositives(thresholds=0.5,name='fp'),
          keras.metrics.TrueNegatives(thresholds=0.5,name='tn'),
          keras.metrics.FalseNegatives(thresholds=0.5,name='fn'), 
          keras.metrics.BinaryAccuracy(name='accuracy'),
          keras.metrics.Precision(name='precision'),
          keras.metrics.Recall(name='recall'),
          keras.metrics.AUC(name='auc'),
          keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
    ]

model.compile(loss=BinaryFocalLoss(gamma=2), metrics=METRICS, optimizer=adam)
    
model.summary()

In [None]:
valid_split = GroupShuffleSplit(test_size=.20, n_splits=2)

while True:
    split = valid_split.split(data, y, groups=g)
    train_inds, valid_inds = next(split)
    if len(train_inds)/len(valid_inds)>3:
        break
    
data_train=data.iloc[train_inds,:]
data_valid=data.iloc[valid_inds,:]

In [None]:
data_valid.shape

In [None]:
data_train.shape

In [None]:
data_train_pos=data_train[data_train.label==1]
data_train_pos_new = data_train_pos.sample(n=data_train.shape[0], random_state=1, replace=True)

for i in range(data_train_pos_new.shape[0]):
    r=random.uniform(0, 1)
    if r>0.75:
        tmp=random.sample([-1, -2, -3], k=1)
        data_train_pos_new.seq.iloc[i]=data_train_pos_new.seq.iloc[i][:tmp[0]]
    if r >0.5 and r <=0.75:
        tmp=random.sample([1, 2, 3], k=1)
        data_train_pos_new.seq.iloc[i]=data_train_pos_new.seq.iloc[i][tmp[0]:]
    if r >0.25 and r <=0.5:
        tmp=''.join(np.random.choice(add_on, size=random.sample([1,2,3],k=1), replace=True))
        data_train_pos_new.seq.iloc[i]=data_train_pos_new.seq.iloc[i] + tmp
    else:
        tmp=''.join(np.random.choice(add_on, size=random.sample([1,2,3],k=1), replace=True))
        data_train_pos_new.seq.iloc[i]=tmp + data_train_pos_new.seq.iloc[i]

data_train_all = pd.concat([data_train, data_train_pos_new])

In [None]:
data_train_all = data_train_all.sample(frac=1).reset_index(drop=True)

In [None]:
data_train_all['set']='train'
data_valid['set']='valid'

In [None]:
fold=1

In [None]:
data_all = pd.concat([data_train_all, data_valid])
data_all.reset_index(drop=True)
data_all['id']=str(fold)+'_'+data_all.index.astype(str)
data_all

In [None]:
data_all.to_csv('data/fold'+str(fold)+'.csv', index=False)

In [None]:
X_train=one_hot_encode_padding(data_train_all, col='seq', seq_len=CFG.feature_seq_len, padding=CFG.feature_pad_end, channel=CFG.channel)
y_train=data_train_all['label']
X_valid=one_hot_encode_padding(data_valid, col='seq', seq_len=CFG.feature_seq_len, padding=CFG.feature_pad_end, channel=CFG.channel)
y_valid=data_valid['label']

In [None]:
X_valid.shape

In [None]:
counts = np.bincount(y_valid)
print(
    "Number of positive samples in training data: {} ({:.2f}% of total)".format(
        counts[1], 100 * float(counts[1]) / len(y_valid)
    )
)

weight_for_0 = 1.0 / counts[0]
weight_for_1 = 1.0 / counts[1]
class_weight = {0: weight_for_0, 1: weight_for_1}

In [None]:
np.bincount(y_train)

In [None]:
tf.keras.backend.clear_session()
model = train_model(X_train, y_train, (X_valid, y_valid), channel=CFG.channel,
                   nb_epoch=10, border_mode='same',
                   inp_len=CFG.feature_seq_len, nodes=40, layers=5, nbr_filters=120, filter_len=8, dropout1=0,
                   dropout2=0, dropout3=0.2, patience=3)

In [None]:
model.history.history

In [None]:
IRESbase=pd.read_csv('data/IRESbase_new.csv')

In [None]:
IRESbase=pd.read_csv('data/RF00031.csv')

In [None]:
IRESbase

In [None]:
X_test=one_hot_encode_padding(IRESbase, col='IRES.Sequence', seq_len=CFG.feature_seq_len, padding=CFG.feature_pad_end, channel=CFG.channel)

In [None]:
y_test = model.predict(X_test)

In [None]:
sum(y_test>0.5)

In [None]:
y_test = model.predict(X_valid)

In [None]:
tf.keras.backend.clear_session()

In [None]:
X_valid.shape

In [None]:
enc_vocab_size = 20 # Vocabulary size for the encoder
input_seq_length = 5  # Maximum length of the input sequence
h = 8  # Number of self-attention heads
d_k = 8  # Dimensionality of the linearly projected queries and keys
d_v = 8  # Dimensionality of the linearly projected values
d_ff = 8  # Dimensionality of the inner fully connected layer
d_model = 8  # Dimensionality of the model sub-layers' outputs
n = 6  # Number of layers in the encoder stack

batch_size = 64  # Batch size from the training process
dropout_rate = 0.1  # Frequency of dropping the input units in the dropout layers
 
input_seq = np.random.random((batch_size, input_seq_length))
 
encoder = Encoder(enc_vocab_size, input_seq_length, h, d_k, d_v, d_model, d_ff, n, dropout_rate)
print(encoder(input_seq, None, True))

In [None]:
input_seq.shape

In [None]:
from numba import cuda 
device = cuda.get_current_device()
device.reset()

In [None]:
def encode_padding(df, col='utr', seq_len=50, padding='5end', channel=1):
    # 5end padding means pad the left end (5' end) if sequence length < seq_len; keep the seq_len right end (3' end) if  sequence length > seq_len
    # 3end padding means pad the right end (3' end) if sequence length < seq_len; keep the seq_len left end (5' end) if  sequence length > seq_len
    # Dictionary returning one-hot encoding of nucleotides. 
    nuc_d = {'a':[1],'c':[2],'g':[3],'t':[4], 'n':[0], '(':[5],')':[6],'.':[7]}
    
    # Creat empty matrix.
    vectors=np.zeros([len(df),seq_len,channel])
    
    # Iterate through UTRs and one-hot encode
    for i,seq in enumerate(df[col]):
        if(isinstance(seq, str)):
            if(padding=='3end'):
                seq=seq[:min(len(seq),seq_len)]
            if(padding=='5end'):
                seq=seq[max(0,(len(seq)-seq_len)):len(seq)]
            seq = seq.lower()
            a = np.array([nuc_d[x] for x in seq])
            if(padding=='5end'):
                vectors[i, (seq_len-len(seq)):seq_len] = a
            if(padding=='3end'):
                vectors[i, :len(seq)] = a
    return vectors

In [None]:
X_train=np.squeeze(encode_padding(data_train_all, col='seq', seq_len=200, padding=CFG.feature_pad_end, channel=1))
y_train=data_train_all['label']
X_valid=np.squeeze(encode_padding(data_valid, col='seq', seq_len=200, padding=CFG.feature_pad_end, channel=1))
y_valid=data_valid['label']
#m_valid=np.squeeze(tf.math.equal(X_valid, 0))*1

In [None]:
tf.keras.backend.clear_session()

In [None]:
enc_vocab_size = 64 # Vocabulary size for the encoder
dec_vocab_size = 64 # Vocabulary size for the decoder
 
enc_seq_length = 200  # Maximum length of the input sequence
dec_seq_length = 200  # Maximum length of the target sequence

h = 8  # Number of self-attention heads
d_k = 32  # Dimensionality of the linearly projected queries and keys
d_v = 32  # Dimensionality of the linearly projected values
d_ff = 64  # Dimensionality of the inner fully connected layer
d_model = 64  # Dimensionality of the model sub-layers' outputs
n = 3  # Number of layers in the encoder stack
 
dropout_rate = 0.1  # Frequency of dropping the input units in the dropout layers

# Create model
training_model = TransformerModel(enc_vocab_size, dec_vocab_size, enc_seq_length, dec_seq_length, h, d_k, d_v, d_model, d_ff, n, dropout_rate)

inputs = tf.keras.layers.Input(shape=(enc_seq_length,))
outputs = training_model(inputs, training=True)
outputs = Flatten()(outputs)
outputs = Dense(1)(outputs)
outputs = Activation('sigmoid')(outputs)

In [None]:
model = keras.Model(inputs=inputs, outputs=outputs)

In [None]:
model.summary()

In [None]:
from tensorflow.keras.optimizers.schedules import LearningRateSchedule
class LRScheduler(LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000, **kwargs):
        super(LRScheduler, self).__init__(**kwargs)
 
        self.d_model = cast(d_model, float32)
        self.warmup_steps = warmup_steps
 
    def __call__(self, step_num):
 
        # Linearly increasing the learning rate for the first warmup_steps, and decreasing it thereafter
        arg1 = step_num ** -0.5
        arg2 = step_num * (self.warmup_steps ** -1.5)
 
        return (self.d_model ** -0.5) * math.minimum(arg1, arg2)

In [None]:
adam = tf.keras.optimizers.Adam(LRScheduler(d_model), beta_1=0.9, beta_2=0.999, epsilon=1e-08) 

METRICS = [
      keras.metrics.TruePositives(thresholds=0.5, name='tp'),
      keras.metrics.FalsePositives(thresholds=0.5,name='fp'),
      keras.metrics.TrueNegatives(thresholds=0.5,name='tn'),
      keras.metrics.FalseNegatives(thresholds=0.5,name='fn'), 
      keras.metrics.BinaryAccuracy(name='accuracy'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(name='auc'),
      keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]

model.compile(loss=BinaryFocalLoss(gamma=2), metrics=METRICS, optimizer=adam)

es = EarlyStopping(monitor='val_prc', mode='max', verbose=1, patience=3, restore_best_weights=True)

model.fit(X_train, y_train, validation_data=(X_valid, y_valid),
          batch_size=256, epochs=10, verbose=1, callbacks=[es])

In [None]:
model.history.history

In [None]:
training_model.build_graph().summary()

In [None]:
tf.keras.preprocessing.sequence.pad_sequences(IRESbase['IRES.Sequence'].to_numpy(), 
                                              value='N', padding='post', truncating='post', maxlen=600, dtype='object')

In [None]:
tf.keras.layers.TextVectorization('ATCG')

In [None]:
vocab_data = ["A", "C", "G", "T"]
max_len = CFG.feature_seq_len

vectorize_layer = tf.keras.layers.TextVectorization(
 max_tokens=max_features,
 output_mode='int',
 output_sequence_length=max_len,
 vocabulary=vocab_data)


In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
t  = Tokenizer(num_words=5,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    lower=False, char_level=True, oov_token=None,
    document_count=0)
fit_text = "ACGTN"
t.fit_on_texts(fit_text)

test_text = "NCGTA"
sequences = t.texts_to_sequences(test_text)

print("sequences : ",sequences,'\n')

print("word_index : ", t.word_index)

In [None]:
data_valid['seq'].iloc[0]

In [None]:
test=t.texts_to_sequences(data_valid['seq'].iloc[0])

In [None]:
class PositionEmbeddingFixedWeights(Layer):
    def __init__(self, sequence_length, vocab_size, output_dim, **kwargs):
        super(PositionEmbeddingFixedWeights, self).__init__(**kwargs)
        word_embedding_matrix = self.get_position_encoding(vocab_size, output_dim)   
        position_embedding_matrix = self.get_position_encoding(sequence_length, output_dim)                                          
        self.word_embedding_layer = Embedding(
            input_dim=vocab_size, output_dim=output_dim,
            weights=[word_embedding_matrix],
            trainable=False
        )
        self.position_embedding_layer = Embedding(
            input_dim=sequence_length, output_dim=output_dim,
            weights=[position_embedding_matrix],
            trainable=False
        )
             
    def get_position_encoding(self, seq_len, d, n=10000):
        import numpy as np
        P = np.zeros((seq_len, d))
        for k in range(seq_len):
            for i in np.arange(int(d/2)):
                denominator = np.power(n, 2*i/d)
                P[k, 2*i] = np.sin(k/denominator)
                P[k, 2*i+1] = np.cos(k/denominator)
        return P


    def call(self, inputs):        
        position_indices = tf.range(tf.shape(inputs)[-1])
        embedded_words = self.word_embedding_layer(inputs)
        embedded_indices = self.position_embedding_layer(position_indices)
        return embedded_words + embedded_indices

In [None]:
padding_mask

In [None]:
tf.squeeze(test)

In [None]:
np.array(test).shape

In [None]:
len(test)

In [None]:
IRESbase

In [None]:
from tensorflow.keras.preprocessing.text import tokenizer_from_json
with open('nucl.json') as f:
    contents = f.readlines()
    t = tokenizer_from_json(contents[0])

In [None]:
contents[0]