In [39]:
import tensorflow_hub as hub
import tensorflow as tf 
import tensorflow_text
import keras
import tensorflow_addons as tfa

In [5]:
# Set random seed for reproducibility
tf.keras.utils.set_random_seed(42)

In [49]:
def rnn_bert(n_classes = 3, layers_shape = (32, 32), rnn_cell = 'lstm', learning_rate=3e-5, label_smoothing=0.1):

    # URLS to BERT encoder and preprocess models on tfhub
    bert_model_url = 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1'
    bert_preprocess_url = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'

    # Model will take in whole text sequences
    text_input = tf.keras.Input(shape=(), dtype=tf.string, name='text')

    # Preprocess the input for BERT encoder
    preprocess_layer = hub.KerasLayer(bert_preprocess_url, name='preprocess')

    encoder_input = preprocess_layer(text_input)

    # Encode the input
    bert_encoder = hub.KerasLayer(bert_model_url, name='bert_encoder')
    bert_output = bert_encoder(encoder_input)
    
    sequence = bert_output['sequence_output']   
    # Add RNN layers
    last_layer = sequence
    for i in range(len(layers_shape)):
        # Run the sequnece through the RNN layer
        ret_seq = True
        if i == len(layers_shape) - 1:
            ret_seq = False

        if rnn_cell == 'lstm':
            new_layer = tf.keras.layers.LSTM(layers_shape[i], return_sequences=ret_seq)(last_layer)

        elif rnn_cell == 'gru':
            new_layer = tf.keras.layers.Bidirectional(
                tf.keras.layers.GRU(layers_shape[i], return_sequences=ret_seq),
                merge_mode='sum')(last_layer)

        last_layer = new_layer

    # Predict the resulting classes with a softmax
    classes_pred = tf.keras.layers.Dense(n_classes, activation='softmax')(last_layer)

    # Set metrics for accuracy
    metrics= [
        tf.keras.metrics.CategoricalAccuracy(name='accuracy'), 
        tfa.metrics.F1Score(n_classes, average='weighted', name='f1_weighted'),
        tfa.metrics.F1Score(n_classes, average='macro', name='f1_macro'),
        tfa.metrics.FBetaScore(n_classes, average='macro', beta=0.5, name='f_0.5 macro')
        ]
  
    # Finalize model
    model = keras.Model(inputs=text_input, outputs=classes_pred)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), 
        loss=tf.losses.CategoricalCrossentropy(label_smoothing=label_smoothing), 
        metrics=metrics
    )

    print(model.summary())

    return model

In [34]:
import re
import numpy as np

def preprocess(data : np.ndarray):
    """
    Text Preprocessor 
 
    Preprocess the text, replacing links with '[link]' and other
    special character removals.

    :param data: Source Dataframe
    :return: Preprocessed text
    """

    # Initial text cleanup
    for i in range(data.shape[0]):
        post = data[i]
        post = re.sub('\n', ' ', post) # remove newline chars
        post = re.sub('[\(\)\[\]\{\}]', ' ', post) # remove braces
        post = post.lower()
        data.iloc[i] = re.sub('http[\w:/\.\=\#\?\-\$\&]+', '[link]', post)

    return data

In [53]:
from sklearn.model_selection import train_test_split, StratifiedKFold

def create_dataset(data, idxs):
    train_data, val_data, train_target, val_target = train_test_split(data.text[idxs], data.target[idxs], test_size=0.2, shuffle=True, random_state=42, stratify=data.target[idxs])
    
    train_ds = tf.data.Dataset.from_tensor_slices((train_data, train_target))
    val_ds = tf.data.Dataset.from_tensor_slices((val_data, val_target))

    return train_ds, val_ds

def train_model(data):
    data.text = preprocess(data.text)
    n_classes = len(data.target.unique())
    
    folder = StratifiedKFold()
    tf.debugging.disable_traceback_filtering()
    for train_idxs, test_idxs in folder.split(data.text, data.target):
        train_ds, val_ds = create_dataset(data, train_idxs)
        model = rnn_bert(n_classes)
        model.fit(train_ds, validation_data = val_ds, epochs=100)

        # Calculate metrics
        loss, accuracy, f1_macro, f1_weighted, f05_macro = model.evaluate(data.text[test_idxs], data.target[test_idxs], use_multiprocessing=True)

        print(f'Acc : {accuracy}, F1 : {f1_macro}, F1_weighted : {f1_weighted}, F05 : {f05_macro}')

In [35]:
import pandas as pd

# Load data
data = pd.read_json('..\\Data\\train_dataset_alt_new.json', encoding='utf-8')
data

Unnamed: 0,text,type,id,target
0,DFV's Roaring Kitty Spreadsheet Recreated with...,reddit_post,qicrti,2
1,From an open-source data engineer... Is stock ...,reddit_post,qia0tt,2
2,DWAC-Warrant / long-dated call arbitrage? Long...,reddit_post,qhvtqi,2
3,"$CHWY DD This DD is short, because the logic i...",reddit_post,qhnq2y,2
4,Time to Hedge Long Equity Exposure In my opini...,reddit_post,qh7yld,2
...,...,...,...,...
1652,#GME #Gamestop still out of stock on PS5 https...,tweet,1375079563844280324,0
1653,#GME #GameStop don't be alarmed. This morning ...,tweet,1466788507494592512,1
1654,"Ironically, GameStop might well stop the whole...",tweet,1354525266547236865,1
1655,@DiamondApe8 @Mindy831518 @jkarn88 @MInk547076...,tweet,1476305947442044930,1


In [54]:
train_model(data)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.iloc[i] = re.sub('http[\w:/\.\=\#\?\-\$\&]+', '[link]', post)


Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 text (InputLayer)              [(None,)]            0           []                               
                                                                                                  
 preprocess (KerasLayer)        {'input_type_ids':   0           ['text[0][0]']                   
                                (None, 128),                                                      
                                 'input_word_ids':                                                
                                (None, 128),                                                      
                                 'input_mask': (Non                                               
                                e, 128)}                                                    



ValueError: in user code:

    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\distribute\distribute_lib.py", line 1315, in run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\distribute\distribute_lib.py", line 2891, in call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\distribute\distribute_lib.py", line 3692, in _call_for_each_replica
        return fn(*args, **kwargs)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 61, in error_handler
        return fn(*args, **kwargs)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 557, in __call__
        return super().__call__(*args, **kwargs)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 61, in error_handler
        return fn(*args, **kwargs)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
        outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 100, in error_handler
        raise e
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
        return fn(*args, **kwargs)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\functional.py", line 510, in call
        return self._run_internal_graph(inputs, training=training, mask=mask)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\functional.py", line 667, in _run_internal_graph
        outputs = node.layer(*args, **kwargs)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 61, in error_handler
        return fn(*args, **kwargs)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
        outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 155, in error_handler
        raise new_e.with_traceback(e.__traceback__) from None
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
        return fn(*args, **kwargs)
    File "C:\Users\Samo\AppData\Local\Temp\__autograph_generated_filetfzdalb5.py", line 74, in tf__call  **
        ag__.if_stmt(ag__.not_(ag__.ld(self)._has_training_argument), if_body_3, else_body_3, get_state_3, set_state_3, ('result', 'training'), 1)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py", line 1363, in if_stmt
        _py_if_stmt(cond, body, orelse)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\autograph\operators\control_flow.py", line 1416, in _py_if_stmt
        return body() if cond else orelse()
    File "C:\Users\Samo\AppData\Local\Temp\__autograph_generated_filetfzdalb5.py", line 72, in else_body_3
        result = ag__.converted_call(ag__.ld(smart_cond).smart_cond, (ag__.ld(training), ag__.autograph_artifact(lambda : ag__.converted_call(ag__.ld(f), (), dict(training=True), fscope)), ag__.autograph_artifact(lambda : ag__.converted_call(ag__.ld(f), (), dict(training=False), fscope))), None, fscope)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\framework\smart_cond.py", line 54, in smart_cond  **
        return false_fn()
    File "C:\Users\Samo\AppData\Local\Temp\__autograph_generated_filetfzdalb5.py", line 72, in <lambda>
        result = ag__.converted_call(ag__.ld(smart_cond).smart_cond, (ag__.ld(training), ag__.autograph_artifact(lambda : ag__.converted_call(ag__.ld(f), (), dict(training=True), fscope)), ag__.autograph_artifact(lambda : ag__.converted_call(ag__.ld(f), (), dict(training=False), fscope))), None, fscope)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\saved_model\load.py", line 704, in _call_attribute  **
        return instance.__call__(*args, **kwargs)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\util\traceback_utils.py", line 141, in error_handler
        return fn(*args, **kwargs)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\eager\def_function.py", line 915, in __call__
        result = self._call(*args, **kwds)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\eager\def_function.py", line 954, in _call
        results = self._stateful_fn(*args, **kwds)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\eager\function.py", line 2495, in __call__
        filtered_flat_args) = self._maybe_define_function(args, kwargs)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\eager\function.py", line 2760, in _maybe_define_function
        graph_function = self._create_graph_function(args, kwargs)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\eager\function.py", line 2670, in _create_graph_function
        func_graph_module.func_graph_from_py_func(
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\framework\func_graph.py", line 1247, in func_graph_from_py_func
        func_outputs = python_func(*func_args, **func_kwargs)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\eager\def_function.py", line 677, in wrapped_fn
        out = weak_wrapped_fn().__wrapped__(*args, **kwds)
    File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\saved_model\function_deserialization.py", line 295, in restored_function_body
        raise ValueError(

    ValueError: Exception encountered when calling layer "preprocess" "                 f"(type KerasLayer).
    
    in user code:
    
        File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow_hub\keras_layer.py", line 237, in call  *
            result = smart_cond.smart_cond(training,
        File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\saved_model\load.py", line 704, in _call_attribute  **
            return instance.__call__(*args, **kwargs)
        File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\util\traceback_utils.py", line 141, in error_handler
            return fn(*args, **kwargs)
        File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\eager\def_function.py", line 915, in __call__
            result = self._call(*args, **kwds)
        File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\eager\def_function.py", line 954, in _call
            results = self._stateful_fn(*args, **kwds)
        File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\eager\function.py", line 2495, in __call__
            filtered_flat_args) = self._maybe_define_function(args, kwargs)
        File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\eager\function.py", line 2760, in _maybe_define_function
            graph_function = self._create_graph_function(args, kwargs)
        File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\eager\function.py", line 2670, in _create_graph_function
            func_graph_module.func_graph_from_py_func(
        File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\framework\func_graph.py", line 1247, in func_graph_from_py_func
            func_outputs = python_func(*func_args, **func_kwargs)
        File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\eager\def_function.py", line 677, in wrapped_fn
            out = weak_wrapped_fn().__wrapped__(*args, **kwds)
        File "c:\Users\Samo\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\saved_model\function_deserialization.py", line 295, in restored_function_body
            raise ValueError(
    
        ValueError: Could not find matching concrete function to call loaded from the SavedModel. Got:
          Positional arguments (3 total):
            * <tf.Tensor 'inputs:0' shape=() dtype=string>
            * False
            * None
          Keyword arguments: {}
        
         Expected these arguments to match one of the following 4 option(s):
        
        Option 1:
          Positional arguments (3 total):
            * TensorSpec(shape=(None,), dtype=tf.string, name='sentences')
            * False
            * None
          Keyword arguments: {}
        
        Option 2:
          Positional arguments (3 total):
            * TensorSpec(shape=(None,), dtype=tf.string, name='sentences')
            * True
            * None
          Keyword arguments: {}
        
        Option 3:
          Positional arguments (3 total):
            * TensorSpec(shape=(None,), dtype=tf.string, name='inputs')
            * False
            * None
          Keyword arguments: {}
        
        Option 4:
          Positional arguments (3 total):
            * TensorSpec(shape=(None,), dtype=tf.string, name='inputs')
            * True
            * None
          Keyword arguments: {}
    
    
    Call arguments received by layer "preprocess" "                 f"(type KerasLayer):
      • inputs=tf.Tensor(shape=(), dtype=string)
      • training=True
