## Model Development

Objective: Develop a custom transformer model using Keras framework and then enhance the project by retraining a pre-trained model for comparision.

Hints:
- Modular Code: Keep your code modular to facilitate easy switching between the custom model and the BART model for different experiments.
- Documentation: Document each step in your model development process, including parameter settings and the rationale behind chosen architectures.
- Version Control: Commit all changes, especially new scripts and configurations, to GitHub to maintain a robust version history.
- Continuous Monitoring: Regularly monitor training progress using TensorBoard integrated with Keras to visualize performance metrics.

#### Imports

In [38]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, Dropout
from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization, GlobalAveragePooling1D

import numpy as np
import pandas as pd

In [32]:
# Check if GPU enabled 
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

### Develop Custom Transformer Model with Keras

#### Get the data

In [83]:
#data = np.load('../data/cnn_dailymail.npz', allow_pickle=True)
#data = np.load('../data/news_api_data.npz', allow_pickle=True)
#data = np.load('../data/cnn_dailymail_small.npz', allow_pickle=True)
data = np.load('../data/news_api_data_small.npz', allow_pickle=True)

X_train = data['X_train']
y_train = data['y_train']
X_test = data['X_test']
y_test = data['y_test']
X_val = data['X_val']
y_val = data['y_val']

In [87]:
X = np.concatenate((X_train, X_test, X_val))
y = np.concatenate((y_train, y_test, y_val))
max_len = len(max(X, key=len))
max_len

17180

In [88]:
tokenizer = Tokenizer(num_words=max_len)
tokenizer.fit_on_texts(X)
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)
X_val = tokenizer.texts_to_sequences(X_val)
y_train = tokenizer.texts_to_sequences(y_train)
y_test = tokenizer.texts_to_sequences(y_test)
y_val = tokenizer.texts_to_sequences(y_val)


In [89]:
# Standardize Data by padding sequences
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)
X_val = pad_sequences(X_val, maxlen=max_len)

### Select and Retrain Pre-trained Model

In [75]:
# Build the Transformer Block
def transformer_block(x):

    # MultiHead Attention
    attn_output = MultiHeadAttention(num_heads=2, key_dim=64)(x, x)
    attn_output = Dropout(.2)(attn_output)
    out1 = LayerNormalization()(x + attn_output)

    # Feed Forward Network
    ffn_output = Dense(2048, activation='relu')(out1)
    ffn_output = Dropout(.2)(ffn_output)
    ffn_output = Dense(64, activation='relu')(ffn_output)
    out2 = LayerNormalization(epsilon=1e-6)(out1 + ffn_output)

    return out2

In [76]:
def build_model(max_len):

  vocab_size = 10000
 
  inputs = Input(shape=(max_len,))
  embedding_layer = Embedding(vocab_size, 64)(inputs)

  x = transformer_block(embedding_layer)
  x = GlobalAveragePooling1D()(x)
  x = Dropout(0.1)(x) # add droput layer
  outputs = Dense(2, activation="softmax")(x) # dense layer

  model = Model(inputs=inputs, outputs=outputs) # define inputs and outputs
  model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]) # define params
  return model

In [77]:
model = build_model(max_len)

In [78]:
history = model.fit(
    X_train, y_train, batch_size=32, epochs=2, validation_data=(X_val, y_val)
)

ValueError: Unrecognized data type: x=[[  0   0   0 ...   1 199 200]
 [  0   0   0 ...  93  11 209]
 [  0   0   0 ...  54 111 113]
 [  0   0   0 ...  15   4 354]
 [  0   0   0 ... 368  16 369]
 [  0   0   0 ...  32 378   9]] (of type <class 'numpy.ndarray'>)

In [None]:
# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test, batch_size=128)

### Setup MLflow for Experiment tracking

### Training and Evaluation Setup