In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional, BatchNormalization
from tensorflow.keras.utils import to_categorical
import IO
from keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')



In [2]:
X_train, X_test, y_train, y_test, X_train_pad, X_test_pad, y_train_cat, y_test_cat, _ = IO.load_training_data("MenKreuz")

In [3]:
# # Define the model
# model = Sequential()
# model.add(Embedding(input_dim=10000, output_dim=128, input_length=100))
# model.add(Bidirectional(LSTM(units=128, return_sequences=True)))
# model.add(Dropout(0.5))
# model.add(Bidirectional(LSTM(units=64)))
# model.add(Dense(units=2, activation='softmax'))
# 
# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [5]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Embedding, Input, LayerNormalization, Dropout, GlobalAveragePooling1D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Transformer parameters
maxlen = 100
vocab_size = 10000
embed_dim = 128
num_heads = 4
ff_dim = 128
dropout_rate = 0.5
num_classes = 2

class MultiHeadSelfAttention(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads=8):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.projection_dim = embed_dim // num_heads
        self.query_dense = Dense(embed_dim)
        self.key_dense = Dense(embed_dim)
        self.value_dense = Dense(embed_dim)
        self.combine_heads = Dense(embed_dim)

    def attention(self, query, key, value):
        score = tf.matmul(query, key, transpose_b=True)
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)
        query = self.separate_heads(query, batch_size)
        key = self.separate_heads(key, batch_size)
        value = self.separate_heads(value, batch_size)
        attention, weights = self.attention(query, key, value)
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(attention, (batch_size, -1, self.embed_dim))
        output = self.combine_heads(concat_attention)
        return output

class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = tf.keras.Sequential(
            [Dense(ff_dim, activation="relu"), Dense(embed_dim),]
        )
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

def build_transformer_model():
    inputs = Input(shape=(maxlen,))
    embedding_layer = Embedding(vocab_size, embed_dim)(inputs)
    transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim, dropout_rate)
    x = transformer_block(embedding_layer)
    x = GlobalAveragePooling1D()(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(20, activation="relu")(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(num_classes, activation="softmax")(x)

    model = Model(inputs=inputs, outputs=outputs)
    return model

# Build and compile the Transformer model
transformer_model = build_transformer_model()
transformer_model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Print the model summary
transformer_model.summary()


TypeError: missing a required argument: 'training'

In [None]:
# from keras.models import Sequential
# from keras.layers import Embedding, LSTM, Dropout, Dense, BatchNormalization, Bidirectional
# from keras.losses import BinaryCrossentropy
# from keras.optimizers import Adam
# from keras.regularizers import l2
# from keras.callbacks import ReduceLROnPlateau
# 
# # Define the model
# model = Sequential([
#     Embedding(input_dim=10000, output_dim=128, input_length=100),
#     Bidirectional(LSTM(128, return_sequences=True)),
#     BatchNormalization(),  # Batch Normalization to stabilize learning
#     Dropout(0.5),
#     Bidirectional(LSTM(128)),
#     BatchNormalization(),
#     Dropout(0.5),
#     Dense(64, activation='relu', kernel_regularizer=l2(0.001)),  # Dense layer with L2 regularization
#     Dropout(0.5),
#     Dense(2, activation='softmax')  # Output layer
# ])
# 
# # Compile the model
# model.compile(
#     loss=BinaryCrossentropy(from_logits=True),
#     optimizer=Adam(1e-4),
#     metrics=['accuracy']
# )
# 
# # Learning rate scheduler callback
# lr_scheduler = ReduceLROnPlateau(
#     monitor='val_loss',
#     factor=0.5,
#     patience=3,
#     min_lr=1e-6,
#     verbose=1
# )

In [None]:
history = model.fit(X_train_pad, y_train_cat, epochs=30, batch_size=32, validation_data=(X_test_pad, y_test_cat))


In [None]:
loss, accuracy = model.evaluate(X_test_pad, y_test_cat)
print(f'Test Loss: {loss}')
print(f'Test Accuracy: {accuracy}')


In [None]:
IO.save_model(model, history, 'lstm', "transformer")