In [1]:
import numpy as np
from sklearn.utils import class_weight
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import backend
import pandas as pd
import tensorflow as tf
import random
import os
from keras import layers
from sklearn.metrics import confusion_matrix

# Vocabulary size — presumably token ids fall in [0, NUM_TOKEN); TODO confirm against the tokenizer.
NUM_TOKEN = 5000
# Fixed length the "prompt" token sequences are padded/truncated to.
MAX_PRO_LEN = 64
# Fixed length the "txt" token sequences are padded/truncated to.
MAX_TXT_LEN = 256
# Upper bound on training epochs (early stopping may end training sooner).
NO_EPO = 60
# Mini-batch size — presumably for training; TODO confirm where it is consumed.
NO_BAT = 128

# Input files: machine- and human-written samples per domain, plus the unlabeled test set.
MACHINE_1_P = "./data/set1_machine.json"
HUMAN_1_P = "./data/set1_human.json"
MACHINE_2_P = "./data/set2_machine.json"
HUMAN_2_P = "./data/set2_human.json"
TEST_P = "./data/test.json"
# Seed used for Python's `random` module and for the train/test split.
RANDOM_SEED = 42
# Integer class labels assigned to machine-written and human-written samples.
MACHINE_IND = 0
HUMAN_IND = 1
# Fraction of each domain held out as a local test split.
TEST_FRA = 0.01

class DomainData:
    """Features and labels of one domain, with split / resample / padding helpers.

    Typical call order: ``t_t_spli`` -> optional ``down_sampling`` /
    ``over_sampling`` / ``test_down`` -> ``add_padding``.

    Attributes set by the helpers:
        train_x, test_x, train_y, test_y: result of the stratified split.
        train_prompt, train_txt, test_prompt, test_txt: padded integer arrays.
        train_label, test_label: label arrays.
        class_weights: ``{class: weight}`` balanced weights of the train labels.
    """

    def __init__(self, x, y):
        """Store the raw features ``x`` (needs "prompt" and "txt" columns) and labels ``y``."""
        self.x = x
        self.y = y

    @staticmethod
    def _class_positions(labels):
        """Return (machine_positions, human_positions) as lists of *positional* indices.

        Positions (not index labels) are returned so that they are always valid
        for ``.iloc``, even after a previous resampling left a non-contiguous index.
        """
        values = np.asarray(labels)
        mac_pos = np.flatnonzero(values == MACHINE_IND).tolist()
        hum_pos = np.flatnonzero(values == HUMAN_IND).tolist()
        return mac_pos, hum_pos

    def t_t_spli(self, test_size, random_state):
        """Stratified train/test split of ``self.x`` / ``self.y``; indices are reset to 0..n-1."""
        self.random_state = random_state
        self.train_x, self.test_x, self.train_y, self.test_y = train_test_split(
            self.x,
            self.y,
            test_size=test_size,
            stratify=self.y,
            random_state=random_state,
        )
        self.train_x = self.train_x.reset_index(drop=True)
        self.train_y = self.train_y.reset_index(drop=True)
        self.test_x = self.test_x.reset_index(drop=True)
        self.test_y = self.test_y.reset_index(drop=True)

    def add_padding(self, padding, prompt_len, txt_len):
        """Pad/truncate prompt and text sequences and compute balanced class weights.

        Args:
            padding: ``'pre'`` or ``'post'``, forwarded to ``pad_sequences``.
            prompt_len: target length of the "prompt" sequences.
            txt_len: target length of the "txt" sequences.
        """
        self.train_label = self.train_y.to_numpy()
        self.test_label = self.test_y.to_numpy()

        unique_classes = np.unique(self.train_label)
        class_weights = class_weight.compute_class_weight(
            "balanced", classes=unique_classes, y=self.train_label
        )
        self.class_weights = dict(zip(unique_classes, class_weights))

        self.prompt_len = prompt_len
        self.txt_len = txt_len

        self.train_prompt = pad_sequences(self.train_x["prompt"], padding=padding, maxlen=prompt_len)
        self.train_txt = pad_sequences(self.train_x["txt"], padding=padding, maxlen=txt_len)
        self.test_prompt = pad_sequences(self.test_x["prompt"], padding=padding, maxlen=prompt_len)
        self.test_txt = pad_sequences(self.test_x["txt"], padding=padding, maxlen=txt_len)

    def down_sampling(self):
        """Balance the training set by truncating the larger class to the smaller one's size."""
        mac_pos, hum_pos = self._class_positions(self.train_y)
        lower = min(len(mac_pos), len(hum_pos))
        selected = mac_pos[:lower] + hum_pos[:lower]
        # Shuffle BEFORE selecting rows; otherwise the rows stay grouped by class,
        # which matters for consumers that slice without shuffling
        # (e.g. Keras `validation_split` takes the tail of the data).
        random.shuffle(selected)
        self.train_x = self.train_x.iloc[selected]
        self.train_y = self.train_y.iloc[selected]

    def over_sampling(self, upper_fra):
        """Cap the majority class at ``upper_fra`` x minority size and duplicate minority rows to match.

        Args:
            upper_fra: desired majority size as a multiple of the minority size.
                The majority class is never grown beyond its real size.
        """
        mac_pos, hum_pos = self._class_positions(self.train_y)
        lower = min(len(mac_pos), len(hum_pos))
        if lower == len(mac_pos):
            minority, majority = mac_pos, hum_pos
        else:
            minority, majority = hum_pos, mac_pos

        upper = int(lower * upper_fra) if lower * upper_fra < len(majority) else len(majority)
        major = majority[:upper]
        minor = minority[:lower]

        # Draw the extra rows from the MINORITY class, whichever one it is.
        # range() of a non-positive count is empty, so upper_fra < 1 adds nothing.
        oversampled = [random.choice(minor) for _ in range(upper - lower)]

        selected = major + minor + oversampled
        random.shuffle(selected)

        self.train_x = self.train_x.iloc[selected]
        self.train_y = self.train_y.iloc[selected]

    def test_down(self, frac = 1):
        """Down-sample the test split.

        Args:
            frac: with ``frac > 1`` the human class is reduced to ``lower / frac``
                rows; otherwise the machine class is reduced to ``lower * frac``
                rows, where ``lower`` is the size of the smaller class.
        """
        mac_pos, hum_pos = self._class_positions(self.test_y)
        lower = min(len(mac_pos), len(hum_pos))
        if frac > 1:
            selected = mac_pos[:lower] + hum_pos[:int(lower / frac)]
        else:
            selected = mac_pos[:int(lower * frac)] + hum_pos[:lower]
        # Shuffle before selecting so the shuffle actually affects the row order.
        random.shuffle(selected)
        self.test_x = self.test_x.iloc[selected]
        self.test_y = self.test_y.iloc[selected]




def f1_loss(y_true, y_pred):
    """Return the negated F1 score of ``y_pred`` against ``y_true``.

    Counts are obtained by clipping to [0, 1] and rounding, so the value is
    based on hard decisions. The sign is flipped so that a better F1 yields a
    smaller number.
    """

    def _rounded_count(values):
        # Clip to [0, 1], round to hard 0/1 decisions, then count them.
        return backend.sum(backend.round(backend.clip(values, 0, 1)))

    true_pos = _rounded_count(y_true * y_pred)
    false_pos = _rounded_count(y_pred - y_true)
    false_neg = _rounded_count(y_true - y_pred)

    # epsilon keeps every denominator strictly positive
    precision = true_pos / (true_pos + false_pos + backend.epsilon())
    recall = true_pos / (true_pos + false_neg + backend.epsilon())

    f1_value = 2 * precision * recall / (precision + recall + backend.epsilon())
    return -f1_value



import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization, Dropout, Layer
from tensorflow.keras.layers import Embedding, Input, GlobalAveragePooling1D, Dense, Concatenate
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential, Model
import numpy as np
import warnings

class TransformerBlock(Layer):
    """Transformer encoder block: self-attention and a feed-forward network.

    Each sub-layer is followed by dropout, a residual connection and layer
    normalisation.

    Args:
        embed_dim: size of the input/output feature dimension.
        num_heads: number of attention heads.
        ff_dim: hidden size of the feed-forward network.
        rate: dropout rate applied after attention and after the feed-forward network.
        **kwargs: standard Keras ``Layer`` arguments (``name``, ``dtype``, ...),
            needed so the layer can be rebuilt from a saved config.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can serialise them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = Sequential(
            [Dense(ff_dim, activation="relu"),
             Dense(embed_dim),]
        )
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=None):
        """Apply self-attention then the feed-forward network, each with residual + layer norm."""
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer survives save/load."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config

class TokenAndPositionEmbedding(Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        maxlen: number of rows in the position-embedding table.
        vocab_size: number of rows in the token-embedding table.
        embed_dim: embedding width shared by both tables.
        **kwargs: standard Keras ``Layer`` arguments (``name``, ``dtype``, ...),
            needed so the layer can be rebuilt from a saved config.
    """

    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can serialise them.
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim

        self.token_emb = Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids ``x`` and add the embedding of each position along the last axis."""
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the constructor arguments so the layer survives save/load."""
        config = super().get_config()
        config.update(
            {
                "maxlen": self.maxlen,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

# Early stopping watches the *training* loss: stop after 3 epochs without improvement.
# NOTE(review): the ModelCheckpoint callbacks used later monitor 'val_loss' —
# confirm that monitoring 'loss' here is intended.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience = 3)

# Restrict TensorFlow to the first GPU before any device is queried.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Seed every RNG involved (Python, NumPy, TensorFlow); seeding only `random`
# leaves weight initialisation and Keras batch shuffling non-reproducible.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))
if gpus:
    print("Using GPU")


2023-04-22 01:09:47.473139: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-04-22 01:09:47.575045: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Num GPUs Available:  1
Using GPU


2023-04-22 01:09:50.568737: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-22 01:09:50.700331: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-22 01:09:50.701287: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


## Domain 2: train on domains 1 + 2 with domain-2 samples up-weighted

In [2]:
over_fra = 1.6      # over-sampling ratio (only used by the disabled over_sampling call below)
weight_fra = 300    # weight of every domain-2 training sample relative to a domain-1 sample

## _______________ Read data from domain 1 _______________
man_1_df = pd.read_json(HUMAN_1_P).assign(label=HUMAN_IND)
mac_1_df = pd.read_json(MACHINE_1_P).drop(columns="machine_id").assign(label=MACHINE_IND)
domain_1_df = pd.concat([man_1_df, mac_1_df])

domain_1 = DomainData(domain_1_df[["prompt", "txt"]], domain_1_df["label"])
domain_1.t_t_spli(TEST_FRA, RANDOM_SEED)
# domain_1.down_sampling()
domain_1.add_padding('post', MAX_PRO_LEN, MAX_TXT_LEN)

## _______________ Read data from domain 2 _______________
man_2_df = pd.read_json(HUMAN_2_P).assign(label=HUMAN_IND)
mac_2_df = pd.read_json(MACHINE_2_P).drop(columns="machine_id").assign(label=MACHINE_IND)
domain_2_df = pd.concat([man_2_df, mac_2_df])

domain_2 = DomainData(domain_2_df[["prompt", "txt"]], domain_2_df["label"])
domain_2.t_t_spli(TEST_FRA, RANDOM_SEED)
# domain_2.over_sampling(over_fra)
domain_2.test_down()
domain_2.add_padding('post', MAX_PRO_LEN, MAX_TXT_LEN)



## _______________ weight data _______________
# Domain-1 samples weigh 1.0, domain-2 samples weigh `weight_fra`.
sample_weight_1 = np.ones(len(domain_1.train_label))
sample_weight_2 = np.full(len(domain_2.train_label), float(weight_fra))
sample_weight = np.concatenate([sample_weight_1, sample_weight_2])

# Stack both domains, domain 1 first (same order as the weights above).
train_prompt = np.concatenate([domain_1.train_prompt, domain_2.train_prompt])
train_txt = np.concatenate([domain_1.train_txt, domain_2.train_txt])
train_label = np.concatenate([domain_1.train_label, domain_2.train_label])

# Shuffle the four aligned arrays together, one tuple per sample.
data = list(zip(train_prompt, train_txt, train_label, sample_weight))
random.shuffle(data)

train_prompt, train_txt, train_label, sample_weight = (
    np.array(column) for column in zip(*data)
)

In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization, Dropout, Layer
from tensorflow.keras.layers import Embedding, Input, GlobalAveragePooling1D, Dense, Concatenate
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential, Model
import numpy as np
import warnings
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

class TransformerBlock(Layer):
    """Transformer encoder block: self-attention and a feed-forward network.

    Each sub-layer is followed by dropout, a residual connection and layer
    normalisation.

    Args:
        embed_dim: size of the input/output feature dimension.
        num_heads: number of attention heads.
        ff_dim: hidden size of the feed-forward network.
        rate: dropout rate applied after attention and after the feed-forward network.
        **kwargs: standard Keras ``Layer`` arguments (``name``, ``dtype``, ...),
            needed so the layer can be rebuilt from a saved config.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can serialise them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = Sequential(
            [Dense(ff_dim, activation="relu"),
             Dense(embed_dim),]
        )
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=None):
        """Apply self-attention then the feed-forward network, each with residual + layer norm."""
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer survives save/load."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config

class TokenAndPositionEmbedding(Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        maxlen: number of rows in the position-embedding table.
        vocab_size: number of rows in the token-embedding table.
        embed_dim: embedding width shared by both tables.
        **kwargs: standard Keras ``Layer`` arguments (``name``, ``dtype``, ...),
            needed so the layer can be rebuilt from a saved config.
    """

    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can serialise them.
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim

        self.token_emb = Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids ``x`` and add the embedding of each position along the last axis."""
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the constructor arguments so the layer survives save/load."""
        config = super().get_config()
        config.update(
            {
                "maxlen": self.maxlen,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

vocab_size = NUM_TOKEN  # Size of the token-embedding table; token ids must lie in [0, vocab_size)
embed_dim = 128  # Embedding size for each token
num_heads = 8  # Number of attention heads
ff_dim = 20  # Hidden layer size in feed forward network inside transformer
epo_size = NO_EPO
batch_size = NO_BAT
checkpoint_path = 'trans_model_weighted.h5'
custom_objects = {
    'f1_loss': f1_loss,
    'TransformerBlock': TransformerBlock,
    'TokenAndPositionEmbedding': TokenAndPositionEmbedding,
}

# NOTE(review): this single block is applied to BOTH branches below, so the prompt
# and text encoders share their attention / feed-forward weights — confirm intended.
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)

# Prompt branch: embed -> transformer -> mean-pool -> small dense head
inputs_p = Input(shape=(MAX_PRO_LEN,))
embedding_layer = TokenAndPositionEmbedding(MAX_PRO_LEN, vocab_size, embed_dim)
x = embedding_layer(inputs_p)
x = transformer_block(x)
x = GlobalAveragePooling1D()(x)
x = Dropout(0.1)(x)
x = Dense(20, activation="relu")(x)
x = Dropout(0.1)(x)


# Text branch: same structure, with its own embedding sized for the longer sequence
inputs_t = Input(shape=(MAX_TXT_LEN,))
embedding_layer = TokenAndPositionEmbedding(MAX_TXT_LEN, vocab_size, embed_dim)
y = embedding_layer(inputs_t)
y = transformer_block(y)
y = GlobalAveragePooling1D()(y)
y = Dropout(0.1)(y)
y = Dense(20, activation="relu")(y)
y = Dropout(0.1)(y)


# Concatenate outputs from prompt and text branches; single sigmoid unit = P(label == 1)
merged = Concatenate()([x, y])
merged = Dense(units=64, activation='relu')(merged)
merged = Dense(20, activation="relu")(merged)
outputs = Dense(units=1, activation='sigmoid')(merged)
trans_model_2 = Model(inputs=[inputs_p, inputs_t], outputs=outputs)

# Compile and train; the checkpoint keeps only the epoch with the best validation loss
trans_model_2.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy", f1_loss])
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_path, monitor='val_loss', save_best_only=True)
trans_model_2.fit(
    [train_prompt, train_txt],
    train_label,
    epochs=epo_size,
    batch_size=batch_size,
    sample_weight=sample_weight,
    validation_split=0.2,
    callbacks=[callback, model_checkpoint],
)
print(f"Model Saved: {checkpoint_path}")

# Reload the best checkpoint (not the last epoch) before evaluating
trans_model_2 = tf.keras.models.load_model(checkpoint_path, custom_objects=custom_objects)
print(f"Model Loaded: {checkpoint_path}")

# The third metric is f1_loss, i.e. the NEGATED batch-wise F1; kept under its own
# name so it is not confused with the sklearn F1 computed below.
loss, accuracy, neg_f1_metric = trans_model_2.evaluate([domain_2.test_prompt, domain_2.test_txt], domain_2.test_label, verbose=0)
print("loss: ", loss)
print("accuracy", accuracy)

# Threshold the sigmoid output at 0.5 to get hard 0/1 predictions
trans_2_pre_rnn = trans_model_2.predict([domain_2.test_prompt, domain_2.test_txt])
trans_2_pre_rnn = np.round(trans_2_pre_rnn).flatten().astype(int)
confusion = confusion_matrix(domain_2.test_label, trans_2_pre_rnn)
print(confusion)
f1 = f1_score(domain_2.test_label, trans_2_pre_rnn)
print("f1-score: ", f1)



2023-04-22 00:27:59.624895: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-22 00:27:59.625503: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-22 00:27:59.626109: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-22 00:28:00.579130: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-22 00:28:00.579818: I tensorflow/compile

Epoch 1/60


2023-04-22 00:28:19.638271: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-04-22 00:28:20.002139: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x4e707280 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-04-22 00:28:20.002172: I tensorflow/compiler/xla/service/service.cc:177]   StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
2023-04-22 00:28:20.006800: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-04-22 00:28:20.217638: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600
2023-04-22 00:28:20.333364: I ./tensorflow/compiler/jit/device_compiler.h:180] Compiled cluster using XLA!  This line is logged at most once for the lifetime o

Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Model Saved: trans_model_weighted.h5
Model Loaded: trans_model_weighted.h5
loss:  0.0002441598626319319
accuracy 1.0
[[1 0]
 [0 1]]
f1-score:  1.0


## Domain 1

In [2]:
## _______________ Read data from domain 1 _______________
# Label the human and machine samples, then stack them into one frame.
man_1_df = pd.read_json(HUMAN_1_P).assign(label=HUMAN_IND)
mac_1_df = pd.read_json(MACHINE_1_P).drop(columns="machine_id").assign(label=MACHINE_IND)
domain_1_df = pd.concat([man_1_df, mac_1_df])

# Split, rebalance the training part, balance the test part, then pad.
domain_1 = DomainData(domain_1_df[["prompt", "txt"]], domain_1_df["label"])
domain_1.t_t_spli(TEST_FRA, RANDOM_SEED)
domain_1.over_sampling(1.6)
domain_1.test_down()
domain_1.add_padding('post', MAX_PRO_LEN, MAX_TXT_LEN)


In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization, Dropout, Layer
from tensorflow.keras.layers import Embedding, Input, GlobalAveragePooling1D, Dense, Concatenate
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential, Model
import numpy as np
import warnings
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

class TransformerBlock(Layer):
    """Transformer encoder block: self-attention and a feed-forward network.

    Each sub-layer is followed by dropout, a residual connection and layer
    normalisation.

    Args:
        embed_dim: size of the input/output feature dimension.
        num_heads: number of attention heads.
        ff_dim: hidden size of the feed-forward network.
        rate: dropout rate applied after attention and after the feed-forward network.
        **kwargs: standard Keras ``Layer`` arguments (``name``, ``dtype``, ...),
            needed so the layer can be rebuilt from a saved config.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can serialise them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = Sequential(
            [Dense(ff_dim, activation="relu"),
             Dense(embed_dim),]
        )
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=None):
        """Apply self-attention then the feed-forward network, each with residual + layer norm."""
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer survives save/load."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config

class TokenAndPositionEmbedding(Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        maxlen: number of rows in the position-embedding table.
        vocab_size: number of rows in the token-embedding table.
        embed_dim: embedding width shared by both tables.
        **kwargs: standard Keras ``Layer`` arguments (``name``, ``dtype``, ...),
            needed so the layer can be rebuilt from a saved config.
    """

    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can serialise them.
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim

        self.token_emb = Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids ``x`` and add the embedding of each position along the last axis."""
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the constructor arguments so the layer survives save/load."""
        config = super().get_config()
        config.update(
            {
                "maxlen": self.maxlen,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

vocab_size = NUM_TOKEN  # Size of the token-embedding table; token ids must lie in [0, vocab_size)
embed_dim = 128  # Embedding size for each token
num_heads = 2  # Number of attention heads
ff_dim = 64  # Hidden layer size in feed forward network inside transformer
epo_size = NO_EPO
batch_size = NO_BAT
checkpoint_path = 'trans_model.h5'
custom_objects = {
    'f1_loss': f1_loss,
    'TransformerBlock': TransformerBlock,
    'TokenAndPositionEmbedding': TokenAndPositionEmbedding,
}

# NOTE(review): this single block is applied to BOTH branches below, so the prompt
# and text encoders share their attention / feed-forward weights — confirm intended.
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)

# Prompt branch: embed -> transformer -> mean-pool -> small dense head
inputs_p = Input(shape=(MAX_PRO_LEN,))
embedding_layer = TokenAndPositionEmbedding(MAX_PRO_LEN, vocab_size, embed_dim)
x = embedding_layer(inputs_p)
x = transformer_block(x)
x = GlobalAveragePooling1D()(x)
x = Dropout(0.1)(x)
x = Dense(20, activation="relu")(x)
x = Dropout(0.1)(x)


# Text branch: same structure, with its own embedding sized for the longer sequence
inputs_t = Input(shape=(MAX_TXT_LEN,))
embedding_layer = TokenAndPositionEmbedding(MAX_TXT_LEN, vocab_size, embed_dim)
y = embedding_layer(inputs_t)
y = transformer_block(y)
y = GlobalAveragePooling1D()(y)
y = Dropout(0.1)(y)
y = Dense(20, activation="relu")(y)
y = Dropout(0.1)(y)


# Concatenate outputs from prompt and text branches; single sigmoid unit = P(label == 1)
merged = Concatenate()([x, y])
merged = Dense(units=64, activation='relu')(merged)
merged = Dense(20, activation="relu")(merged)
outputs = Dense(units=1, activation='sigmoid')(merged)
trans_model_2 = Model(inputs=[inputs_p, inputs_t], outputs=outputs)

# Compile and train; the checkpoint keeps only the epoch with the best validation loss
trans_model_2.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy", f1_loss])
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_path, monitor='val_loss', save_best_only=True)
trans_model_2.fit(
    [domain_1.train_prompt, domain_1.train_txt],
    domain_1.train_label,
    epochs=epo_size,
    batch_size=batch_size,
    validation_split=0.2,
    callbacks=[callback, model_checkpoint],
)
print(f"Model Saved: {checkpoint_path}")

# Reload the best checkpoint (not the last epoch) before evaluating
trans_model_2 = tf.keras.models.load_model(checkpoint_path, custom_objects=custom_objects)
print(f"Model Loaded: {checkpoint_path}")

# The third metric is f1_loss, i.e. the NEGATED batch-wise F1; kept under its own
# name so it is not confused with the sklearn F1 computed below.
loss, accuracy, neg_f1_metric = trans_model_2.evaluate([domain_1.test_prompt, domain_1.test_txt], domain_1.test_label, verbose=0)
print("loss: ", loss)
print("accuracy", accuracy)

# Threshold the sigmoid output at 0.5 to get hard 0/1 predictions
trans_1_pre_rnn = trans_model_2.predict([domain_1.test_prompt, domain_1.test_txt])
trans_1_pre_rnn = np.round(trans_1_pre_rnn).flatten().astype(int)
confusion = confusion_matrix(domain_1.test_label, trans_1_pre_rnn)
print(confusion)
f1 = f1_score(domain_1.test_label, trans_1_pre_rnn)
print("f1-score: ", f1)



2023-04-22 01:09:54.344770: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-22 01:09:54.345259: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-22 01:09:54.345650: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-22 01:09:55.522950: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-22 01:09:55.523678: I tensorflow/compile

Epoch 1/60


2023-04-22 01:10:00.443255: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-04-22 01:10:00.520292: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x4dd54eb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-04-22 01:10:00.520331: I tensorflow/compiler/xla/service/service.cc:177]   StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
2023-04-22 01:10:00.525595: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-04-22 01:10:00.766116: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600
2023-04-22 01:10:00.875155: I ./tensorflow/compiler/jit/device_compiler.h:180] Compiled cluster using XLA!  This line is logged at most once for the lifetime o

Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Model Saved: trans_model.h5
Model Loaded: trans_model.h5
loss:  0.3119431436061859
accuracy 0.9142857193946838
[[32  3]
 [ 3 32]]
f1-score:  0.9142857142857143


### Predict on the test set

In [4]:
# Rows before this position are scored by model_1, the remaining rows by model_2.
DOMAIN_SPL = 600

test_df = pd.read_json(TEST_P)
test_prompt = pad_sequences(test_df["prompt"], padding="post", maxlen=MAX_PRO_LEN)
test_txt = pad_sequences(test_df["txt"], padding="post", maxlen=MAX_TXT_LEN)

# Custom layers/metric must be supplied when restoring the saved models.
custom_objects = {
    'f1_loss': f1_loss,
    'TransformerBlock': TransformerBlock,
    'TokenAndPositionEmbedding': TokenAndPositionEmbedding,
}
model_1 = tf.keras.models.load_model("trans_model.h5", custom_objects=custom_objects)
model_2 = tf.keras.models.load_model("trans_model_weighted.h5", custom_objects=custom_objects)

# Score each slice with its model, keeping the original row order.
head_scores = model_1.predict([test_prompt[:DOMAIN_SPL], test_txt[:DOMAIN_SPL]]).tolist()
tail_scores = model_2.predict([test_prompt[DOMAIN_SPL:], test_txt[DOMAIN_SPL:]]).tolist()
pred = head_scores + tail_scores

# Round the sigmoid scores to hard 0/1 labels.
pred = [int(score) for score in np.round(pred).flatten()]




In [5]:
# One row per test sample: index column "Id", value column "Predicted".
pred_df = pd.DataFrame(pred, columns=["Predicted"]).rename_axis("Id")

pred_df.to_csv("./data/result3.csv")