# Imports

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import matplotlib.pyplot as plt # plotting
import matplotlib.image as mpimg # images
import numpy as np #numpy
import seaborn as sns #plotting
import tensorflow.compat.v2 as tf #use tensorflow v2 as a main 
import tensorflow.keras as keras # required for high level applications
from sklearn.model_selection import train_test_split # split for validation sets
from sklearn.metrics import accuracy_score, f1_score, classification_report # metrics
import scipy
import pandas as pd
import re

# Load data

In [None]:
from sklearn.utils import shuffle

# Read the CSFD training reviews and shuffle the rows once. A fixed seed keeps
# the row order -- and therefore the train/validation split derived from it --
# identical across kernel restarts (the unseeded shuffle changed on every run).
train_data = pd.read_csv('train_data_csfd.csv')
train_data = shuffle(train_data, random_state=69)
train_data.head()

Unnamed: 0,text,label
34533,Proč? Vážení proč se investují peníze do těhle...,0
5846,"Bandička kamarádů se sebrala a natočila, jak z...",0
29554,Řečeno sportovní terminologií K.Branagh po své...,1
33829,Série o MM bez MM ....wtf? Co tím tvůrci sledo...,0
21184,Neco tak neuveritelne blbyho jsem uz dlouho ne...,0


In [None]:
# Pull the review texts and their labels out of the frame as plain Python lists.
x_train = train_data['text'].tolist()
y_train = train_data['label'].tolist()

# Optional: cap the training set for quicker experiments.
#x_train = x_train[:20000]
#y_train = y_train[:20000]

- Uncomment the two slicing lines above to train on only the first 20,000 examples.

In [None]:
# Test split, unpacked into a list of texts and a list of labels.
test_data = pd.read_csv('test_data_csfd.csv')
x_test, y_test = (test_data[column].tolist() for column in ('text', 'label'))

# Download FastText embeddings

In [None]:
!wget https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.cs.300.vec.gz

--2022-04-18 08:37:21--  https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.cs.300.vec.gz
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 104.22.75.142, 104.22.74.142, 172.67.9.4, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|104.22.75.142|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1262989069 (1.2G) [binary/octet-stream]
Saving to: ‘cc.cs.300.vec.gz’


2022-04-18 08:37:50 (42.4 MB/s) - ‘cc.cs.300.vec.gz’ saved [1262989069/1262989069]



In [None]:
import gzip

In [None]:
!gzip -d cc.cs.300.vec.gz

In [None]:
path_to_fasttext_file = './cc.cs.300.vec'

# Map every token in the fastText text file to its float32 vector.
embeddings_index = {}
# Read explicitly as UTF-8: the platform default encoding may corrupt or reject
# non-ASCII tokens.
with open(path_to_fasttext_file, encoding='utf-8') as f:
    for line_number, line in enumerate(f):
        # Split on the first ASCII space only; a bare str.split() also breaks on
        # Unicode whitespace (e.g. a no-break space) that may sit inside a token.
        fields = line.rstrip('\n').split(' ', 1)
        if len(fields) != 2:
            continue  # blank or malformed line
        word, coefs = fields
        # The first line of a .vec file is a "<vocab_size> <dim>" header, not a
        # word vector. Storing it would register a bogus one-element vector that
        # silently broadcasts across a whole embedding row if that token is
        # ever looked up.
        if line_number == 0 and word.isdigit() and coefs.strip().isdigit():
            continue
        embeddings_index[word] = np.fromstring(coefs, "f", sep=" ")

print("Found %s word vectors." % len(embeddings_index))

Found 2000000 word vectors.


# Train model and predict on test dataset

In [None]:
from tensorflow import string as tf_string
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from tensorflow.compat.v1.keras.layers import CuDNNGRU, CuDNNLSTM
from tensorflow.keras.layers import LSTM, GRU, Bidirectional

# Experiments 1–5: vectorizer parameters

In [None]:
from tensorflow import string as tf_string
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

# Hyper-parameters shared by the vectorizer and the embedding matrix.
embedding_dim = 300    # width of each embedded token representation
vocab_size = 30000     # upper bound on the number of distinct tokens kept
sequence_length = 50   # number of token ids produced per text

# Build the text -> integer-id layer and learn its vocabulary from the training texts.
vect_layer = TextVectorization(
    max_tokens=vocab_size,
    output_mode='int',
    output_sequence_length=sequence_length,
)
vect_layer.adapt(x_train)

In [None]:
# Vocabulary in index order, plus the lookup token -> integer id.
voc = vect_layer.get_vocabulary()
word_index = {token: position for position, token in enumerate(voc)}

In [None]:
# Peek at the first ten vocabulary entries.
voc[:10]

['', '[UNK]', 'a', 'se', 'to', 'je', 'na', 'v', 'jsem', 'že']

In [None]:
# Matrix height: vocabulary size plus two spare rows.
num_tokens = len(voc) + 2
hits = misses = 0

# Rows stay all-zero for tokens that have no pre-trained vector.
embedding_matrix = np.zeros((num_tokens, embedding_dim))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is None:
        misses += 1
        continue
    embedding_matrix[i] = embedding_vector
    hits += 1
print("Converted %d words (%d misses)" % (hits, misses))

Converted 28152 words (1848 misses)


In [None]:
# Hold out a stratified 10 % of the training examples for validation.
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.10,
    random_state=69,
    stratify=y_train,
)

## 1.Experiment

In [None]:
# Experiment 1: raw strings -> vectorizer -> frozen pre-trained embeddings
# -> LSTM -> GRU -> dense head with a sigmoid output.
input_layer = keras.layers.Input(shape=(1,), dtype=tf_string)
x_v = vect_layer(input_layer)
emb = keras.layers.Embedding(
    input_dim=num_tokens,
    output_dim=embedding_dim,
    embeddings_initializer=keras.initializers.Constant(embedding_matrix),
    trainable=False,
)(x_v)

x = LSTM(units=64, activation='relu', return_sequences=True)(emb)
x = GRU(units=64, activation='relu', return_sequences=False)(x)
# The GRU already emits one vector per example, so Flatten leaves the shape unchanged.
x = keras.layers.Flatten()(x)
for units in (64, 32):
    x = keras.layers.Dense(units=units, activation='relu')(x)
x = keras.layers.Dropout(rate=0.2)(x)
output_layer = keras.layers.Dense(units=1, activation='sigmoid')(x)

model = keras.Model(inputs=input_layer, outputs=output_layer)
model.summary()

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer,
    loss=keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 1)]               0         
                                                                 
 text_vectorization_1 (TextV  (None, 50)               0         
 ectorization)                                                   
                                                                 
 embedding_2 (Embedding)     (None, 50, 300)           9000600   
                                                                 
 lstm_2 (LSTM)               (None, 50, 64)            93440     
                                                                 
 gru_2 (GRU)                 (None, 64)                24960     
                                                                 
 flatten_2 (Flatten)         (None, 64)                0         
                                                           

In [None]:
batch_size = 256
epochs = 5

# Stop once val_loss has failed to improve for `patience` epochs and roll back
# to the best weights seen. The previous patience of 70 exceeded the epoch
# budget, so the callback could never trigger (and, in TF 2.x releases that only
# restore weights on an early stop, `restore_best_weights` never took effect).
es = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=max(1, epochs // 2),
    restore_best_weights=True,
)

history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    callbacks=[es],
    epochs=epochs,
    batch_size=batch_size,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Loss and accuracy on the test set.
model.evaluate(x=x_test, y=y_test)



[0.34432554244995117, 0.8490999937057495]

In [None]:
# Sigmoid outputs are rounded once into hard 0/1 labels and reused by every metric.
y_pred = model.predict(x_test)
y_pred_labels = y_pred.round()

accuracy_sc = accuracy_score(y_true=y_test, y_pred=y_pred_labels) * 100
f1_sc = f1_score(y_true=y_test, y_pred=y_pred_labels)

print("Accuracy score is {}% ".format(accuracy_sc))
# f1_score returns a fraction in [0, 1]; scale it so the "%" suffix is truthful
# (the unscaled value used to print as e.g. "0.85%").
print("f1-score is {:.2f}% ".format(f1_sc * 100))
print(classification_report(y_true=y_test, y_pred=y_pred_labels))

Accuracy score is 84.91% 
f1-score is 0.8471589182619265% 
              precision    recall  f1-score   support

           0       0.84      0.86      0.85      5000
           1       0.86      0.84      0.85      5000

    accuracy                           0.85     10000
   macro avg       0.85      0.85      0.85     10000
weighted avg       0.85      0.85      0.85     10000



## 2.Experiment

In [None]:
# Experiment 2: same pipeline as a plain LSTM -> GRU stack, but both recurrent
# layers are wrapped in Bidirectional.
input_layer = keras.layers.Input(shape=(1,), dtype=tf_string)
x_v = vect_layer(input_layer)
emb = keras.layers.Embedding(
    input_dim=num_tokens,
    output_dim=embedding_dim,
    embeddings_initializer=keras.initializers.Constant(embedding_matrix),
    trainable=False,
)(x_v)

x = Bidirectional(LSTM(units=64, activation='relu', return_sequences=True))(emb)
x = Bidirectional(GRU(units=64, activation='relu', return_sequences=False))(x)
# The GRU already emits one vector per example, so Flatten leaves the shape unchanged.
x = keras.layers.Flatten()(x)
for units in (64, 32):
    x = keras.layers.Dense(units=units, activation='relu')(x)
x = keras.layers.Dropout(rate=0.2)(x)
output_layer = keras.layers.Dense(units=1, activation='sigmoid')(x)

model = keras.Model(inputs=input_layer, outputs=output_layer)
model.summary()

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer,
    loss=keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 1)]               0         
                                                                 
 text_vectorization_1 (TextV  (None, 50)               0         
 ectorization)                                                   
                                                                 
 embedding_4 (Embedding)     (None, 50, 300)           9000600   
                                                                 
 bidirectional_4 (Bidirectio  (None, 50, 128)          186880    
 nal)                                                            
                                                                 
 bidirectional_5 (Bidirectio  (None, 128)              74496     
 nal)                                                            
                                                           

In [None]:
batch_size = 256
epochs = 4

# Stop once val_loss has failed to improve for `patience` epochs and roll back
# to the best weights seen. The previous patience of 70 exceeded the epoch
# budget, so the callback could never trigger (and, in TF 2.x releases that only
# restore weights on an early stop, `restore_best_weights` never took effect).
es = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=max(1, epochs // 2),
    restore_best_weights=True,
)

history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    callbacks=[es],
    epochs=epochs,
    batch_size=batch_size,
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [None]:
# Loss and accuracy on the test set.
model.evaluate(x=x_test, y=y_test)



[0.3427308201789856, 0.848800003528595]

In [None]:
# Sigmoid outputs are rounded once into hard 0/1 labels and reused by every metric.
y_pred = model.predict(x_test)
y_pred_labels = y_pred.round()

accuracy_sc = accuracy_score(y_true=y_test, y_pred=y_pred_labels) * 100
f1_sc = f1_score(y_true=y_test, y_pred=y_pred_labels)

print("Accuracy score is {}% ".format(accuracy_sc))
# f1_score returns a fraction in [0, 1]; scale it so the "%" suffix is truthful
# (the unscaled value used to print as e.g. "0.84%").
print("f1-score is {:.2f}% ".format(f1_sc * 100))
print(classification_report(y_true=y_test, y_pred=y_pred_labels))

Accuracy score is 84.88% 
f1-score is 0.8411430972893466% 
              precision    recall  f1-score   support

           0       0.82      0.90      0.86      5000
           1       0.89      0.80      0.84      5000

    accuracy                           0.85     10000
   macro avg       0.85      0.85      0.85     10000
weighted avg       0.85      0.85      0.85     10000



## 3.Experiment

In [None]:
# Experiment 3: unidirectional LSTM -> GRU stack on frozen pre-trained
# embeddings, followed by a dense head with a sigmoid output.
input_layer = keras.layers.Input(shape=(1,), dtype=tf_string)
x_v = vect_layer(input_layer)
emb = keras.layers.Embedding(
    input_dim=num_tokens,
    output_dim=embedding_dim,
    embeddings_initializer=keras.initializers.Constant(embedding_matrix),
    trainable=False,
)(x_v)

x = LSTM(units=64, activation='relu', return_sequences=True)(emb)
x = GRU(units=64, activation='relu', return_sequences=False)(x)
# The GRU already emits one vector per example, so Flatten leaves the shape unchanged.
x = keras.layers.Flatten()(x)
for units in (64, 32):
    x = keras.layers.Dense(units=units, activation='relu')(x)
x = keras.layers.Dropout(rate=0.2)(x)
output_layer = keras.layers.Dense(units=1, activation='sigmoid')(x)

model = keras.Model(inputs=input_layer, outputs=output_layer)
model.summary()

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer,
    loss=keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

Model: "model_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_9 (InputLayer)        [(None, 1)]               0         
                                                                 
 text_vectorization_2 (TextV  (None, 50)               0         
 ectorization)                                                   
                                                                 
 embedding_8 (Embedding)     (None, 50, 300)           9000600   
                                                                 
 lstm_8 (LSTM)               (None, 50, 64)            93440     
                                                                 
 gru_8 (GRU)                 (None, 64)                24960     
                                                                 
 flatten_8 (Flatten)         (None, 64)                0         
                                                           

In [None]:
batch_size = 256
epochs = 2

# Stop once val_loss has failed to improve for `patience` epochs and roll back
# to the best weights seen. The previous patience of 70 exceeded the epoch
# budget, so the callback could never trigger (and, in TF 2.x releases that only
# restore weights on an early stop, `restore_best_weights` never took effect).
es = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=max(1, epochs // 2),
    restore_best_weights=True,
)

history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    callbacks=[es],
    epochs=epochs,
    batch_size=batch_size,
)

Epoch 1/2
Epoch 2/2


In [None]:
# Loss and accuracy on the test set.
model.evaluate(x=x_test, y=y_test)



[0.3307604193687439, 0.8531000018119812]

In [None]:
# Sigmoid outputs are rounded once into hard 0/1 labels and reused by every metric.
y_pred = model.predict(x_test)
y_pred_labels = y_pred.round()

accuracy_sc = accuracy_score(y_true=y_test, y_pred=y_pred_labels) * 100
f1_sc = f1_score(y_true=y_test, y_pred=y_pred_labels)

print("Accuracy score is {}% ".format(accuracy_sc))
# f1_score returns a fraction in [0, 1]; scale it so the "%" suffix is truthful
# (the unscaled value used to print as e.g. "0.85%").
print("f1-score is {:.2f}% ".format(f1_sc * 100))
print(classification_report(y_true=y_test, y_pred=y_pred_labels))

Accuracy score is 85.31% 
f1-score is 0.847026970738311% 
              precision    recall  f1-score   support

           0       0.83      0.89      0.86      5000
           1       0.88      0.81      0.85      5000

    accuracy                           0.85     10000
   macro avg       0.86      0.85      0.85     10000
weighted avg       0.86      0.85      0.85     10000



## 4.Experiment

In [None]:
# Experiment 4: bidirectional LSTM -> bidirectional GRU on frozen pre-trained
# embeddings, followed by a dense head with a sigmoid output.
input_layer = keras.layers.Input(shape=(1,), dtype=tf_string)
x_v = vect_layer(input_layer)
emb = keras.layers.Embedding(
    input_dim=num_tokens,
    output_dim=embedding_dim,
    embeddings_initializer=keras.initializers.Constant(embedding_matrix),
    trainable=False,
)(x_v)

x = Bidirectional(LSTM(units=64, activation='relu', return_sequences=True))(emb)
x = Bidirectional(GRU(units=64, activation='relu', return_sequences=False))(x)
# The GRU already emits one vector per example, so Flatten leaves the shape unchanged.
x = keras.layers.Flatten()(x)
for units in (64, 32):
    x = keras.layers.Dense(units=units, activation='relu')(x)
x = keras.layers.Dropout(rate=0.2)(x)
output_layer = keras.layers.Dense(units=1, activation='sigmoid')(x)

model = keras.Model(inputs=input_layer, outputs=output_layer)
model.summary()

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer,
    loss=keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

Model: "model_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_10 (InputLayer)       [(None, 1)]               0         
                                                                 
 text_vectorization_2 (TextV  (None, 50)               0         
 ectorization)                                                   
                                                                 
 embedding_9 (Embedding)     (None, 50, 300)           9000600   
                                                                 
 bidirectional_8 (Bidirectio  (None, 50, 128)          186880    
 nal)                                                            
                                                                 
 bidirectional_9 (Bidirectio  (None, 128)              74496     
 nal)                                                            
                                                           

In [None]:
batch_size = 256
epochs = 2

# Stop once val_loss has failed to improve for `patience` epochs and roll back
# to the best weights seen. The previous patience of 70 exceeded the epoch
# budget, so the callback could never trigger (and, in TF 2.x releases that only
# restore weights on an early stop, `restore_best_weights` never took effect).
es = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=max(1, epochs // 2),
    restore_best_weights=True,
)

history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    callbacks=[es],
    epochs=epochs,
    batch_size=batch_size,
)

Epoch 1/2
Epoch 2/2


In [None]:
# Loss and accuracy on the test set.
model.evaluate(x=x_test, y=y_test)



[0.33368927240371704, 0.8525000214576721]

In [None]:
# Sigmoid outputs are rounded once into hard 0/1 labels and reused by every metric.
y_pred = model.predict(x_test)
y_pred_labels = y_pred.round()

accuracy_sc = accuracy_score(y_true=y_test, y_pred=y_pred_labels) * 100
f1_sc = f1_score(y_true=y_test, y_pred=y_pred_labels)

print("Accuracy score is {}% ".format(accuracy_sc))
# f1_score returns a fraction in [0, 1]; scale it so the "%" suffix is truthful
# (the unscaled value used to print as e.g. "0.86%").
print("f1-score is {:.2f}% ".format(f1_sc * 100))
print(classification_report(y_true=y_test, y_pred=y_pred_labels))

Accuracy score is 85.25% 
f1-score is 0.8563638134190281% 
              precision    recall  f1-score   support

           0       0.87      0.83      0.85      5000
           1       0.83      0.88      0.86      5000

    accuracy                           0.85     10000
   macro avg       0.85      0.85      0.85     10000
weighted avg       0.85      0.85      0.85     10000



## 5.Experiment

In [None]:
# Experiment 5: deeper stack -- two bidirectional LSTMs, dropout, then a
# bidirectional GRU, followed by a dense head with heavier dropout.
input_layer = keras.layers.Input(shape=(1,), dtype=tf_string)
x_v = vect_layer(input_layer)
emb = keras.layers.Embedding(
    input_dim=num_tokens,
    output_dim=embedding_dim,
    embeddings_initializer=keras.initializers.Constant(embedding_matrix),
    trainable=False,
)(x_v)

x = Bidirectional(LSTM(units=128, activation='relu', return_sequences=True))(emb)
x = Bidirectional(LSTM(units=64, activation='relu', return_sequences=True))(x)
x = keras.layers.Dropout(rate=0.4)(x)
x = Bidirectional(GRU(units=64, activation='relu', return_sequences=False))(x)
# The GRU already emits one vector per example, so Flatten leaves the shape unchanged.
x = keras.layers.Flatten()(x)
for units in (64, 32):
    x = keras.layers.Dense(units=units, activation='relu')(x)
x = keras.layers.Dropout(rate=0.4)(x)
output_layer = keras.layers.Dense(units=1, activation='sigmoid')(x)

model = keras.Model(inputs=input_layer, outputs=output_layer)
model.summary()

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer,
    loss=keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

Model: "model_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_11 (InputLayer)       [(None, 1)]               0         
                                                                 
 text_vectorization_2 (TextV  (None, 50)               0         
 ectorization)                                                   
                                                                 
 embedding_10 (Embedding)    (None, 50, 300)           9000600   
                                                                 
 bidirectional_10 (Bidirecti  (None, 50, 256)          439296    
 onal)                                                           
                                                                 
 bidirectional_11 (Bidirecti  (None, 50, 128)          164352    
 onal)                                                           
                                                          

In [None]:
batch_size = 256
epochs = 2

# Stop once val_loss has failed to improve for `patience` epochs and roll back
# to the best weights seen. The previous patience of 70 exceeded the epoch
# budget, so the callback could never trigger (and, in TF 2.x releases that only
# restore weights on an early stop, `restore_best_weights` never took effect).
es = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=max(1, epochs // 2),
    restore_best_weights=True,
)

history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    callbacks=[es],
    epochs=epochs,
    batch_size=batch_size,
)

Epoch 1/2
Epoch 2/2


In [None]:
# Loss and accuracy on the test set.
model.evaluate(x=x_test, y=y_test)



[0.2883777916431427, 0.8751000165939331]

In [None]:
# Sigmoid outputs are rounded once into hard 0/1 labels and reused by every metric.
y_pred = model.predict(x_test)
y_pred_labels = y_pred.round()

accuracy_sc = accuracy_score(y_true=y_test, y_pred=y_pred_labels) * 100
f1_sc = f1_score(y_true=y_test, y_pred=y_pred_labels)

print("Accuracy score is {}% ".format(accuracy_sc))
# f1_score returns a fraction in [0, 1]; scale it so the "%" suffix is truthful
# (the unscaled value used to print as e.g. "0.88%").
print("f1-score is {:.2f}% ".format(f1_sc * 100))
print(classification_report(y_true=y_test, y_pred=y_pred_labels))

Accuracy score is 87.51% 
f1-score is 0.875857270649041% 
              precision    recall  f1-score   support

           0       0.88      0.87      0.87      5000
           1       0.87      0.88      0.88      5000

    accuracy                           0.88     10000
   macro avg       0.88      0.88      0.88     10000
weighted avg       0.88      0.88      0.88     10000



# Experiments 6–8: vectorizer parameters

In [None]:
# Same settings as before except for a much larger vocabulary cap.
embedding_dim = 300    # width of each embedded token representation
vocab_size = 100000    # upper bound on the number of distinct tokens kept
sequence_length = 50   # number of token ids produced per text

# Rebuild the text -> integer-id layer and re-learn its vocabulary.
vect_layer = TextVectorization(
    max_tokens=vocab_size,
    output_mode='int',
    output_sequence_length=sequence_length,
)
vect_layer.adapt(x_train)

In [None]:
# Vocabulary in index order, plus the lookup token -> integer id.
voc = vect_layer.get_vocabulary()
word_index = {token: position for position, token in enumerate(voc)}

In [None]:
# Peek at the first ten vocabulary entries.
voc[:10]

['', '[UNK]', 'a', 'se', 'to', 'je', 'na', 'v', 'jsem', 'že']

In [None]:
# Matrix height: vocabulary size plus two spare rows.
num_tokens = len(voc) + 2
hits = misses = 0

# Rows stay all-zero for tokens that have no pre-trained vector.
embedding_matrix = np.zeros((num_tokens, embedding_dim))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is None:
        misses += 1
        continue
    embedding_matrix[i] = embedding_vector
    hits += 1
print("Converted %d words (%d misses)" % (hits, misses))

Converted 78666 words (21334 misses)


In [None]:
# The stratified 90/10 train/validation split was already made before
# Experiment 1. Splitting `x_train` a second time here would shrink the training
# set again, discard the original validation set, and move texts the vectorizer
# above was adapted on into the new validation set, so Experiments 6-8 would no
# longer be comparable with Experiments 1-5. Reuse the existing split instead.
assert len(x_train) == len(y_train) and len(x_val) == len(y_val)

## 6.Experiment

In [None]:
# Experiment 6: unidirectional LSTM -> GRU stack on frozen pre-trained
# embeddings, using the larger-vocabulary vectorizer.
input_layer = keras.layers.Input(shape=(1,), dtype=tf_string)
x_v = vect_layer(input_layer)
emb = keras.layers.Embedding(
    input_dim=num_tokens,
    output_dim=embedding_dim,
    embeddings_initializer=keras.initializers.Constant(embedding_matrix),
    trainable=False,
)(x_v)

x = LSTM(units=64, activation='relu', return_sequences=True)(emb)
x = GRU(units=64, activation='relu', return_sequences=False)(x)
# The GRU already emits one vector per example, so Flatten leaves the shape unchanged.
x = keras.layers.Flatten()(x)
for units in (64, 32):
    x = keras.layers.Dense(units=units, activation='relu')(x)
x = keras.layers.Dropout(rate=0.2)(x)
output_layer = keras.layers.Dense(units=1, activation='sigmoid')(x)

model = keras.Model(inputs=input_layer, outputs=output_layer)
model.summary()

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer,
    loss=keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

Model: "model_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_12 (InputLayer)       [(None, 1)]               0         
                                                                 
 text_vectorization_3 (TextV  (None, 50)               0         
 ectorization)                                                   
                                                                 
 embedding_11 (Embedding)    (None, 50, 300)           30000600  
                                                                 
 lstm_12 (LSTM)              (None, 50, 64)            93440     
                                                                 
 gru_11 (GRU)                (None, 64)                24960     
                                                                 
 flatten_11 (Flatten)        (None, 64)                0         
                                                          

In [None]:
batch_size = 256
epochs = 2

# Stop once val_loss has failed to improve for `patience` epochs and roll back
# to the best weights seen. The previous patience of 70 exceeded the epoch
# budget, so the callback could never trigger (and, in TF 2.x releases that only
# restore weights on an early stop, `restore_best_weights` never took effect).
es = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=max(1, epochs // 2),
    restore_best_weights=True,
)

history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    callbacks=[es],
    epochs=epochs,
    batch_size=batch_size,
)

Epoch 1/2
Epoch 2/2


In [None]:
# Loss and accuracy on the test set.
model.evaluate(x=x_test, y=y_test)



[0.36707910895347595, 0.8355000019073486]

In [None]:
# Sigmoid outputs are rounded once into hard 0/1 labels and reused by every metric.
y_pred = model.predict(x_test)
y_pred_labels = y_pred.round()

accuracy_sc = accuracy_score(y_true=y_test, y_pred=y_pred_labels) * 100
f1_sc = f1_score(y_true=y_test, y_pred=y_pred_labels)

print("Accuracy score is {}% ".format(accuracy_sc))
# f1_score returns a fraction in [0, 1]; scale it so the "%" suffix is truthful
# (the unscaled value used to print as e.g. "0.84%").
print("f1-score is {:.2f}% ".format(f1_sc * 100))
print(classification_report(y_true=y_test, y_pred=y_pred_labels))

Accuracy score is 83.55% 
f1-score is 0.8418117126646792% 
              precision    recall  f1-score   support

           0       0.86      0.80      0.83      5000
           1       0.81      0.88      0.84      5000

    accuracy                           0.84     10000
   macro avg       0.84      0.84      0.84     10000
weighted avg       0.84      0.84      0.84     10000



## 7.Experiment

In [None]:
# Experiment 7: bidirectional LSTM -> bidirectional GRU on frozen pre-trained
# embeddings, using the larger-vocabulary vectorizer.
input_layer = keras.layers.Input(shape=(1,), dtype=tf_string)
x_v = vect_layer(input_layer)
emb = keras.layers.Embedding(
    input_dim=num_tokens,
    output_dim=embedding_dim,
    embeddings_initializer=keras.initializers.Constant(embedding_matrix),
    trainable=False,
)(x_v)

x = Bidirectional(LSTM(units=64, activation='relu', return_sequences=True))(emb)
x = Bidirectional(GRU(units=64, activation='relu', return_sequences=False))(x)
# The GRU already emits one vector per example, so Flatten leaves the shape unchanged.
x = keras.layers.Flatten()(x)
for units in (64, 32):
    x = keras.layers.Dense(units=units, activation='relu')(x)
x = keras.layers.Dropout(rate=0.2)(x)
output_layer = keras.layers.Dense(units=1, activation='sigmoid')(x)

model = keras.Model(inputs=input_layer, outputs=output_layer)
model.summary()

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer,
    loss=keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

Model: "model_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_13 (InputLayer)       [(None, 1)]               0         
                                                                 
 text_vectorization_3 (TextV  (None, 50)               0         
 ectorization)                                                   
                                                                 
 embedding_12 (Embedding)    (None, 50, 300)           30000600  
                                                                 
 bidirectional_13 (Bidirecti  (None, 50, 128)          186880    
 onal)                                                           
                                                                 
 bidirectional_14 (Bidirecti  (None, 128)              74496     
 onal)                                                           
                                                          

In [None]:
batch_size = 256
epochs = 2

# Stop once val_loss has failed to improve for `patience` epochs and roll back
# to the best weights seen. The previous patience of 70 exceeded the epoch
# budget, so the callback could never trigger (and, in TF 2.x releases that only
# restore weights on an early stop, `restore_best_weights` never took effect).
es = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=max(1, epochs // 2),
    restore_best_weights=True,
)

history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    callbacks=[es],
    epochs=epochs,
    batch_size=batch_size,
)

Epoch 1/2
Epoch 2/2


In [None]:
# Loss and accuracy on the test set.
model.evaluate(x=x_test, y=y_test)



[0.3174686133861542, 0.8610000014305115]

In [None]:
# Sigmoid outputs are rounded once into hard 0/1 labels and reused by every metric.
y_pred = model.predict(x_test)
y_pred_labels = y_pred.round()

accuracy_sc = accuracy_score(y_true=y_test, y_pred=y_pred_labels) * 100
f1_sc = f1_score(y_true=y_test, y_pred=y_pred_labels)

print("Accuracy score is {}% ".format(accuracy_sc))
# f1_score returns a fraction in [0, 1]; scale it so the "%" suffix is truthful
# (the unscaled value used to print as e.g. "0.86%").
print("f1-score is {:.2f}% ".format(f1_sc * 100))
print(classification_report(y_true=y_test, y_pred=y_pred_labels))

Accuracy score is 86.1% 
f1-score is 0.8560778629115758% 
              precision    recall  f1-score   support

           0       0.84      0.90      0.87      5000
           1       0.89      0.83      0.86      5000

    accuracy                           0.86     10000
   macro avg       0.86      0.86      0.86     10000
weighted avg       0.86      0.86      0.86     10000



## 8.Experiment

In [None]:
# Experiment 8: two bidirectional LSTMs, dropout, then a bidirectional GRU that
# returns the full sequence; dropout follows each hidden dense layer.
input_layer = keras.layers.Input(shape=(1,), dtype=tf_string)
x_v = vect_layer(input_layer)
emb = keras.layers.Embedding(
    input_dim=num_tokens,
    output_dim=embedding_dim,
    embeddings_initializer=keras.initializers.Constant(embedding_matrix),
    trainable=False,
)(x_v)

x = Bidirectional(LSTM(units=128, activation='relu', return_sequences=True))(emb)
x = Bidirectional(LSTM(units=64, activation='relu', return_sequences=True))(x)
x = keras.layers.Dropout(rate=0.4)(x)
x = Bidirectional(GRU(units=64, activation='relu', return_sequences=True))(x)
# The GRU keeps the time axis here, so Flatten concatenates all timesteps
# into a single feature vector for the dense head.
x = keras.layers.Flatten()(x)
for units in (64, 32):
    x = keras.layers.Dense(units=units, activation='relu')(x)
    x = keras.layers.Dropout(rate=0.4)(x)
output_layer = keras.layers.Dense(units=1, activation='sigmoid')(x)

model = keras.Model(inputs=input_layer, outputs=output_layer)
model.summary()

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer,
    loss=keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

Model: "model_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_15 (InputLayer)       [(None, 1)]               0         
                                                                 
 text_vectorization_3 (TextV  (None, 50)               0         
 ectorization)                                                   
                                                                 
 embedding_14 (Embedding)    (None, 50, 300)           30000600  
                                                                 
 bidirectional_18 (Bidirecti  (None, 50, 256)          439296    
 onal)                                                           
                                                                 
 bidirectional_19 (Bidirecti  (None, 50, 128)          164352    
 onal)                                                           
                                                          

In [None]:
batch_size = 256
epochs = 2

# Stop once val_loss has failed to improve for `patience` epochs and roll back
# to the best weights seen. The previous patience of 70 exceeded the epoch
# budget, so the callback could never trigger (and, in TF 2.x releases that only
# restore weights on an early stop, `restore_best_weights` never took effect).
es = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=max(1, epochs // 2),
    restore_best_weights=True,
)

history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    callbacks=[es],
    epochs=epochs,
    batch_size=batch_size,
)

Epoch 1/2
Epoch 2/2


In [None]:
# Loss and accuracy on the test set.
model.evaluate(x=x_test, y=y_test)



[0.30245357751846313, 0.8676000237464905]

In [None]:
# Sigmoid outputs are rounded once into hard 0/1 labels and reused by every metric.
y_pred = model.predict(x_test)
y_pred_labels = y_pred.round()

accuracy_sc = accuracy_score(y_true=y_test, y_pred=y_pred_labels) * 100
f1_sc = f1_score(y_true=y_test, y_pred=y_pred_labels)

print("Accuracy score is {}% ".format(accuracy_sc))
# f1_score returns a fraction in [0, 1]; scale it so the "%" suffix is truthful
# (the unscaled value used to print as e.g. "0.87%").
print("f1-score is {:.2f}% ".format(f1_sc * 100))
print(classification_report(y_true=y_test, y_pred=y_pred_labels))

Accuracy score is 86.76% 
f1-score is 0.8739287754713388% 
              precision    recall  f1-score   support

           0       0.91      0.82      0.86      5000
           1       0.83      0.92      0.87      5000

    accuracy                           0.87     10000
   macro avg       0.87      0.87      0.87     10000
weighted avg       0.87      0.87      0.87     10000



# Save model

In [None]:
# Persist whichever model is currently bound to `model` (the Experiment 8
# network when the notebook is run top to bottom).
model.save(filepath='CZ_FT_first')

INFO:tensorflow:Assets written to: FR_LSTM_sixth/assets




In [None]:
# Bundle the saved model directory into a zip archive.
# NOTE(review): the absolute /content paths presumably assume the notebook's
# working directory is /content (e.g. Colab), since the model was saved to the
# relative path 'CZ_FT_first' -- confirm before running elsewhere.
!zip -r /content/CZFTfirst.zip /content/CZ_FT_first/

  adding: content/FR_LSTM_sixth/ (stored 0%)
  adding: content/FR_LSTM_sixth/variables/ (stored 0%)
  adding: content/FR_LSTM_sixth/variables/variables.index (deflated 67%)
  adding: content/FR_LSTM_sixth/variables/variables.data-00000-of-00001 (deflated 8%)
  adding: content/FR_LSTM_sixth/assets/ (stored 0%)
  adding: content/FR_LSTM_sixth/keras_metadata.pb (deflated 90%)
  adding: content/FR_LSTM_sixth/saved_model.pb (deflated 78%)
