## Automated Detection of Fake News Spreaders: An Evaluative Study of Transformers and SOTA Models on Multilingual Dataset. 
Multi Channel CNN Model, Training and Testing Notebook.
Code by M. Siino. 

From the paper: "Automated Detection of Fake News Spreaders: An Evaluative Study of Transformers and SOTA Models on Multilingual Dataset." by M.Siino et al.

## Importing modules.

In [1]:
import matplotlib.pyplot as plt
import os
import re
import shutil
import string
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import preprocessing
from keras.models import Model
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from io import open
from pathlib import Path

## Downloading the dataset and extracting it into the current working directory.

In [2]:
# The PAN20 author-profiling archives are fetched from the paper's GitHub repository.
# (Original provenance note: the link was first derived from
# https://drive.google.com/file/d/19ZcqEv88euKB71HfAWjTGN3uCKp2qsfP/ by forcing export=download.)
urlTrainingSet = "https://github.com/marco-siino/fake_news_spreaders_detection/raw/main/dataset/pan20-author-profiling-training-2020-02-23.zip"
urlTestSet = "https://github.com/marco-siino/fake_news_spreaders_detection/raw/main/dataset/pan20-author-profiling-test-2020-02-23.zip"

# Download into the current working directory and unzip next to the archive.
download_options = dict(extract=True, archive_format='zip', cache_dir='.', cache_subdir='')

training_set = tf.keras.utils.get_file(
    fname="pan20-author-profiling-training-2020-02-23.zip",
    origin=urlTrainingSet,
    **download_options)
test_set = tf.keras.utils.get_file(
    fname="pan20-author-profiling-test-2020-02-23.zip",
    origin=urlTestSet,
    **download_options)

# The extracted folders sit beside the downloaded archives.
training_set_dir = os.path.join(
    os.path.dirname(training_set),
    'pan20-author-profiling-training-2020-02-23')
test_set_dir = os.path.join(
    os.path.dirname(test_set),
    'pan20-author-profiling-test-2020-02-23')

# Show where the training archive and its extracted folder ended up.
print(training_set)
print(training_set_dir)

!ls -A

Downloading data from https://github.com/marco-siino/fake_news_spreaders_detection/raw/main/dataset/pan20-author-profiling-training-2020-02-23.zip


  16384/3094459 [..............................] - ETA: 0s

 638976/3094459 [=====>........................] - ETA: 0s





Downloading data from https://github.com/marco-siino/fake_news_spreaders_detection/raw/main/dataset/pan20-author-profiling-test-2020-02-23.zip


  16384/2135236 [..............................] - ETA: 0s







./pan20-author-profiling-training-2020-02-23.zip
./pan20-author-profiling-training-2020-02-23


__MACOSX
__notebook__.ipynb
pan20-author-profiling-test-2020-02-23
pan20-author-profiling-test-2020-02-23.zip
pan20-author-profiling-training-2020-02-23
pan20-author-profiling-training-2020-02-23.zip


## Build the folder hierarchy expected by the Keras directory-based dataset loader.

In [3]:
### Training and test folders. ###

# Path prefixes; later cells append the language code to them
# (e.g. train_dir + 'en' -> 'train_dir_en').
train_dir='train_dir_'
test_dir='test_dir_'

# Layout created here: <split prefix><language>/<class label>/
# with languages 'en' and 'es' and class-label folders '0' and '1'.
# exist_ok=True makes the cell safe to re-run and removes the need for the
# separate os.path.exists() checks; makedirs also creates the first-level
# directory implicitly.
for split_prefix in (train_dir, test_dir):
    for lang_code in ('en', 'es'):
        for class_label in ('0', '1'):
            os.makedirs(os.path.join(split_prefix + lang_code, class_label), exist_ok=True)

!ls -A

__MACOSX
__notebook__.ipynb
pan20-author-profiling-test-2020-02-23
pan20-author-profiling-test-2020-02-23.zip
pan20-author-profiling-training-2020-02-23
pan20-author-profiling-training-2020-02-23.zip
test_dir_en
test_dir_es
train_dir_en
train_dir_es


## Set language and directory paths.


In [4]:
# Locations of the ground-truth files for both languages.
#   training: <training_set_dir>/<language>/truth.txt
#   test:     <test_set_dir>/<language>.txt
truth_file_test_dir = test_set_dir

# Spanish.
language = 'es'
truth_file_training_dir_es = f"{training_set_dir}/{language}/"
truth_file_training_path_es = f"{truth_file_training_dir_es}truth.txt"
truth_file_test_path_es = f"{truth_file_test_dir}/{language}.txt"

# English (language is left set to 'en' when this cell finishes).
language = 'en'
truth_file_training_dir_en = f"{training_set_dir}/{language}/"
truth_file_training_path_en = f"{truth_file_training_dir_en}truth.txt"
truth_file_test_path_en = f"{truth_file_test_dir}/{language}.txt"

## Read truth.txt to organize training dataset folders.



In [5]:
# Move every training XML file into train_dir_<language>/<label>/ as a .txt file,
# using the "<author id>:::<label>" lines of each language's truth.txt.
# English is processed first, then Spanish (language ends up as 'es').
for language, truth_path, source_dir in (
        ('en', truth_file_training_path_en, truth_file_training_dir_en),
        ('es', truth_file_training_path_es, truth_file_training_dir_es)):
    # The context manager closes the truth file even if a move fails.
    with open(truth_path, "r") as truth_file:
        for line in truth_file:
            # Split line at :::
            fields = line.split(":::")
            # Skip blank or malformed lines instead of raising IndexError.
            if len(fields) < 2 or not fields[1].strip():
                continue
            fNameXml = fields[0]+'.xml'
            fNameTxt = fields[0]+'.txt'
            # Only the first character is the label; the rest is the line ending.
            label = fields[1].strip()[0]

            # Move the file to its class folder. Files that are already moved
            # (e.g. when the cell is re-run) no longer exist at the source and
            # are skipped.
            if os.path.exists(source_dir+fNameXml):
                os.rename(source_dir+fNameXml, './train_dir_'+language+'/'+label+'/'+fNameTxt)

## Read truth.txt to organize test dataset folders.

In [6]:
# Move every test XML file into test_dir_<language>/<label>/ as a .txt file,
# using the "<author id>:::<label>" lines of <test_set_dir>/<language>.txt.
# English is processed first, then Spanish (language ends up as 'es').
for language, truth_path in (
        ('en', truth_file_test_path_en),
        ('es', truth_file_test_path_es)):
    # Test XML files live in <truth_file_test_dir>/<language>/.
    source_dir = truth_file_test_dir+'/'+language+'/'
    # The context manager closes the truth file even if a move fails.
    with open(truth_path, "r") as truth_file:
        for line in truth_file:
            # Split line at :::
            fields = line.split(":::")
            # Skip blank or malformed lines instead of raising IndexError.
            if len(fields) < 2 or not fields[1].strip():
                continue
            fNameXml = fields[0]+'.xml'
            fNameTxt = fields[0]+'.txt'
            # Only the first character is the label; the rest is the line ending.
            label = fields[1].strip()[0]

            # Move the file to its class folder. Files that are already moved
            # (e.g. when the cell is re-run) no longer exist at the source and
            # are skipped.
            if os.path.exists(source_dir+fNameXml):
                os.rename(source_dir+fNameXml, './test_dir_'+language+'/'+label+'/'+fNameTxt)

## Function to pre-process source text.

In [7]:
def custom_standardization(input_data):
  """Strip the XML wrapper markup from raw author documents.

  Used as the `standardize` callable of the TextVectorization layers.
  The following are replaced, in order: the CDATA opening marker, the
  `]...>` CDATA closing marker, the `<author lang="es">` / `<author lang="en">`
  opening tags, `</author>`, and the `<documents>` / `</documents>` tags together
  with the newline and up to two tabs that follow them.

  Nothing else is normalised here: no lower-casing or punctuation stripping is
  applied, and only the tags listed above are removed.

  Args:
    input_data: string tensor holding the raw file contents.

  Returns:
    A string tensor of the same shape with the markup above replaced.
  """
  # (regex, replacement) pairs, applied sequentially. The first two patterns
  # are raw strings so that `\!`, `\[` and `\]` reach the regex engine as-is
  # without relying on Python's invalid-escape fallback. The last two stay
  # regular strings on purpose: they embed a literal newline and tab.
  markup_rules = (
      (r'<\!\[CDATA\[', ' '),
      (r'\]{1,}>', ' '),
      ('<author lang="es">', ' '),
      ('<author lang="en">', ' '),
      ('</author>', ' '),
      # Opening <documents> is removed outright (empty replacement).
      ('<documents>\n(\t){0,2}', ''),
      ('</documents>\n(\t){0,2}', ' '),
  )
  output_data = input_data
  for pattern, replacement in markup_rules:
    output_data = tf.strings.regex_replace(output_data, pattern, replacement)
  return output_data

## Building the dataset.

In [8]:
# One document (author feed) per batch.
batch_size=1

# NOTE(review): the original code passed shuffle='false'. A non-empty string is
# truthy, so shuffling was in fact ENABLED. The training sets therefore keep
# shuffle=True (same effective behaviour, now explicit, with the same seed).
# The test sets use shuffle=False: evaluation metrics do not depend on sample
# order, and this makes the evaluation pass deterministic.

# Build the dataset for Spanish.
language='es'

raw_train_ds_es = tf.keras.preprocessing.text_dataset_from_directory(
    train_dir+language,
    batch_size=batch_size,
    shuffle=True,
    seed=1
    )

raw_test_ds_es = tf.keras.preprocessing.text_dataset_from_directory(
    test_dir+language,
    batch_size=batch_size,
    shuffle=False
    )


# Build the dataset for English.
language='en'

raw_train_ds_en = tf.keras.preprocessing.text_dataset_from_directory(
    train_dir+language,
    batch_size=batch_size,
    shuffle=True,
    seed=1
    )

raw_test_ds_en = tf.keras.preprocessing.text_dataset_from_directory(
    test_dir+language,
    batch_size=batch_size,
    shuffle=False
    )


Found 300 files belonging to 2 classes.


2022-03-24 22:23:16.466273: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-24 22:23:16.561062: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-24 22:23:16.561795: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-24 22:23:16.564950: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Found 200 files belonging to 2 classes.
Found 300 files belonging to 2 classes.


Found 200 files belonging to 2 classes.


## First model's layer: Text Vectorization.

In [9]:
# Vocabulary cap: at most 80000 distinct tokens are kept.
max_features = 80000
# Every document is padded/truncated to this many tokens; per the original
# author, 4500 covers all document lengths in the dataset.
sequence_length = 4500

# Both languages use identically configured (but separately adapted) layers.
vectorizer_options = dict(
    standardize=custom_standardization,
    max_tokens=max_features,
    output_mode='int',
    output_sequence_length=sequence_length)

vectorize_layer_es = TextVectorization(**vectorizer_options)
vectorize_layer_en = TextVectorization(**vectorizer_options)

# Learn each vocabulary from the training texts only (labels are dropped).
train_text = raw_train_ds_en.map(lambda text, label: text)
vectorize_layer_en.adapt(train_text)

train_text = raw_train_ds_es.map(lambda text, label: text)
vectorize_layer_es.adapt(train_text)

2022-03-24 22:23:19.615365: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


## Some training parameters...

In [10]:
# Word embedding dimensions (output size of every Embedding layer in the models).
embedding_dim = 500

# Number of independent training runs per language; a new model is created
# for each run.
num_runs = 5 
# Number of single-epoch fit() calls performed within each run.
num_epochs_per_run = 100

# RMSprop with its default hyper-parameters; read by the
# create_compile_model_* functions when compiling a model.
opt = tf.keras.optimizers.RMSprop()

## Functions to create and compile models

In [11]:
def _create_compile_model(vectorize_layer):
    """Build and compile the multi-channel CNN on top of an adapted vectorizer.

    Architecture: raw string input -> `vectorize_layer` -> four parallel
    channels, each with its own Embedding + Dropout(0.8) followed by
    Conv1D(32 filters; kernel sizes 32, 16, 2 and 1) -> MaxPooling1D ->
    Dropout(0.5) -> Flatten. The channels are concatenated and passed through
    Dense(300) -> Dropout(0.5) -> Dense(1).

    The final Dense layer has no activation, so the model outputs logits; the
    loss is therefore built with from_logits=True and accuracy uses a
    threshold of 0.0 on the logit.

    Args:
        vectorize_layer: an already-adapted TextVectorization layer.

    Returns:
        A compiled tf.keras.Model.
    """
    text_input = tf.keras.Input(shape=(1,), dtype=tf.string)
    token_ids = vectorize_layer(text_input)

    # Computed once instead of once per channel.
    vocab_size = len(vectorize_layer.get_vocabulary()) + 1

    # One convolution kernel size per channel.
    kernel_sizes = (32, 16, 2, 1)

    # Every channel gets its own embedding table. All embeddings are created
    # before the convolutional branches, matching the original layer order.
    embedded_channels = []
    for _ in kernel_sizes:
        embedded = layers.Embedding(vocab_size, embedding_dim)(token_ids)
        embedded_channels.append(layers.Dropout(0.8)(embedded))

    branch_outputs = []
    for embedded, kernel_size in zip(embedded_channels, kernel_sizes):
        branch = layers.Conv1D(32, kernel_size)(embedded)
        branch = layers.MaxPooling1D()(branch)
        branch = layers.Dropout(0.5)(branch)
        branch_outputs.append(layers.Flatten()(branch))

    merged = layers.concatenate(branch_outputs)

    hidden = layers.Dense(300)(merged)
    hidden = layers.Dropout(0.5)(hidden)
    logits = layers.Dense(1)(hidden)

    model = tf.keras.Model(inputs=text_input, outputs=logits)
    # Each model gets its own optimizer, cloned from the configured `opt`.
    # Sharing one optimizer instance across models keeps the slot variables of
    # discarded models alive and is rejected by newer Keras optimizers, which
    # are bound to the variables of the first model they train.
    model.compile(
        loss=losses.BinaryCrossentropy(from_logits=True),
        optimizer=type(opt).from_config(opt.get_config()),
        metrics=tf.metrics.BinaryAccuracy(threshold=0.0))

    return model


def create_compile_model_en():
    """Create and compile a fresh, untrained model for the English data."""
    return _create_compile_model(vectorize_layer_en)


def create_compile_model_es():
    """Create and compile a fresh, untrained model for the Spanish data."""
    return _create_compile_model(vectorize_layer_es)

## Training and evaluation of English model

In [12]:
# Train num_runs independent English models. For each run, keep the best
# validation accuracy reached over num_epochs_per_run epochs.
# NOTE(review): validation_data is the test set, so the per-run maximum is
# selected on test data.
runs_accuracy = []
for run in range(1,(num_runs+1)):
    epochs_accuracy=[]
    # New model (freshly initialised weights) for every run.
    model_en = create_compile_model_en()

    # One fit() call per epoch so the validation accuracy can be logged and
    # collected after every epoch. Sample order is decided by the dataset
    # itself; fit()'s shuffle argument is ignored for tf.data.Dataset inputs
    # and is therefore not passed.
    for epoch in range (0,num_epochs_per_run):
        history = model_en.fit(
          raw_train_ds_en,
          validation_data = raw_test_ds_en,
          epochs=1,
          )
        accuracy = history.history['val_binary_accuracy']
        print("Run: ",run,"/ Accuracy at epoch ",epoch," is: ", accuracy[0],"\n")
        epochs_accuracy.append(accuracy[0])

    print("Accuracies over epochs:",epochs_accuracy,"\n\n")
    runs_accuracy.append(max(epochs_accuracy))

runs_accuracy.sort()
print("\n\n Over all runs maximum accuracies on English are:", runs_accuracy)
# Middle element of the sorted list: the median for an odd num_runs (index 2
# when num_runs == 5, as before), the upper median for an even num_runs.
print("The median for English is:",runs_accuracy[len(runs_accuracy) // 2],"\n\n\n")


2022-03-24 22:23:24.357327: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


  1/300 [..............................] - ETA: 38:26 - loss: 0.6140 - binary_accuracy: 1.0000

  3/300 [..............................] - ETA: 12s - loss: 245.5783 - binary_accuracy: 0.3333

  5/300 [..............................] - ETA: 12s - loss: 380.3228 - binary_accuracy: 0.2000

  7/300 [..............................] - ETA: 12s - loss: 306.9463 - binary_accuracy: 0.2857

  9/300 [..............................] - ETA: 12s - loss: 243.6431 - binary_accuracy: 0.3333

 11/300 [>.............................] - ETA: 11s - loss: 210.9468 - binary_accuracy: 0.2727

 13/300 [>.............................] - ETA: 11s - loss: 178.4934 - binary_accuracy: 0.3846

 15/300 [>.............................] - ETA: 11s - loss: 157.2687 - binary_accuracy: 0.4000

 17/300 [>.............................] - ETA: 11s - loss: 139.1707 - binary_accuracy: 0.4118

 19/300 [>.............................] - ETA: 11s - loss: 130.3242 - binary_accuracy: 0.3684

 21/300 [=>............................] - ETA: 11s - loss: 122.5182 - binary_accuracy: 0.3333

 23/300 [=>............................] - ETA: 11s - loss: 115.1850 - binary_accuracy: 0.3043

 25/300 [=>............................] - ETA: 11s - loss: 109.4024 - binary_accuracy: 0.2800

 27/300 [=>............................] - ETA: 11s - loss: 101.7016 - binary_accuracy: 0.2963

 29/300 [=>............................] - ETA: 11s - loss: 97.1376 - binary_accuracy: 0.3103 

 31/300 [==>...........................] - ETA: 10s - loss: 91.6373 - binary_accuracy: 0.3226

 33/300 [==>...........................] - ETA: 10s - loss: 86.5215 - binary_accuracy: 0.3333

 35/300 [==>...........................] - ETA: 10s - loss: 83.0586 - binary_accuracy: 0.3429

 37/300 [==>...........................] - ETA: 10s - loss: 78.8018 - binary_accuracy: 0.3514

 39/300 [==>...........................] - ETA: 10s - loss: 74.7607 - binary_accuracy: 0.3846

 41/300 [===>..........................] - ETA: 10s - loss: 72.5151 - binary_accuracy: 0.3902

 43/300 [===>..........................] - ETA: 10s - loss: 69.7787 - binary_accuracy: 0.3953

 45/300 [===>..........................] - ETA: 10s - loss: 67.6222 - binary_accuracy: 0.4000

 47/300 [===>..........................] - ETA: 10s - loss: 65.6809 - binary_accuracy: 0.4043

 49/300 [===>..........................] - ETA: 10s - loss: 64.2598 - binary_accuracy: 0.4082

 51/300 [====>.........................] - ETA: 10s - loss: 62.5584 - binary_accuracy: 0.4118

 53/300 [====>.........................] - ETA: 10s - loss: 61.0129 - binary_accuracy: 0.3962

 55/300 [====>.........................] - ETA: 9s - loss: 59.9062 - binary_accuracy: 0.4000 

 57/300 [====>.........................] - ETA: 9s - loss: 58.5567 - binary_accuracy: 0.3860

 59/300 [====>.........................] - ETA: 9s - loss: 56.7805 - binary_accuracy: 0.3898

 61/300 [=====>........................] - ETA: 9s - loss: 55.4154 - binary_accuracy: 0.3934

 63/300 [=====>........................] - ETA: 9s - loss: 53.8391 - binary_accuracy: 0.3968

 65/300 [=====>........................] - ETA: 9s - loss: 52.1825 - binary_accuracy: 0.4154

 67/300 [=====>........................] - ETA: 9s - loss: 50.6248 - binary_accuracy: 0.4328

 69/300 [=====>........................] - ETA: 9s - loss: 49.5897 - binary_accuracy: 0.4348











































































































































































































































Run:  1 / Accuracy at epoch  0  is:  0.5 

  1/300 [..............................] - ETA: 11s - loss: 7.4193 - binary_accuracy: 0.0000e+00

  3/300 [..............................] - ETA: 11s - loss: 10.1415 - binary_accuracy: 0.0000e+00

  5/300 [..............................] - ETA: 11s - loss: 18.5348 - binary_accuracy: 0.0000e+00

  7/300 [..............................] - ETA: 11s - loss: 13.2422 - binary_accuracy: 0.2857    

  9/300 [..............................] - ETA: 11s - loss: 10.2996 - binary_accuracy: 0.4444

 11/300 [>.............................] - ETA: 11s - loss: 9.8284 - binary_accuracy: 0.3636 

 13/300 [>.............................] - ETA: 11s - loss: 8.3168 - binary_accuracy: 0.4615

 15/300 [>.............................] - ETA: 11s - loss: 7.2563 - binary_accuracy: 0.4667

 17/300 [>.............................] - ETA: 11s - loss: 6.4026 - binary_accuracy: 0.5294

 19/300 [>.............................] - ETA: 11s - loss: 6.0484 - binary_accuracy: 0.5263

 21/300 [=>............................] - ETA: 11s - loss: 6.3705 - binary_accuracy: 0.5238

 23/300 [=>............................] - ETA: 11s - loss: 5.8308 - binary_accuracy: 0.5652

 25/300 [=>............................] - ETA: 11s - loss: 5.3643 - binary_accuracy: 0.6000

 27/300 [=>............................] - ETA: 11s - loss: 4.9758 - binary_accuracy: 0.6296

 29/300 [=>............................] - ETA: 11s - loss: 4.6342 - binary_accuracy: 0.6552

 31/300 [==>...........................] - ETA: 11s - loss: 4.3352 - binary_accuracy: 0.6774

 33/300 [==>...........................] - ETA: 10s - loss: 4.8411 - binary_accuracy: 0.6364

 35/300 [==>...........................] - ETA: 10s - loss: 5.1174 - binary_accuracy: 0.6286

 37/300 [==>...........................] - ETA: 10s - loss: 5.8381 - binary_accuracy: 0.5946

 39/300 [==>...........................] - ETA: 10s - loss: 5.6568 - binary_accuracy: 0.5897

 41/300 [===>..........................] - ETA: 10s - loss: 5.3809 - binary_accuracy: 0.6098

 43/300 [===>..........................] - ETA: 10s - loss: 5.4982 - binary_accuracy: 0.5814

 45/300 [===>..........................] - ETA: 10s - loss: 5.4198 - binary_accuracy: 0.5778

 47/300 [===>..........................] - ETA: 10s - loss: 5.3108 - binary_accuracy: 0.5745

 49/300 [===>..........................] - ETA: 10s - loss: 5.3139 - binary_accuracy: 0.5510

 51/300 [====>.........................] - ETA: 10s - loss: 5.1055 - binary_accuracy: 0.5686

 53/300 [====>.........................] - ETA: 10s - loss: 5.1618 - binary_accuracy: 0.5660

 55/300 [====>.........................] - ETA: 10s - loss: 5.5557 - binary_accuracy: 0.5636

 57/300 [====>.........................] - ETA: 9s - loss: 5.3609 - binary_accuracy: 0.5789 

 59/300 [====>.........................] - ETA: 9s - loss: 5.3979 - binary_accuracy: 0.5763

 61/300 [=====>........................] - ETA: 9s - loss: 5.2209 - binary_accuracy: 0.5902

 63/300 [=====>........................] - ETA: 9s - loss: 5.0552 - binary_accuracy: 0.6032

 65/300 [=====>........................] - ETA: 9s - loss: 5.0200 - binary_accuracy: 0.6000

 67/300 [=====>........................] - ETA: 9s - loss: 5.2247 - binary_accuracy: 0.5970

 69/300 [=====>........................] - ETA: 9s - loss: 5.0732 - binary_accuracy: 0.6087









































































































































































































































Run:  1 / Accuracy at epoch  1  is:  0.5400000214576721 

  1/300 [..............................] - ETA: 12s - loss: 5.6644e-04 - binary_accuracy: 1.0000

  3/300 [..............................] - ETA: 12s - loss: 1.8881e-04 - binary_accuracy: 1.0000

  5/300 [..............................] - ETA: 12s - loss: 1.1839e-04 - binary_accuracy: 1.0000

  7/300 [..............................] - ETA: 11s - loss: 8.4562e-05 - binary_accuracy: 1.0000

  9/300 [..............................] - ETA: 11s - loss: 1.2835 - binary_accuracy: 0.8889    

 11/300 [>.............................] - ETA: 11s - loss: 1.0501 - binary_accuracy: 0.9091

 13/300 [>.............................] - ETA: 11s - loss: 1.8317 - binary_accuracy: 0.8462

 15/300 [>.............................] - ETA: 11s - loss: 1.5875 - binary_accuracy: 0.8667

 17/300 [>.............................] - ETA: 11s - loss: 1.4007 - binary_accuracy: 0.8824

 19/300 [>.............................] - ETA: 11s - loss: 1.3839 - binary_accuracy: 0.8421

 21/300 [=>............................] - ETA: 11s - loss: 1.2521 - binary_accuracy: 0.8571

 23/300 [=>............................] - ETA: 11s - loss: 1.1432 - binary_accuracy: 0.8696

 25/300 [=>............................] - ETA: 11s - loss: 1.0517 - binary_accuracy: 0.8800

 27/300 [=>............................] - ETA: 11s - loss: 0.9738 - binary_accuracy: 0.8889

 29/300 [=>............................] - ETA: 11s - loss: 0.9610 - binary_accuracy: 0.8621

 31/300 [==>...........................] - ETA: 10s - loss: 0.8990 - binary_accuracy: 0.8710

 33/300 [==>...........................] - ETA: 10s - loss: 0.8445 - binary_accuracy: 0.8788

 35/300 [==>...........................] - ETA: 10s - loss: 0.7962 - binary_accuracy: 0.8857

 37/300 [==>...........................] - ETA: 10s - loss: 0.7532 - binary_accuracy: 0.8919

 39/300 [==>...........................] - ETA: 10s - loss: 0.7146 - binary_accuracy: 0.8974

 41/300 [===>..........................] - ETA: 10s - loss: 0.6797 - binary_accuracy: 0.9024

 43/300 [===>..........................] - ETA: 10s - loss: 0.6481 - binary_accuracy: 0.9070

 45/300 [===>..........................] - ETA: 10s - loss: 0.6193 - binary_accuracy: 0.9111

 47/300 [===>..........................] - ETA: 10s - loss: 0.5929 - binary_accuracy: 0.9149

 49/300 [===>..........................] - ETA: 10s - loss: 0.5687 - binary_accuracy: 0.9184

 51/300 [====>.........................] - ETA: 10s - loss: 0.5464 - binary_accuracy: 0.9216

 53/300 [====>.........................] - ETA: 10s - loss: 0.6370 - binary_accuracy: 0.9057

 55/300 [====>.........................] - ETA: 9s - loss: 0.7616 - binary_accuracy: 0.8909 

 57/300 [====>.........................] - ETA: 9s - loss: 0.7349 - binary_accuracy: 0.8947

 59/300 [====>.........................] - ETA: 9s - loss: 0.7538 - binary_accuracy: 0.8814

 61/300 [=====>........................] - ETA: 9s - loss: 1.3083 - binary_accuracy: 0.8689

 63/300 [=====>........................] - ETA: 9s - loss: 1.2672 - binary_accuracy: 0.8730

 65/300 [=====>........................] - ETA: 9s - loss: 1.2282 - binary_accuracy: 0.8769

 67/300 [=====>........................] - ETA: 9s - loss: 1.1916 - binary_accuracy: 0.8806

 69/300 [=====>........................] - ETA: 9s - loss: 1.1570 - binary_accuracy: 0.8841











































































































































































































































Run:  1 / Accuracy at epoch  2  is:  0.5299999713897705 

  1/300 [..............................] - ETA: 12s - loss: 7.4803e-27 - binary_accuracy: 1.0000

  3/300 [..............................] - ETA: 12s - loss: 2.8907 - binary_accuracy: 0.6667    

  5/300 [..............................] - ETA: 12s - loss: 1.7344 - binary_accuracy: 0.8000

  7/300 [..............................] - ETA: 12s - loss: 1.2389 - binary_accuracy: 0.8571

  9/300 [..............................] - ETA: 11s - loss: 7.4547 - binary_accuracy: 0.7778

 11/300 [>.............................] - ETA: 12s - loss: 6.0993 - binary_accuracy: 0.8182

 13/300 [>.............................] - ETA: 11s - loss: 5.1610 - binary_accuracy: 0.8462

 15/300 [>.............................] - ETA: 11s - loss: 4.4728 - binary_accuracy: 0.8667

 17/300 [>.............................] - ETA: 11s - loss: 3.9466 - binary_accuracy: 0.8824

 19/300 [>.............................] - ETA: 11s - loss: 3.5312 - binary_accuracy: 0.8947

 21/300 [=>............................] - ETA: 11s - loss: 3.1949 - binary_accuracy: 0.9048

 23/300 [=>............................] - ETA: 11s - loss: 2.9176 - binary_accuracy: 0.9130

 25/300 [=>............................] - ETA: 11s - loss: 2.6842 - binary_accuracy: 0.9200

 27/300 [=>............................] - ETA: 11s - loss: 2.7286 - binary_accuracy: 0.8889

 29/300 [=>............................] - ETA: 11s - loss: 2.5404 - binary_accuracy: 0.8966

 31/300 [==>...........................] - ETA: 11s - loss: 2.3765 - binary_accuracy: 0.9032

 33/300 [==>...........................] - ETA: 10s - loss: 2.2325 - binary_accuracy: 0.9091

 35/300 [==>...........................] - ETA: 10s - loss: 2.1049 - binary_accuracy: 0.9143

 37/300 [==>...........................] - ETA: 10s - loss: 1.9912 - binary_accuracy: 0.9189

 39/300 [==>...........................] - ETA: 10s - loss: 1.8890 - binary_accuracy: 0.9231

 41/300 [===>..........................] - ETA: 10s - loss: 1.7969 - binary_accuracy: 0.9268

 43/300 [===>..........................] - ETA: 10s - loss: 1.7133 - binary_accuracy: 0.9302

 45/300 [===>..........................] - ETA: 10s - loss: 2.6705 - binary_accuracy: 0.8889

 47/300 [===>..........................] - ETA: 10s - loss: 2.5568 - binary_accuracy: 0.8936

 49/300 [===>..........................] - ETA: 10s - loss: 2.4525 - binary_accuracy: 0.8980

 51/300 [====>.........................] - ETA: 10s - loss: 2.3563 - binary_accuracy: 0.9020

 53/300 [====>.........................] - ETA: 10s - loss: 2.2674 - binary_accuracy: 0.9057

 55/300 [====>.........................] - ETA: 9s - loss: 2.1849 - binary_accuracy: 0.9091 

 57/300 [====>.........................] - ETA: 9s - loss: 2.1083 - binary_accuracy: 0.9123

 59/300 [====>.........................] - ETA: 9s - loss: 2.0368 - binary_accuracy: 0.9153

 61/300 [=====>........................] - ETA: 9s - loss: 1.9700 - binary_accuracy: 0.9180

 63/300 [=====>........................] - ETA: 9s - loss: 1.9075 - binary_accuracy: 0.9206

 65/300 [=====>........................] - ETA: 9s - loss: 1.8488 - binary_accuracy: 0.9231

 67/300 [=====>........................] - ETA: 9s - loss: 1.7936 - binary_accuracy: 0.9254

 69/300 [=====>........................] - ETA: 9s - loss: 1.7416 - binary_accuracy: 0.9275

























## Training and evaluation of Spanish model

In [None]:
# Train the Spanish model `num_runs` times and summarise the best validation
# accuracy reached in each run.
runs_accuracy = []
for run in range(1, num_runs + 1):
    epochs_accuracy = []
    # Build a new model for every run (presumably freshly initialised and
    # compiled by create_compile_model_es -- confirm against its definition).
    model_es = create_compile_model_es()

    # Fit one epoch per call so the validation accuracy can be read and
    # printed after every single epoch.
    for epoch in range(num_epochs_per_run):
        history = model_es.fit(
            raw_train_ds_es,
            validation_data=raw_test_ds_es,
            epochs=1,
            shuffle=False,
            # Uncomment the following line to enable the callbacks (per the
            # original note, they save and download the model).
            #callbacks=[callbacks]
        )
        # With epochs=1 the history holds exactly one validation value.
        accuracy = history.history['val_binary_accuracy']
        print("Run: ",run,"/ Accuracy at epoch ",epoch," is: ", accuracy[0],"\n")
        epochs_accuracy.append(accuracy[0])

    print("Accuracies over epochs:",epochs_accuracy,"\n\n")
    # NOTE(review): the best epoch is selected on raw_test_ds_es, i.e. the same
    # data passed as validation_data -- confirm this is the intended protocol.
    runs_accuracy.append(max(epochs_accuracy))

runs_accuracy.sort()
print("\n\n Over all runs maximum accuracies on Spanish are:", runs_accuracy)

# Compute the median from the actual number of runs instead of a fixed index,
# so the report stays correct (and does not raise) when num_runs is not 5.
if runs_accuracy:
    middle = len(runs_accuracy) // 2
    if len(runs_accuracy) % 2 == 1:
        median_accuracy = runs_accuracy[middle]
    else:
        # Even number of runs: average the two central values.
        median_accuracy = (runs_accuracy[middle - 1] + runs_accuracy[middle]) / 2
    print("The median for Spanish is:",median_accuracy,"\n\n\n")
else:
    print("No runs were executed, so no median is available for Spanish.\n\n\n")
