In [None]:
!pip install --upgrade tensorflow tensorflow_hub pandas matplotlib

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Base location of the raw CSV files; file names passed to read_dataset are
# appended to it verbatim and therefore start with '/'.
dataset_url = 'https://raw.githubusercontent.com/diptamath/covid_fake_news/main/data'
# Column names applied to every split (they replace whatever the skipped
# leading line of each file contains -- presumably its header row).
dataset_columns = ['id', 'features', 'labels']
# Number of leading lines to skip in each CSV file.
dataset_offset = 1

def read_dataset(dataset_name, base_url = None):
    """Read one CSV split into a DataFrame with the shared column names.

    Args:
        dataset_name: File name (with a leading '/') appended to the base location.
        base_url: Optional base URL or directory to read from instead of
            ``dataset_url``; lets the same loader work on a local copy of the data.

    Returns:
        A pandas DataFrame whose columns are ``dataset_columns``; the first
        ``dataset_offset`` lines of the file are skipped.
    """
    # Resolve the default at call time so later changes to dataset_url are honoured.
    if base_url is None:
        base_url = dataset_url
    # An integer skiprows drops the leading lines directly, without evaluating
    # a Python callable for every row of the file.
    return pd.read_csv(base_url + dataset_name, names = dataset_columns, skiprows = dataset_offset)

In [None]:
# Load the training split and preview its first rows.
dataset_train = read_dataset('/Constraint_Train.csv')
dataset_train.head()

In [None]:
# Load the labelled test split (used by model.evaluate) and preview its first rows.
dataset_test = read_dataset('/english_test_with_labels.csv')
dataset_test.head()

In [None]:
# Load the validation split (passed to model.fit as validation_data) and preview its first rows.
dataset_val = read_dataset('/Constraint_Val.csv')
dataset_val.head()

In [None]:
def transform(label):
    """Encode a textual class label as the binary training target.

    Args:
        label: 'fake' or 'real'. Letter case and surrounding whitespace are ignored.

    Returns:
        1 for 'fake', 0 for 'real'.

    Raises:
        ValueError: If the label is neither 'fake' nor 'real' (for example a
            missing value). Failing loudly avoids silently training on rows
            that were mislabelled as 'real'.
    """
    normalized = str(label).strip().lower()
    if normalized == 'fake':
        return 1
    if normalized == 'real':
        return 0
    raise ValueError(f'Unknown label: {label!r}')

def inverse_transform(value):
    """Map a model score back to its textual class label.

    Args:
        value: Numeric score to threshold.

    Returns:
        'fake' when the score is at least 0.5, otherwise 'real'.
    """
    # Keep the comparison as ">=" so a NaN score falls through to 'real'.
    if value >= 0.5:
        return 'fake'
    return 'real'

In [None]:
def _prepare_split(split):
    """Return a copy of a split without the 'id' column and with 0/1 labels.

    The helper is safe to apply more than once: a missing 'id' column is
    ignored, and labels that are already numeric are left alone because
    transform expects the original string labels.
    """
    prepared = split.drop(columns = ['id'], errors = 'ignore')
    if not pd.api.types.is_numeric_dtype(prepared['labels']):
        prepared['labels'] = prepared['labels'].map(transform)
    return prepared

# Re-running this cell is harmless, unlike an in-place pop('id'), which raises
# KeyError the second time it is executed.
dataset_train = _prepare_split(dataset_train)
dataset_test = _prepare_split(dataset_test)
dataset_val = _prepare_split(dataset_val)

# Preview a few rows instead of printing the whole frame.
dataset_train.head()

In [None]:
# TensorFlow Hub handle of the pre-trained text embedding module.
embedding_layer_name = 'https://tfhub.dev/google/nnlm-en-dim128/2'
# input_shape = [] with dtype = tf.string: every example is one scalar string.
# trainable = False keeps the pre-trained embedding weights fixed during training.
embedding_layer = hub.KerasLayer(embedding_layer_name, input_shape = [], dtype = tf.string, trainable = False)

# Sanity check on a few rows only; embedding the whole training column just to
# display the result is unnecessary work.
embedding_layer(dataset_train['features'].head(3))

In [None]:
# Seed Python, NumPy and TensorFlow before any weights are created so that
# repeated runs start from the same initial state (some GPU kernels may still
# introduce small run-to-run differences).
random_seed = 42
tf.keras.utils.set_random_seed(random_seed)

# Pre-trained embedding (not trainable) -> small ReLU hidden layer -> single
# sigmoid unit. Label 1 encodes 'fake', so the output is the predicted
# probability that the text is fake.
model = tf.keras.Sequential([
    embedding_layer,
    tf.keras.layers.Dense(16, activation = 'relu'),
    tf.keras.layers.Dense(1, activation = 'sigmoid')
])

model.summary()

In [None]:
# The model ends in a sigmoid, so the loss receives probabilities
# (from_logits = False) rather than raw logits.
model.compile(
    loss = tf.keras.losses.BinaryCrossentropy(from_logits = False),
    optimizer = tf.keras.optimizers.Adam(),
    metrics = ['accuracy'],
)

In [None]:
# Train on the training split and score the validation split after every
# epoch; the returned History object feeds the plots that follow.
history = model.fit(
    x = dataset_train['features'],
    y = dataset_train['labels'],
    validation_data = (dataset_val['features'], dataset_val['labels']),
    epochs = 30,
    batch_size = 512,
    validation_batch_size = 512,
    verbose = 1,
)

In [None]:
# Per-epoch metrics recorded by model.fit.
history_dict = history.history

loss = history_dict['loss']
val_loss = history_dict['val_loss']
accuracy = history_dict['accuracy']
val_accuracy = history_dict['val_accuracy']

# Number the epochs from 1 on the x-axis.
epochs = range(1, 1 + len(accuracy))

# Training loss as blue dots, validation loss as a red line.
plt.plot(epochs, loss, 'bo', label = 'Training loss')
plt.plot(epochs, val_loss, 'r', label = 'Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
# Accuracy curves; reuses epochs, accuracy and val_accuracy computed in the
# loss-plot cell. Training accuracy as blue dots, validation as a red line.
plt.plot(epochs, accuracy, 'bo', label = 'Training accuracy')
plt.plot(epochs, val_accuracy, 'r', label = 'Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc = 'lower right')

plt.show()

In [None]:
# Final check on the held-out test split; the cell displays the returned
# loss and accuracy values.
model.evaluate(
    x = dataset_test['features'],
    y = dataset_test['labels'],
    verbose = 1,
)

In [None]:
def predict(inputs):
    """Score each input text with the trained model and print the result.

    Args:
        inputs: Sequence of strings to classify.

    Prints, for every input, the text followed by its score and the label
    obtained from inverse_transform. Returns nothing.
    """
    scores = model.predict(inputs)

    for text, score_row in zip(inputs, scores):
        # Each prediction row holds a single value: the sigmoid output.
        score = score_row[0]
        print(f'Input: {text}')
        print(f'Output Score: {score} | Output Label: {inverse_transform(score)}')

In [None]:
# Hand-written example claims used to spot-check the trained model.
inputs = [
    'The Chinese government announced that "garlic is a preventative food for the the novel coronavirus."',
    'Hydroxychloroquine is the cure for coronavirus.',
    'Mass disinfection of people using a chemical solution will eradicate COVID-19.',
    'The coronavirus was engineered by scientists in a lab.',
    'Practice social distancing to slow the spread of covid.',
    'Wear a mask in public to help prevent the virus.', # most sentences in the dataset that use the word "mask" are fake
    'Fever and difficulty breathing are symptoms of coronavirus.'
]

# Print the score and predicted label for every example above.
predict(inputs)