In [4]:
from config import *
from Training.UnifiedTrainer import UnifiedTrainer
from Training.utils import init_training_components, init_LSTM_with_attention, init_LSTM_without_attention, visualize_results

In [5]:
import pandas as pd

# Load the processed WELFake table. The file is tab-separated, which is the
# default delimiter of read_table. Later cells read the 'full_text_processed'
# and 'label' columns from this frame.
df = pd.read_table('WELFake_Dataset_processed.tsv')

In [6]:
from sklearn.model_selection import train_test_split

# Stage 1: keep 60% of the rows for training and hold out 40%, stratified on
# the label so both parts preserve the class balance.
train_texts, temp_text, train_labels, temp_labels = train_test_split(
    df['full_text_processed'], df['label'],
    test_size=0.4,
    stratify=df['label'],
    random_state=2018,
)

# Stage 2: split the hold-out in half (again stratified) into validation and
# test, which yields a 60/20/20 train/val/test partition overall.
val_texts, test_texts, val_labels, test_labels = train_test_split(
    temp_text, temp_labels,
    test_size=0.5,
    stratify=temp_labels,
    random_state=2018,
)

In [7]:
import gensim
from Training.utils import makeWords

# Fit a Word2Vec model of dimension EMBEDDING_DIM on the output of makeWords
# (presumably one token list per document - confirm in Training.utils).
# min_count=1 keeps every token that occurs in the corpus.
# NOTE(review): the corpus passed here is the full DataFrame column, so it
# includes the validation and test texts as well as train_texts. Confirm this
# is intentional; restricting it to train_texts would change the vocabulary,
# which may no longer match previously saved checkpoints.
mod = gensim.models.Word2Vec(
    sentences=makeWords(df['full_text_processed']),
    vector_size=EMBEDDING_DIM,
    window=5,
    min_count=1,
    workers=5,
)

In [8]:
from NewsDatasetLSTM import NewsDatasetLSTM
from torch.utils.data import DataLoader

# NOTE(review): hard-coded sequence length; how 623 was derived is not shown
# in this notebook - consider moving it to config next to EMBEDDING_DIM.
max_len = 623
# Instantiate datasets (one per split, all sharing the same Word2Vec model and max_len)
train_dataset = NewsDatasetLSTM(train_texts, train_labels, mod, max_len=max_len)
val_dataset = NewsDatasetLSTM(val_texts, val_labels, mod, max_len=max_len)
test_dataset = NewsDatasetLSTM(test_texts, test_labels, mod, max_len=max_len)

# Create DataLoaders
batch_size = 64
# Training: reshuffle every epoch and drop the incomplete final batch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
# Evaluation: keep every sample. Dropping the last partial batch here would
# silently exclude up to batch_size - 1 validation examples from the metrics;
# the test loader already runs without drop_last, so partial batches are fine.
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=False)

In [9]:
# Per-class sample counts, passed to the trainers below as `class_counts`
# in the order [label 0, label 1].
# Counted on the training labels only, so that no statistic taken from the
# validation/test rows feeds into training. The split is stratified, so the
# class ratio is essentially the same as in the full DataFrame.
num_negatives = int((train_labels == 0).sum())
num_positives = int((train_labels == 1).sum())
class_counts = [num_negatives, num_positives]

In [10]:
from Training.CheckpointManager import CheckpointManager

# Stand-alone manager configured with save_dir='checkpoints' and the attention
# model's save name.
# NOTE(review): the cells below load checkpoints through
# trainerN.checkpoint_manager rather than this instance - confirm whether this
# object is still needed.
checkpoint_manager = CheckpointManager(save_dir='checkpoints', save_name='LSTMWithAttention')

### LSTM with Attention results

In [14]:
# Attention variant: build the model on DEVICE, then its optimizer/scheduler pair.
model1 = init_LSTM_with_attention().to(DEVICE)
optimizer1, scheduler1 = init_training_components(model1)

# Trainer wired to the attention model; save_name selects the
# 'LSTMWithAttention' checkpoint files.
trainer1 = UnifiedTrainer(
    model=model1,
    optimizer=optimizer1,
    scheduler=scheduler1,
    class_counts=class_counts,
    device=DEVICE,
    grad_clip=1.0,
    early_stopping_patience=EARLY_STOP_PATIENCE,
    save_name='LSTMWithAttention',
)

In [23]:
# Load the 'final' checkpoint for the attention model via the trainer's own
# checkpoint manager, handing it model1/optimizer1/scheduler1, and unpack the
# five values it returns.
epoch, metrics, hyperparams, metadata, early_stopping_state = trainer1.checkpoint_manager.load_checkpoint(
    model=model1,
    optimizer=optimizer1,
    scheduler=scheduler1,
    load_type='final',
)

Checkpoint loaded from checkpoints\LSTMWithAttention_final.pt
Resuming from epoch 10


In [24]:
# Evaluate the attention model on the held-out test loader.
# Per the captured output, test() itself reloads the '*_best.pt' checkpoint
# before evaluating, so the reported metrics are for the best checkpoint, not
# for whatever weights were loaded into model1 beforehand.
test_results = trainer1.test(test_loader)


Testing with best model checkpoint...
Checkpoint loaded from checkpoints\LSTMWithAttention_best.pt
Resuming from epoch 10


Evaluating: 100%|[34m██████████[0m| 194/194 [00:25<00:00,  7.68it/s]



Test Results:
Loss: 0.2789
Accuracy: 0.8848
F1 Score: 0.8820
Precision: 0.8011
Recall: 0.9812
ROC AUC: 0.9707



### LSTM without Attention results

In [18]:
# Baseline variant (no attention): build the model on DEVICE, then its
# optimizer/scheduler pair.
model2 = init_LSTM_without_attention().to(DEVICE)
optimizer2, scheduler2 = init_training_components(model2)

# Trainer wired to the baseline model; save_name selects the
# 'LSTMWithoutAttention' checkpoint files.
trainer2 = UnifiedTrainer(
    model=model2,
    optimizer=optimizer2,
    scheduler=scheduler2,
    class_counts=class_counts,
    device=DEVICE,
    grad_clip=1.0,
    early_stopping_patience=EARLY_STOP_PATIENCE,
    save_name='LSTMWithoutAttention',
)

In [21]:
# Load the 'final' checkpoint for the baseline model via the trainer's own
# checkpoint manager, handing it model2/optimizer2/scheduler2, and unpack the
# five values it returns.
epoch2, metrics2, hyperparams2, metadata2, early_stopping_state2 = trainer2.checkpoint_manager.load_checkpoint(
    model=model2,
    optimizer=optimizer2,
    scheduler=scheduler2,
    load_type='final',
)

Checkpoint loaded from checkpoints\LSTMWithoutAttention_final.pt
Resuming from epoch 10


In [22]:
# Evaluate the baseline (no-attention) model on the held-out test loader.
# Per the captured output, test() itself reloads the '*_best.pt' checkpoint
# (epoch 8 here, whereas the 'final' checkpoint is epoch 10), so the reported
# metrics are for the best checkpoint rather than the final weights.
test_results2 = trainer2.test(test_loader)


Testing with best model checkpoint...
Checkpoint loaded from checkpoints\LSTMWithoutAttention_best.pt
Resuming from epoch 8


Evaluating: 100%|[34m██████████[0m| 194/194 [00:18<00:00, 10.26it/s]



Test Results:
Loss: 0.7627
Accuracy: 0.6789
F1 Score: 0.7171
Precision: 0.5847
Recall: 0.9270
ROC AUC: 0.7321

