In [None]:
from main import DataImporter
import tensorflow as tf


# Build the importer and run its import step before asking for any split.
data_importer = DataImporter()
data_importer.import_data()

# Fetch the three splits in a fixed order: train, test, validation.
ds_train, ds_test, ds_validation = (
    data_importer.get_train_data(),
    data_importer.get_test_data(),
    data_importer.get_validation_data(),
)

In [None]:
# Call .info() on each split (train, test, validation), printing a blank-line
# separator after every report.
for split_frame in (ds_train, ds_test, ds_validation):
    split_frame.info()
    print('\n')

In [None]:
# Columns to remove from every split.
columns_to_drop = ['id', 'subject', 'speaker', 'job_title', 'state_info', 'party_affiliation', 'context']

# Rebind each split to the result of dropping those columns
# (evaluated in train, test, validation order).
# NOTE(review): the splits are rebound under the same names, so running this
# cell a second time presumably raises KeyError (columns already gone).
ds_train, ds_test, ds_validation = (
    frame.drop(columns=columns_to_drop)
    for frame in (ds_train, ds_test, ds_validation)
)

# Preview the first rows of each split under its heading.
for heading, frame in (
    ("Training Dataset:", ds_train),
    ("\nTest Dataset:", ds_test),
    ("\nValidation Dataset:", ds_validation),
):
    print(heading)
    display(frame.head())

In [None]:
# Add the 'pants_on_fire_counts' tally into 'false_counts' on every split.
# NOTE(review): the sum is written back into the same column, so executing this
# cell twice adds the tally twice -- run it once per fresh kernel.
for frame in (ds_train, ds_test, ds_validation):
    frame['false_counts'] = frame['false_counts'] + frame['pants_on_fire_counts']

# Preview the first rows of each split under its heading.
for heading, frame in (
    ("Training Dataset:", ds_train),
    ("\nTest Dataset:", ds_test),
    ("\nValidation Dataset:", ds_validation),
):
    print(heading)
    display(frame.head())

In [None]:
# Remove the 'pants_on_fire_counts' column from every split.
# errors='ignore' keeps this cell re-runnable: on a second run the column is
# already gone and the drop becomes a no-op instead of raising KeyError.
ds_train = ds_train.drop(columns='pants_on_fire_counts', errors='ignore')
ds_test = ds_test.drop(columns='pants_on_fire_counts', errors='ignore')
ds_validation = ds_validation.drop(columns='pants_on_fire_counts', errors='ignore')

# Preview the first rows of each split under its heading.
for heading, frame in (
    ("Training Dataset:", ds_train),
    ("\nTest Dataset:", ds_test),
    ("\nValidation Dataset:", ds_validation),
):
    print(heading)
    display(frame.head())

In [None]:
from models.modelhelper import ModelHelper
import numpy as np

# Helper object whose methods do the preprocessing below.
model_helper = ModelHelper()

# First pass: run preprocess_text on each individual statement
# (train, test, validation order).
train_texts, test_texts, val_texts = (
    frame['statement'].apply(model_helper.preprocess_text)
    for frame in (ds_train, ds_test, ds_validation)
)

# Second pass: hand each whole preprocessed column back to preprocess_text.
# NOTE(review): the same method is called per statement above and per column
# here; presumably it accepts both a single string and a collection and returns
# model-ready sequences -- confirm against ModelHelper.
train_sequences, test_sequences, val_sequences = (
    model_helper.preprocess_text(texts)
    for texts in (train_texts, test_texts, val_texts)
)

# Label columns as known to the helper.
# NOTE(review): this is presumably the helper's own list (not a copy), so the
# removal below is also visible to model_helper -- normalize_counts likely
# relies on that; confirm against ModelHelper.
truthfulness_columns = model_helper.truthfulness_columns

# 'pants_on_fire_counts' is excluded from the label columns. The membership
# check makes the removal safe when the entry is already absent (for example
# when the list is shared state that an earlier run of this cell has already
# modified); an unguarded list.remove would raise ValueError in that case.
if 'pants_on_fire_counts' in truthfulness_columns:
    truthfulness_columns.remove('pants_on_fire_counts')

# Build the label data for each split via the helper's normalize_counts.
train_labels = model_helper.normalize_counts(ds_train)
test_labels = model_helper.normalize_counts(ds_test)
val_labels = model_helper.normalize_counts(ds_validation)

# Create text classification model
vocab_size = 10000  # Matches max_tokens in preprocess_text
embedding_dim = 100
max_sequence_length = 200
num_classes = len(truthfulness_columns)  # Number of truthfulness categories

model = model_helper.create_text_classification_model(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim, 
    max_sequence_length=max_sequence_length,
    num_classes=num_classes
)

%load_ext tensorboard
%tensorboard --logdir models/logs/fit

# Prepare datasets
train_dataset, val_dataset, test_dataset = model_helper.prepare_datasets(
    train_sequences=train_sequences,
    train_labels=train_labels,
    val_sequences=val_sequences,
    val_labels=val_labels,
    test_sequences=test_sequences,
    test_labels=test_labels,
    batch_size=32
)

# Train the model using ModelHelper's train_model method
history = model_helper.train_model(
    model=model,
    train_data=train_dataset,
    validation_data=val_dataset,
    epochs=15,
    batch_size=32
)

# Evaluate on test set
test_metrics = model.evaluate(test_dataset)
print(test_metrics)

# Save the model
model_helper.save_model(model, "text_classification_model_fourcol")

In [None]:
# Load the saved model back through the helper.
# NOTE(review): the model was saved as "text_classification_model_fourcol"
# (no extension); presumably save_model appends ".keras" -- confirm.
loaded_model = model_helper.load_model("text_classification_model_fourcol.keras")

# Make predictions on the test dataset
print(test_dataset)
predictions = loaded_model.predict(test_dataset)
# Index of the highest-scoring class in each prediction row
predicted_classes = np.argmax(predictions, axis=1)

# Show at most five examples; bounding by len(predictions) avoids an IndexError
# when the test dataset yields fewer than five predictions.
# NOTE(review): pairing predictions[i] with test_labels[i] assumes test_dataset
# keeps the row order of test_labels (no shuffling) and that test_labels
# supports positional indexing -- confirm against prepare_datasets and
# normalize_counts.
print("\nSample predictions:")
for i in range(min(5, len(predictions))):
    print(f"Example {i+1}:")
    print(f"Predicted probabilities: {predictions[i]}")
    print(f"Predicted class: {predicted_classes[i]}")
    print(f"Actual class: {np.argmax(test_labels[i])}\n")


In [None]:
# Smoke test: push one hand-written statement through preprocess_text and the
# reloaded model, then print the raw model output.
# NOTE(review): assumes preprocess_text turns a single string into input that
# predict() accepts as-is (including any batch dimension) -- confirm.
test_string = 'The president is a good man'
test_input = model_helper.preprocess_text(test_string)
prediction = loaded_model.predict(test_input)
print(prediction)