In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Bidirectional, Dense, Dropout, Concatenate
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

In [2]:
# Load dataset
file_path = '/content/AMMUSED (1).csv'
data = pd.read_csv(file_path)

In [3]:
# Select relevant columns
data = data[['title', 'comment', 'label_y', 'label_x']]
data = data.dropna()

In [4]:
# Encode labels
stance_encoder = LabelEncoder()
data['stance_label'] = stance_encoder.fit_transform(data['label_y'])
fake_news_encoder = LabelEncoder()

# Convert boolean values to strings before fitting the encoder
data['label_x'] = data['label_x'].astype(str)

data['fake_news_label'] = fake_news_encoder.fit_transform(data['label_x'])  # False=0, True=1

In [5]:
# Split data
X = data[['title', 'comment']]
y_stance = data['stance_label']
y_fake_news = data['fake_news_label']

X_train, X_test, y_stance_train, y_stance_test = train_test_split(X, y_stance, test_size=0.2, random_state=42)
_, _, y_fake_news_train, y_fake_news_test = train_test_split(X, y_fake_news, test_size=0.2, random_state=42)


In [6]:
# Preprocess text
MAX_VOCAB_SIZE = 5000
MAX_SEQUENCE_LENGTH = 100
EMBEDDING_DIM = 50

def preprocess_text(X_train, X_test):
    tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE)
    tokenizer.fit_on_texts(X_train['title'] + " " + X_train['comment'])

    X_train_seq = tokenizer.texts_to_sequences(X_train['title'] + " " + X_train['comment'])
    X_test_seq = tokenizer.texts_to_sequences(X_test['title'] + " " + X_test['comment'])

    X_train_padded = pad_sequences(X_train_seq, maxlen=MAX_SEQUENCE_LENGTH, padding='post')
    X_test_padded = pad_sequences(X_test_seq, maxlen=MAX_SEQUENCE_LENGTH, padding='post')

    return X_train_padded, X_test_padded, tokenizer

X_stance_train_padded, X_stance_test_padded, tokenizer = preprocess_text(X_train, X_test)


In [31]:
# Build BiLSTM model for stance detection
def build_bilstm_model(input_length, vocab_size, embedding_dim, num_classes):
    input_layer = Input(shape=(input_length,))
    embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=input_length)(input_layer)
    lstm_layer = Bidirectional(LSTM(64, return_sequences=False))(embedding_layer)
    dropout_layer = Dropout(0.5)(lstm_layer)
    output_layer = Dense(num_classes, activation='softmax')(dropout_layer)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

stance_model = build_bilstm_model(MAX_SEQUENCE_LENGTH, MAX_VOCAB_SIZE, EMBEDDING_DIM, len(stance_encoder.classes_))



In [33]:
# Train stance detection model
stance_model.fit(
    X_stance_train_padded, y_stance_train,
    epochs=5,
    batch_size=32,
    validation_data=(X_stance_test_padded, y_stance_test),
    verbose=1
)

[1m791/791[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 113ms/step - accuracy: 0.5908 - loss: 1.0011 - val_accuracy: 0.6401 - val_loss: 0.8837


<keras.src.callbacks.history.History at 0x7d65c7e1b8e0>

In [34]:
# Predict stances
stance_predictions = stance_model.predict(X_stance_test_padded)
stance_predicted_classes = np.argmax(stance_predictions, axis=1)

[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 34ms/step


In [35]:
# Evaluate stance detection
print("Stance Detection Report:")
print(classification_report(y_stance_test, stance_predicted_classes, target_names=stance_encoder.classes_))

Stance Detection Report:
              precision    recall  f1-score   support

       agree       0.71      0.54      0.61      2273
     comment       0.61      0.87      0.72      3147
    disagree       0.77      0.16      0.27       478
       query       0.00      0.00      0.00       424

    accuracy                           0.64      6322
   macro avg       0.52      0.39      0.40      6322
weighted avg       0.62      0.64      0.60      6322



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [36]:
# Add predicted stances as a feature for fake news detection
X_fake_news_train_padded = np.hstack([X_stance_train_padded, y_stance_train.values.reshape(-1, 1)])
X_fake_news_test_padded = np.hstack([X_stance_test_padded, stance_predicted_classes.reshape(-1, 1)])


In [60]:

def build_fake_news_model(input_length, num_classes):
    input_layer = Input(shape=(input_length,))
    dense_layer = Dense(32, activation='relu')(input_layer)
    output_layer = Dense(num_classes, activation='softmax')(dense_layer)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

fake_news_model = build_fake_news_model(X_fake_news_train_padded.shape[1], len(fake_news_encoder.classes_))


In [61]:
fake_news_model.fit(
    X_fake_news_train_padded, y_fake_news_train,
    epochs=5,
    batch_size=32,
    validation_data=(X_fake_news_test_padded, y_fake_news_test),
    verbose=1
)

Epoch 1/5
[1m791/791[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6058 - loss: 104.0676 - val_accuracy: 0.6435 - val_loss: 1.6223
Epoch 2/5
[1m791/791[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6568 - loss: 1.0426 - val_accuracy: 0.6430 - val_loss: 0.7004
Epoch 3/5
[1m791/791[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6467 - loss: 0.6763 - val_accuracy: 0.6422 - val_loss: 0.6787
Epoch 4/5
[1m791/791[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6506 - loss: 0.6440 - val_accuracy: 0.6420 - val_loss: 0.6708
Epoch 5/5
[1m791/791[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.6506 - loss: 0.6427 - val_accuracy: 0.6420 - val_loss: 0.6706


<keras.src.callbacks.history.History at 0x7d65c4634be0>

In [62]:
# Predict fake news
fake_news_predictions = fake_news_model.predict(X_fake_news_test_padded)
fake_news_predicted_classes = np.argmax(fake_news_predictions, axis=1)

[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [63]:
# Evaluate fake news detection
print("Fake News Detection Report:")
print(classification_report(y_fake_news_test, fake_news_predicted_classes, target_names=fake_news_encoder.classes_))

Fake News Detection Report:
              precision    recall  f1-score   support

       False       0.70      0.00      0.01      2267
        True       0.64      1.00      0.78      4055

    accuracy                           0.64      6322
   macro avg       0.67      0.50      0.39      6322
weighted avg       0.66      0.64      0.50      6322



In [64]:
from sklearn.metrics import precision_recall_fscore_support
# Predict fake news
fake_news_predictions = fake_news_model.predict(X_fake_news_test_padded)
fake_news_predicted_classes = np.argmax(fake_news_predictions, axis=1)

# Calculate precision, recall, and F1-score
precision, recall, f1, _ = precision_recall_fscore_support(y_fake_news_test, fake_news_predicted_classes, average='weighted')

[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [65]:
print("Fake News Detection Metrics:")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

Fake News Detection Metrics:
Precision: 0.6628
Recall: 0.6420
F1-score: 0.5036
