In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Conv1D, MaxPooling1D, LSTM, Dense, Dropout, Flatten, Concatenate
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA
from sklearn.feature_selection import chi2, SelectKBest
from sklearn.metrics import classification_report

In [2]:
# Load and preprocess dataset
file_path = '/content/AMMUSED (1).csv'
data = pd.read_csv(file_path)
data = data[['title', 'comment', 'label_y', 'label_x']].dropna()

In [3]:
# Encode labels
stance_encoder = LabelEncoder()
data['stance_label'] = stance_encoder.fit_transform(data['label_y'])
fake_news_encoder = LabelEncoder()
data['fake_news_label'] = fake_news_encoder.fit_transform(data['label_x'])

In [4]:
# Combine title and comment for text inputs
data['combined_text'] = data['title'] + " " + data['comment']

In [5]:
# Split data
X = data['combined_text']
y_stance = data['stance_label']
y_fake_news = data['fake_news_label']

X_train, X_test, y_stance_train, y_stance_test = train_test_split(X, y_stance, test_size=0.2, random_state=42)
_, _, y_fake_news_train, y_fake_news_test = train_test_split(X, y_fake_news, test_size=0.2, random_state=42)

In [6]:
# Tokenize and pad sequences
MAX_VOCAB_SIZE = 5000
MAX_SEQUENCE_LENGTH = 100
EMBEDDING_DIM = 50

tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE)
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_padded = pad_sequences(X_train_seq, maxlen=MAX_SEQUENCE_LENGTH, padding='post')
X_test_padded = pad_sequences(X_test_seq, maxlen=MAX_SEQUENCE_LENGTH, padding='post')

In [7]:
# Feature Reduction using PCA
pca = PCA(n_components=50)
X_train_pca = pca.fit_transform(X_train_padded)
X_test_pca = pca.transform(X_test_padded)

In [29]:
# Modified build_cnn_lstm_model function (reduced complexity)
def build_cnn_lstm_model(input_dim, embedding_dim, input_length, num_classes):
    input_layer = Input(shape=(input_length,))
    embedding_layer = Embedding(input_dim=input_dim, output_dim=embedding_dim, input_length=input_length)(input_layer)
    # Removed CNN layer
    # cnn_layer = Conv1D(64, kernel_size=3, activation='relu')(embedding_layer)
    # pooling_layer = MaxPooling1D(pool_size=2)(cnn_layer)
    # flatten_layer = Flatten()(pooling_layer)
    lstm_layer = LSTM(50)(embedding_layer)  # Reduced LSTM units
    # concatenated = Concatenate()([flatten_layer, lstm_layer])
    dropout_layer = Dropout(0.7)(lstm_layer)  # Increased dropout rate
    dense_layer = Dense(32, activation='relu')(dropout_layer)  # Reduced dense layer units
    output_layer = Dense(num_classes, activation='softmax')(dense_layer)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


In [9]:
stance_model = build_cnn_lstm_model(MAX_VOCAB_SIZE, EMBEDDING_DIM, MAX_SEQUENCE_LENGTH, len(stance_encoder.classes_))



In [32]:
from tensorflow.keras.callbacks import EarlyStopping
# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=2)

In [34]:

stance_model.fit(
    X_train_padded[:len(X_train_padded)], y_stance_train[:len(y_stance_train)], 
    epochs=5,
    batch_size=32,
    validation_data=(X_test_padded, y_stance_test),
    verbose=1,
    callbacks=[early_stopping]  
)

[1m396/396[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 128ms/step - accuracy: 0.7949 - loss: 0.5150 - val_accuracy: 0.6797 - val_loss: 0.9009


<keras.src.callbacks.history.History at 0x7f417b2d7280>

In [35]:
# Evaluate Stance Detection
print("Stance Detection Classification Report:")
report_stance = classification_report(y_stance_test, stance_predicted_classes, target_names=stance_encoder.classes_, output_dict=True)
print(classification_report(y_stance_test, stance_predicted_classes, target_names=stance_encoder.classes_))
print(f"Stance Detection Accuracy (Test Data): {report_stance['accuracy']:.4f}")  # Print test accuracy



Stance Detection Classification Report:
              precision    recall  f1-score   support

       agree       0.74      0.58      0.65      2273
     comment       0.66      0.85      0.74      3147
    disagree       0.55      0.41      0.47       478
       query       0.54      0.17      0.26       424

    accuracy                           0.67      6322
   macro avg       0.62      0.50      0.53      6322
weighted avg       0.67      0.67      0.65      6322

Stance Detection Accuracy (Test Data): 0.6726


In [36]:
# Evaluate Stance Detection
print("Stance Detection Classification Report:")
print(classification_report(y_stance_test, stance_predicted_classes, target_names=stance_encoder.classes_))


Stance Detection Classification Report:
              precision    recall  f1-score   support

       agree       0.74      0.58      0.65      2273
     comment       0.66      0.85      0.74      3147
    disagree       0.55      0.41      0.47       478
       query       0.54      0.17      0.26       424

    accuracy                           0.67      6322
   macro avg       0.62      0.50      0.53      6322
weighted avg       0.67      0.67      0.65      6322



In [42]:
# Fake News Detection
X_fake_news_train = np.hstack([X_train_padded, y_stance_train.values.reshape(-1, 1)])
X_fake_news_test = np.hstack([X_test_padded, stance_predicted_classes.reshape(-1, 1)])

fake_news_model = build_cnn_lstm_model(MAX_VOCAB_SIZE + 1, EMBEDDING_DIM, X_fake_news_train.shape[1], len(fake_news_encoder.classes_))

fake_news_model.layers[1].units = 25
fake_news_model.layers[3].units = 50
fake_news_model.layers[4].rate = 0.8
fake_news_model.layers[5].units = 16

early_stopping_fake_news = EarlyStopping(monitor='val_loss', patience=2)

fake_news_model.fit(
    X_fake_news_train[:len(X_fake_news_train)//2], y_fake_news_train[:len(y_fake_news_train)//2],  
    epochs=5,
    batch_size=32,
    validation_data=(X_fake_news_test, y_fake_news_test),
    verbose=1,
    callbacks=[early_stopping_fake_news]  
)

[1m396/396[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 88ms/step - accuracy: 0.6411 - loss: 0.6258 - val_accuracy: 0.6610 - val_loss: 0.5351


<keras.src.callbacks.history.History at 0x7f416264e5c0>

In [43]:
# Fake News Detection Predictions
fake_news_predictions = fake_news_model.predict(X_fake_news_test)
fake_news_predicted_classes = np.argmax(fake_news_predictions, axis=1)

[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 40ms/step


In [44]:
 #Evaluate Fake News Detection
print("Fake News Detection Classification Report:")
target_names = [str(class_name) for class_name in fake_news_encoder.classes_]
report_fake_news = classification_report(y_fake_news_test, fake_news_predicted_classes, target_names=target_names, output_dict=True)
print(classification_report(y_fake_news_test, fake_news_predicted_classes, target_names=target_names))
print(f"Fake News Detection Accuracy (Test Data): {report_fake_news['accuracy']:.4f}")  # Print test accuracy

Fake News Detection Classification Report:
              precision    recall  f1-score   support

       False       0.87      0.06      0.12      2267
        True       0.66      0.99      0.79      4055

    accuracy                           0.66      6322
   macro avg       0.76      0.53      0.45      6322
weighted avg       0.73      0.66      0.55      6322

Fake News Detection Accuracy (Test Data): 0.6610
