In [1]:
pip install imbalanced-learn


Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from nltk.corpus import stopwords
from imblearn.over_sampling import SMOTE
import nltk

# Ensure NLTK stopwords are downloaded
nltk.download('stopwords')


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Moneykicks\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
# Load data from CSV file
df = pd.read_csv('train_stances.csv')

# Keep only 'Headline' and 'Stance' columns
df = df[['Headline', 'Stance']]

# Drop rows where 'Headline' or 'Stance' is NaN
df = df.dropna(subset=['Headline', 'Stance'])

# Ensure all entries in 'Headline' are strings
df['Headline'] = df['Headline'].astype(str)

# For full review display without truncation
pd.set_option('display.max_colwidth', None)

# Print value counts of 'Stance' column
print(df['Stance'].value_counts())

# Preprocess the text data
stop_words = list(stopwords.words('english')) 
tfidf = TfidfVectorizer(stop_words=stop_words, max_df=0.7)
X = tfidf.fit_transform(df['Headline'])

# Encode the target labels
y = df['Stance']


Stance
unrelated    36545
discuss       8909
agree         3678
disagree       840
Name: count, dtype: int64


In [4]:
# Apply SMOTE to balance the dataset
smote = SMOTE(random_state=42)
X_smote, y_smote = smote.fit_resample(X, y)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_smote, y_smote, test_size=0.2, random_state=42)


In [5]:
# Naive Bayes classifier
nb_model = MultinomialNB()
nb_model.fit(X_train, y_train)
nb_y_pred = nb_model.predict(X_test)
nb_accuracy = accuracy_score(y_test, nb_y_pred)
nb_report = classification_report(y_test, nb_y_pred)
print("Naive Bayes Results:")
print(f"Accuracy: {nb_accuracy}")
print("Classification Report:")
print(nb_report)

# SVM classifier
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)
svm_y_pred = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_y_pred)
svm_report = classification_report(y_test, svm_y_pred)
print("SVM Results:")
print(f"Accuracy: {svm_accuracy}")
print("Classification Report:")
print(svm_report)

# Random Forest classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_y_pred = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_y_pred)
rf_report = classification_report(y_test, rf_y_pred)
print("Random Forest Results:")
print(f"Accuracy: {rf_accuracy}")
print("Classification Report:")
print(rf_report)

# Compare the accuracy of all models
accuracy_scores = {
    'Naive Bayes': nb_accuracy,
    'SVM': svm_accuracy,
    'Random Forest': rf_accuracy
}

# Find the best model based on accuracy
best_model = max(accuracy_scores, key=accuracy_scores.get)
print(f"\nThe best model based on accuracy is: {best_model} with accuracy {accuracy_scores[best_model]}")


Naive Bayes Results:
Accuracy: 0.5935832535230537
Classification Report:
              precision    recall  f1-score   support

       agree       0.62      0.59      0.60      7246
    disagree       0.66      0.89      0.76      7341
     discuss       0.52      0.69      0.59      7297
   unrelated       0.56      0.20      0.29      7352

    accuracy                           0.59     29236
   macro avg       0.59      0.59      0.56     29236
weighted avg       0.59      0.59      0.56     29236



In [1]:
# Install necessary libraries
# !pip install googletrans==4.0.0-rc1 pandas scikit-learn tensorflow imbalanced-learn nlpaug

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, LSTM, Dense, Bidirectional, SpatialDropout1D
from tensorflow.keras.utils import to_categorical
from imblearn.over_sampling import SMOTE
import re
from googletrans import Translator

# Initialize the translator
translator = Translator()

# Load data from CSV file
df = pd.read_csv('train_stances.csv')

# Keep only 'Headline' and 'Stance' columns
df = df[['Headline', 'Stance']]

# Drop rows where 'Headline' or 'Stance' is NaN
df = df.dropna(subset=['Headline', 'Stance'])

# Ensure all entries in 'Headline' are strings
df['Headline'] = df['Headline'].astype(str)

# For full review display without truncation
pd.set_option('display.max_colwidth', None)

# Print value counts of 'Stance' column
print(df['Stance'].value_counts())

# Text Preprocessing
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Ensure NLTK stopwords and WordNet lemmatizer are downloaded
import nltk
nltk.download('stopwords')
nltk.download('wordnet')

stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # Remove special characters
    text = ' '.join([lemmatizer.lemmatize(word) for word in text.split() if word not in stop_words])  # Lemmatize and remove stopwords
    return text

df['Headline'] = df['Headline'].apply(preprocess_text)

# Define back-translation function
def back_translate(text, src_language='en', mid_language='fr'):
    try:
        translated_text = translator.translate(text, src=src_language, dest=mid_language).text
        back_translated_text = translator.translate(translated_text, src=mid_language, dest=src_language).text
        return back_translated_text
    except Exception as e:
        print(f"Error during back-translation: {e}")
        return text

# Define function to augment minority class
def augment_minority_class(df, class_label, src_language='en', mid_language='fr'):
    minority_texts = df[df['Stance'] == class_label]['Headline'].tolist()
    augmented_texts = [back_translate(text, src_language, mid_language) for text in minority_texts]
    augmented_labels = [class_label] * len(augmented_texts)
    return pd.DataFrame({'Headline': augmented_texts, 'Stance': augmented_labels})

# Define threshold for minority class
threshold = 100  # Adjust based on your needs

# Identify minority classes
class_counts = df['Stance'].value_counts()
minority_classes = class_counts[class_counts < threshold].index.tolist()

# Apply back-translation for all minority classes
augmented_df_list = [augment_minority_class(df, cls) for cls in minority_classes]
df_augmented = pd.concat([df] + augmented_df_list, ignore_index=True)

# Text Vectorization for ML models
tfidf = TfidfVectorizer(max_df=0.7)
X = tfidf.fit_transform(df_augmented['Headline'])
y = df_augmented['Stance']

# Handle class imbalance using SMOTE for traditional ML models
smote = SMOTE(random_state=42)
X_smote, y_smote = smote.fit_resample(X, y)

# Split dataset for ML models
X_train_ml, X_test_ml, y_train_ml, y_test_ml = train_test_split(X_smote, y_smote, test_size=0.2, random_state=42)

# Train and evaluate ML models
models = {
    'Naive Bayes': MultinomialNB(),
    'SVM': SVC(kernel='linear'),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42)
}

for name, model in models.items():
    model.fit(X_train_ml, y_train_ml)
    y_pred = model.predict(X_test_ml)
    accuracy = accuracy_score(y_test_ml, y_pred)
    report = classification_report(y_test_ml, y_pred)
    print(f"{name} Results:")
    print(f"Accuracy: {accuracy}")
    print("Classification Report:")
    print(report)

# Tokenizer and padding for deep learning models
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(df_augmented['Headline'])
X_seq = tokenizer.texts_to_sequences(df_augmented['Headline'])
X_pad = pad_sequences(X_seq, maxlen=100)
y_encoded = pd.get_dummies(df_augmented['Stance']).values

# Split dataset for deep learning models
X_train_dl, X_test_dl, y_train_dl, y_test_dl = train_test_split(X_pad, y_encoded, test_size=0.2, random_state=42)

# Define and train CNN model
cnn_model = Sequential([
    Embedding(5000, 128, input_length=100),
    Conv1D(filters=128, kernel_size=5, padding='same', activation='relu'),
    GlobalMaxPooling1D(),
    Dense(128, activation='relu'),
    Dense(y_encoded.shape[1], activation='softmax')
])
cnn_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
cnn_model.fit(X_train_dl, y_train_dl, epochs=5, batch_size=64, validation_data=(X_test_dl, y_test_dl), verbose=2)

cnn_y_pred = cnn_model.predict(X_test_dl)
cnn_y_pred_labels = cnn_y_pred.argmax(axis=1)
y_test_dl_labels = y_test_dl.argmax(axis=1)
cnn_accuracy = accuracy_score(y_test_dl_labels, cnn_y_pred_labels)
cnn_report = classification_report(y_test_dl_labels, cnn_y_pred_labels)
print("CNN Results:")
print(f"Accuracy: {cnn_accuracy}")
print("Classification Report:")
print(cnn_report)

# Define and train LSTM model
lstm_model = Sequential([
    Embedding(5000, 128, input_length=100),
    SpatialDropout1D(0.2),
    LSTM(100, dropout=0.2, recurrent_dropout=0.2),
    Dense(y_encoded.shape[1], activation='softmax')
])
lstm_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
lstm_model.fit(X_train_dl, y_train_dl, epochs=5, batch_size=64, validation_data=(X_test_dl, y_test_dl), verbose=2)

lstm_y_pred = lstm_model.predict(X_test_dl)
lstm_y_pred_labels = lstm_y_pred.argmax(axis=1)
lstm_accuracy = accuracy_score(y_test_dl_labels, lstm_y_pred_labels)
lstm_report = classification_report(y_test_dl_labels, lstm_y_pred_labels)
print("LSTM Results:")
print(f"Accuracy: {lstm_accuracy}")
print("Classification Report:")
print(lstm_report)

# Define and train BiLSTM model
bilstm_model = Sequential([
    Embedding(5000, 128, input_length=100),
    SpatialDropout1D(0.2),
    Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2)),
    Dense(y_encoded.shape[1], activation='softmax')
])
bilstm_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
bilstm_model.fit(X_train_dl, y_train_dl, epochs=5, batch_size=64, validation_data=(X_test_dl, y_test_dl), verbose=2)

bilstm_y_pred = bilstm_model.predict(X_test_dl)
bilstm_y_pred_labels = bilstm_y_pred.argmax(axis=1)
bilstm_accuracy = accuracy_score(y_test_dl_labels, bilstm_y_pred_labels)
bilstm_report = classification_report(y_test_dl_labels, bilstm_y_pred_labels)
print("BiLSTM Results:")
print(f"Accuracy: {bilstm_accuracy}")
print("Classification Report:")
print(bilstm_report)

# Compare all models
accuracy_scores_all = {
    'Naive Bayes': accuracy_score(y_test_ml, models['Naive Bayes'].predict(X_test_ml)),
    'SVM': accuracy_score(y_test_ml, models['SVM'].predict(X_test_ml)),
    'Random Forest': accuracy_score(y_test_ml, models['Random Forest'].predict(X_test_ml)),
    'CNN': cnn_accuracy,
    'LSTM': lstm_accuracy,
    'BiLSTM': bilstm_accuracy
}

best_model = max(accuracy_scores_all, key=accuracy_scores_all.get)
print(f"\nThe best model based on accuracy is: {best_model} with accuracy {accuracy_scores_all[best_model]}")


Stance
unrelated    36545
discuss       8909
agree         3678
disagree       840
Name: count, dtype: int64


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Moneykicks\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Moneykicks\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Naive Bayes Results:
Accuracy: 0.5923176905185388
Classification Report:
              precision    recall  f1-score   support

       agree       0.62      0.58      0.60      7246
    disagree       0.66      0.89      0.76      7341
     discuss       0.52      0.69      0.59      7297
   unrelated       0.55      0.20      0.30      7352

    accuracy                           0.59     29236
   macro avg       0.59      0.59      0.56     29236
weighted avg       0.59      0.59      0.56     29236

SVM Results:
Accuracy: 0.6427691886715009
Classification Report:
              precision    recall  f1-score   support

       agree       0.61      0.70      0.65      7246
    disagree       0.68      0.94      0.79      7341
     discuss       0.64      0.62      0.63      7297
   unrelated       0.60      0.31      0.41      7352

    accuracy                           0.64     29236
   macro avg       0.63      0.64      0.62     29236
weighted avg       0.63      0.64      0.62    



Epoch 1/5
625/625 - 18s - 29ms/step - accuracy: 0.7296 - loss: 0.7449 - val_accuracy: 0.7337 - val_loss: 0.7056
Epoch 2/5
625/625 - 18s - 29ms/step - accuracy: 0.7291 - loss: 0.6853 - val_accuracy: 0.7337 - val_loss: 0.6979
Epoch 3/5
625/625 - 18s - 28ms/step - accuracy: 0.7295 - loss: 0.6689 - val_accuracy: 0.7229 - val_loss: 0.7040
Epoch 4/5
625/625 - 16s - 26ms/step - accuracy: 0.7294 - loss: 0.6613 - val_accuracy: 0.7337 - val_loss: 0.7190
Epoch 5/5
625/625 - 15s - 25ms/step - accuracy: 0.7297 - loss: 0.6567 - val_accuracy: 0.7337 - val_loss: 0.7149
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
CNN Results:
Accuracy: 0.7336668334167084
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       703
           1       0.00      0.00      0.00       180
           2       0.00      0.00      0.00      1779
           3       0.73      1.00      0.85      7333

    accuracy                

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


625/625 - 49s - 78ms/step - accuracy: 0.7298 - loss: 0.7660 - val_accuracy: 0.7337 - val_loss: 0.7247
Epoch 2/5
625/625 - 46s - 73ms/step - accuracy: 0.7297 - loss: 0.7007 - val_accuracy: 0.7337 - val_loss: 0.7040
Epoch 3/5
625/625 - 46s - 73ms/step - accuracy: 0.7304 - loss: 0.6810 - val_accuracy: 0.7337 - val_loss: 0.7038
Epoch 4/5
625/625 - 48s - 76ms/step - accuracy: 0.7297 - loss: 0.6716 - val_accuracy: 0.7335 - val_loss: 0.7034
Epoch 5/5
625/625 - 48s - 77ms/step - accuracy: 0.7298 - loss: 0.6651 - val_accuracy: 0.7334 - val_loss: 0.7066
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step
LSTM Results:
Accuracy: 0.7333666833416709
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       703
           1       0.00      0.00      0.00       180
           2       0.20      0.00      0.00      1779
           3       0.73      1.00      0.85      7333

    accuracy                        

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


625/625 - 80s - 128ms/step - accuracy: 0.7301 - loss: 0.7563 - val_accuracy: 0.7337 - val_loss: 0.7177
Epoch 2/5
625/625 - 69s - 110ms/step - accuracy: 0.7307 - loss: 0.6961 - val_accuracy: 0.7337 - val_loss: 0.7051
Epoch 3/5
625/625 - 69s - 111ms/step - accuracy: 0.7298 - loss: 0.6788 - val_accuracy: 0.7337 - val_loss: 0.7051
Epoch 4/5
625/625 - 66s - 106ms/step - accuracy: 0.7304 - loss: 0.6707 - val_accuracy: 0.7328 - val_loss: 0.7032
Epoch 5/5
625/625 - 64s - 103ms/step - accuracy: 0.7296 - loss: 0.6648 - val_accuracy: 0.7327 - val_loss: 0.7033
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step
BiLSTM Results:
Accuracy: 0.7326663331665833
Classification Report:
              precision    recall  f1-score   support

           0       0.33      0.00      0.00       703
           1       0.00      0.00      0.00       180
           2       0.37      0.01      0.01      1779
           3       0.73      1.00      0.85      7333

    accuracy                 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



The best model based on accuracy is: CNN with accuracy 0.7336668334167084


In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, LSTM, Dense, Bidirectional, SpatialDropout1D
from imblearn.over_sampling import SMOTE
import re
from googletrans import Translator 
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import nltk

# Load NLTK resources
nltk.download('stopwords')
nltk.download('wordnet')

# Initialize Translator
translator = Translator()

# Load the original dataset
df_original = pd.read_csv('train_stances.csv')

# Keep  'Headline', and 'Stance' columns
df = df_original[[ 'Headline', 'Stance']]

# Drop rows where any of the required columns are NaN
df = df.dropna(subset=[ 'Headline', 'Stance'])

# Ensure all entries in 'Headline' are strings
df['Headline'] = df['Headline'].astype(str)

# For full review display without truncation
pd.set_option('display.max_colwidth', None)

# Print value counts of 'Stance' column
print(df['Stance'].value_counts())

# Text Preprocessing
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # Remove special characters
    text = ' '.join([lemmatizer.lemmatize(word) for word in text.split() if word not in stop_words])  # Lemmatize and remove stopwords
    return text

df['Headline'] = df['Headline'].apply(preprocess_text)

# Define back-translation function
def back_translate(text, src_language='en', mid_language='fr'):
    try:
        translated_text = translator.translate(text, src=src_language, dest=mid_language).text
        back_translated_text = translator.translate(translated_text, src=mid_language, dest=src_language).text
        return back_translated_text
    except Exception as e:
        print(f"Error during back-translation: {e}")
        return text

# Define function to augment minority class
def augment_minority_class(df, class_label, src_language='en', mid_language='fr'):
    minority_texts = df[df['Stance'] == class_label]['Headline'].tolist()
    augmented_texts = [back_translate(text, src_language, mid_language) for text in minority_texts]
    augmented_labels = [class_label] * len(augmented_texts)
    return pd.DataFrame({'Headline': augmented_texts, 'Stance': augmented_labels})

# Define threshold for minority class
threshold = 100  # Adjust based on your needs

# Identify minority classes
class_counts = df['Stance'].value_counts()
minority_classes = class_counts[class_counts < threshold].index.tolist()

# Apply back-translation for all minority classes
augmented_df_list = [augment_minority_class(df, cls) for cls in minority_classes]
df_augmented = pd.concat([df] + augmented_df_list, ignore_index=True)

# Text Vectorization for ML models
tfidf = TfidfVectorizer(max_df=0.7)
X = tfidf.fit_transform(df_augmented['Headline'])
y = df_augmented['Stance']

# Handle class imbalance using SMOTE for traditional ML models
smote = SMOTE(random_state=42)
X_smote, y_smote = smote.fit_resample(X, y)

# Split dataset for ML models
X_train_ml, X_test_ml, y_train_ml, y_test_ml = train_test_split(X_smote, y_smote, test_size=0.2, random_state=42)

# Train and evaluate ML models
models = {
    'Naive Bayes': MultinomialNB(),
    'SVM': SVC(kernel='linear', probability=True),  # Set probability=True for SVM
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42)
}

# Confidence threshold
confidence_threshold = 0.8

def trust_based_accuracy(y_true, y_pred, confidence_scores, threshold):
    mask = confidence_scores >= threshold
    y_true_trust = y_true[mask]
    y_pred_trust = y_pred[mask]
    if len(y_true_trust) == 0:
        return None  # No predictions above the threshold
    return accuracy_score(y_true_trust, y_pred_trust)

for name, model in models.items():
    model.fit(X_train_ml, y_train_ml)
    y_pred = model.predict(X_test_ml)
    y_probs = model.predict_proba(X_test_ml)
    confidence_scores = y_probs.max(axis=1)
    
    # Calculate standard accuracy
    accuracy = accuracy_score(y_test_ml, y_pred)
    report = classification_report(y_test_ml, y_pred)
    print(f"{name} Results:")
    print(f"Accuracy: {accuracy}")
    print("Classification Report:")
    print(report)
    
    # Calculate trust-based accuracy
    trust_accuracy = trust_based_accuracy(y_test_ml, y_pred, confidence_scores, confidence_threshold)
    if trust_accuracy is not None:
        print(f"Trust-Based Accuracy for {name}: {trust_accuracy}")
    else:
        print(f"No predictions above the confidence threshold for {name}")

# Tokenizer and padding for deep learning models
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(df_augmented['Headline'])
X_seq = tokenizer.texts_to_sequences(df_augmented['Headline'])
X_pad = pad_sequences(X_seq, maxlen=100)
y_encoded = pd.get_dummies(df_augmented['Stance']).values

# Split dataset for deep learning models
X_train_dl, X_test_dl, y_train_dl, y_test_dl = train_test_split(X_pad, y_encoded, test_size=0.2, random_state=42)

# Define and train CNN model
cnn_model = Sequential([
    Embedding(5000, 128, input_length=100),
    Conv1D(filters=128, kernel_size=5, padding='same', activation='relu'),
    GlobalMaxPooling1D(),
    Dense(128, activation='relu'),
    Dense(y_encoded.shape[1], activation='softmax')
])
cnn_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
cnn_model.fit(X_train_dl, y_train_dl, epochs=5, batch_size=64, validation_data=(X_test_dl, y_test_dl), verbose=2)

cnn_y_pred = cnn_model.predict(X_test_dl)
cnn_y_pred_labels = cnn_y_pred.argmax(axis=1)
y_test_dl_labels = y_test_dl.argmax(axis=1)
cnn_accuracy = accuracy_score(y_test_dl_labels, cnn_y_pred_labels)
cnn_report = classification_report(y_test_dl_labels, cnn_y_pred_labels)
print("CNN Results:")
print(f"Accuracy: {cnn_accuracy}")
print("Classification Report:")
print(cnn_report)

# Calculate confidence scores for CNN
cnn_confidence_scores = cnn_y_pred.max(axis=1)

# Calculate trust-based accuracy for CNN
cnn_trust_accuracy = trust_based_accuracy(y_test_dl_labels, cnn_y_pred_labels, cnn_confidence_scores, confidence_threshold)
if cnn_trust_accuracy is not None:
    print(f"Trust-Based Accuracy for CNN: {cnn_trust_accuracy}")
else:
    print(f"No predictions above the confidence threshold for CNN")

# Define and train LSTM model
lstm_model = Sequential([
    Embedding(5000, 128, input_length=100),
    SpatialDropout1D(0.2),
    LSTM(100, dropout=0.2, recurrent_dropout=0.2),
    Dense(y_encoded.shape[1], activation='softmax')
])
lstm_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
lstm_model.fit(X_train_dl, y_train_dl, epochs=5, batch_size=64, validation_data=(X_test_dl, y_test_dl), verbose=2)

lstm_y_pred = lstm_model.predict(X_test_dl)
lstm_y_pred_labels = lstm_y_pred.argmax(axis=1)
lstm_accuracy = accuracy_score(y_test_dl_labels, lstm_y_pred_labels)
lstm_report = classification_report(y_test_dl_labels, lstm_y_pred_labels)
print("LSTM Results:")
print(f"Accuracy: {lstm_accuracy}")
print("Classification Report:")
print(lstm_report)

# Calculate confidence scores for LSTM
lstm_confidence_scores = lstm_y_pred.max(axis=1)

# Calculate trust-based accuracy for LSTM
lstm_trust_accuracy = trust_based_accuracy(y_test_dl_labels, lstm_y_pred_labels, lstm_confidence_scores, confidence_threshold)
if lstm_trust_accuracy is not None:
    print(f"Trust-Based Accuracy for LSTM: {lstm_trust_accuracy}")
else:
    print(f"No predictions above the confidence threshold for LSTM")

# Define and train BiLSTM model
bilstm_model = Sequential([
    Embedding(5000, 128, input_length=100),
    SpatialDropout1D(0.2),
    Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2)),
    Dense(y_encoded.shape[1], activation='softmax')
])
bilstm_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
bilstm_model.fit(X_train_dl, y_train_dl, epochs=5, batch_size=64, validation_data=(X_test_dl, y_test_dl), verbose=2)

bilstm_y_pred = bilstm_model.predict(X_test_dl)
bilstm_y_pred_labels = bilstm_y_pred.argmax(axis=1)
bilstm_accuracy = accuracy_score(y_test_dl_labels, bilstm_y_pred_labels)
bilstm_report = classification_report(y_test_dl_labels, bilstm_y_pred_labels)
print("BiLSTM Results:")
print(f"Accuracy: {bilstm_accuracy}")
print("Classification Report:")
print(bilstm_report)

# Calculate confidence scores for BiLSTM
bilstm_confidence_scores = bilstm_y_pred.max(axis=1)

# Calculate trust-based accuracy for BiLSTM
bilstm_trust_accuracy = trust_based_accuracy(y_test_dl_labels, bilstm_y_pred_labels, bilstm_confidence_scores, confidence_threshold)
if bilstm_trust_accuracy is not None:
    print(f"Trust-Based Accuracy for BiLSTM: {bilstm_trust_accuracy}")
else:
    print(f"No predictions above the confidence threshold for BiLSTM")

# Compare all models
accuracy_scores_all = {
    'Naive Bayes': accuracy_score(y_test_ml, models['Naive Bayes'].predict(X_test_ml)),
    'SVM': accuracy_score(y_test_ml, models['SVM'].predict(X_test_ml)),
    'Random Forest': accuracy_score(y_test_ml, models['Random Forest'].predict(X_test_ml)),
    'CNN': cnn_accuracy,
    'LSTM': lstm_accuracy,
    'BiLSTM': bilstm_accuracy
}

best_model = max(accuracy_scores_all, key=accuracy_scores_all.get)
print(f"\nThe best model based on accuracy is: {best_model} with accuracy {accuracy_scores_all[best_model]}")

trust_accuracy_scores_all = {
    'Naive Bayes': trust_based_accuracy(y_test_ml, models['Naive Bayes'].predict(X_test_ml), models['Naive Bayes'].predict_proba(X_test_ml).max(axis=1), confidence_threshold),
    'SVM': trust_based_accuracy(y_test_ml, models['SVM'].predict(X_test_ml), models['SVM'].predict_proba(X_test_ml).max(axis=1), confidence_threshold),
    'Random Forest': trust_based_accuracy(y_test_ml, models['Random Forest'].predict(X_test_ml), models['Random Forest'].predict_proba(X_test_ml).max(axis=1), confidence_threshold),
    'CNN': cnn_trust_accuracy,
    'LSTM': lstm_trust_accuracy,
    'BiLSTM': bilstm_trust_accuracy
}

best_trust_model = max(trust_accuracy_scores_all, key=lambda k: trust_accuracy_scores_all[k] if trust_accuracy_scores_all[k] is not None else -1)
print(f"\nThe best model based on trust-based accuracy is: {best_trust_model} with trust-based accuracy {trust_accuracy_scores_all[best_trust_model]}")


[nltk_data] Error loading stopwords: <urlopen error [Errno 11001]
[nltk_data]     getaddrinfo failed>
[nltk_data] Error loading wordnet: <urlopen error [Errno 11001]
[nltk_data]     getaddrinfo failed>


Stance
unrelated    36545
discuss       8909
agree         3678
disagree       840
Name: count, dtype: int64
Naive Bayes Results:
Accuracy: 0.5923176905185388
Classification Report:
              precision    recall  f1-score   support

       agree       0.62      0.58      0.60      7246
    disagree       0.66      0.89      0.76      7341
     discuss       0.52      0.69      0.59      7297
   unrelated       0.55      0.20      0.30      7352

    accuracy                           0.59     29236
   macro avg       0.59      0.59      0.56     29236
weighted avg       0.59      0.59      0.56     29236

Trust-Based Accuracy for Naive Bayes: 0.7245418477234082
SVM Results:
Accuracy: 0.6427691886715009
Classification Report:
              precision    recall  f1-score   support

       agree       0.61      0.70      0.65      7246
    disagree       0.68      0.94      0.79      7341
     discuss       0.64      0.62      0.63      7297
   unrelated       0.60      0.31      0.41 



Epoch 1/5
625/625 - 16s - 26ms/step - accuracy: 0.7288 - loss: 0.7488 - val_accuracy: 0.7337 - val_loss: 0.7086
Epoch 2/5
625/625 - 15s - 23ms/step - accuracy: 0.7288 - loss: 0.6863 - val_accuracy: 0.7337 - val_loss: 0.6965
Epoch 3/5
625/625 - 15s - 24ms/step - accuracy: 0.7291 - loss: 0.6703 - val_accuracy: 0.7331 - val_loss: 0.7018
Epoch 4/5
625/625 - 15s - 24ms/step - accuracy: 0.7294 - loss: 0.6615 - val_accuracy: 0.7337 - val_loss: 0.7029
Epoch 5/5
625/625 - 15s - 24ms/step - accuracy: 0.7293 - loss: 0.6576 - val_accuracy: 0.7337 - val_loss: 0.7103
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
CNN Results:
Accuracy: 0.7336668334167084
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       703
           1       0.00      0.00      0.00       180
           2       0.00      0.00      0.00      1779
           3       0.73      1.00      0.85      7333

    accuracy                

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


625/625 - 43s - 69ms/step - accuracy: 0.7297 - loss: 0.7642 - val_accuracy: 0.7337 - val_loss: 0.7216
Epoch 2/5
625/625 - 41s - 66ms/step - accuracy: 0.7305 - loss: 0.6999 - val_accuracy: 0.7337 - val_loss: 0.7106
Epoch 3/5
625/625 - 41s - 65ms/step - accuracy: 0.7301 - loss: 0.6800 - val_accuracy: 0.7334 - val_loss: 0.7061
Epoch 4/5
625/625 - 41s - 66ms/step - accuracy: 0.7305 - loss: 0.6715 - val_accuracy: 0.7311 - val_loss: 0.7087
Epoch 5/5
625/625 - 41s - 66ms/step - accuracy: 0.7305 - loss: 0.6643 - val_accuracy: 0.7325 - val_loss: 0.7103
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step
LSTM Results:
Accuracy: 0.7324662331165582
Classification Report:
              precision    recall  f1-score   support

           0       0.17      0.00      0.01       703
           1       0.00      0.00      0.00       180
           2       0.00      0.00      0.00      1779
           3       0.73      1.00      0.85      7333

    accuracy                        

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


625/625 - 69s - 111ms/step - accuracy: 0.7299 - loss: 0.7560 - val_accuracy: 0.7337 - val_loss: 0.7237
Epoch 2/5
625/625 - 64s - 103ms/step - accuracy: 0.7307 - loss: 0.6963 - val_accuracy: 0.7337 - val_loss: 0.7080
Epoch 3/5
625/625 - 64s - 102ms/step - accuracy: 0.7302 - loss: 0.6783 - val_accuracy: 0.7293 - val_loss: 0.7147
Epoch 4/5
625/625 - 63s - 101ms/step - accuracy: 0.7302 - loss: 0.6705 - val_accuracy: 0.7337 - val_loss: 0.7126
Epoch 5/5
625/625 - 64s - 102ms/step - accuracy: 0.7293 - loss: 0.6646 - val_accuracy: 0.7336 - val_loss: 0.7082
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step
BiLSTM Results:
Accuracy: 0.7335667833916959
Classification Report:
              precision    recall  f1-score   support

           0       0.40      0.00      0.01       703
           1       0.00      0.00      0.00       180
           2       0.00      0.00      0.00      1779
           3       0.73      1.00      0.85      7333

    accuracy                 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



The best model based on accuracy is: CNN with accuracy 0.7336668334167084

The best model based on trust-based accuracy is: Random Forest with trust-based accuracy 0.873517308157831
