In [8]:
# Importing libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, SimpleRNN
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report

# Define the dataset
data = {
    "text": [
        "This is a genuine news article",
        "Click here to win $1,000,000!",
        "Breaking: Important event just happened",
        "Cheap medications available online",
        "Trusted source for daily updates",
        "You are a winner! Claim your prize now",
    ],
    "label": [0, 1, 0, 1, 0, 1],  # 0 = genuine, 1 = spam
}

# Split the data into training and testing
texts = data["text"]
labels = np.array(data["label"])

X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

# Tokenization and padding
tokenizer = Tokenizer(num_words=1000, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_test_sequences = tokenizer.texts_to_sequences(X_test)

X_train_padded = pad_sequences(X_train_sequences, maxlen=10, padding='post')
X_test_padded = pad_sequences(X_test_sequences, maxlen=10, padding='post')

# Build a generic feedforward neural network model
ffnn_model = Sequential([
    Embedding(input_dim=1000, output_dim=16, input_length=10),
    tf.keras.layers.Flatten(),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the feedforward model
ffnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the feedforward model
print("Training Feedforward Neural Network...")
ffnn_model.fit(X_train_padded, y_train, epochs=10, batch_size=2, validation_split=0.2)

# Evaluate the feedforward model
print("Evaluating Feedforward Neural Network...")
ffnn_loss, ffnn_accuracy = ffnn_model.evaluate(X_test_padded, y_test)
print(f"FFNN Test Accuracy: {ffnn_accuracy:.2f}")

# Build an RNN model with LSTM
rnn_model = Sequential([
    Embedding(input_dim=1000, output_dim=16, input_length=10),
    LSTM(32),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the RNN model
rnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the RNN model
print("Training RNN Model...")
rnn_model.fit(X_train_padded, y_train, epochs=10, batch_size=2, validation_split=0.2)

# Evaluate the RNN model
print("Evaluating RNN Model...")
rnn_loss, rnn_accuracy = rnn_model.evaluate(X_test_padded, y_test)
print(f"RNN Test Accuracy: {rnn_accuracy:.2f}")

# Predictions and classification report for RNN
rnn_predictions = (rnn_model.predict(X_test_padded) > 0.5).astype(int)
print("\nClassification Report for RNN:")
print(classification_report(y_test, rnn_predictions))

# Prepare data for traditional ML models
X_train_ml = X_train_padded.reshape(X_train_padded.shape[0], -1)
X_test_ml = X_test_padded.reshape(X_test_padded.shape[0], -1)

# Build and train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
print("Training Random Forest Model...")
rf_model.fit(X_train_ml, y_train)

# Evaluate Random Forest model
rf_predictions = rf_model.predict(X_test_ml)
print("\nClassification Report for Random Forest:")
print(classification_report(y_test, rf_predictions))

# Build and train XGBoost model
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
print("Training XGBoost Model...")
xgb_model.fit(X_train_ml, y_train)

# Evaluate XGBoost model
xgb_predictions = xgb_model.predict(X_test_ml)
print("\nClassification Report for XGBoost:")
print(classification_report(y_test, xgb_predictions))


Training Feedforward Neural Network...
Epoch 1/10




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 200ms/step - accuracy: 1.0000 - loss: 0.6767 - val_accuracy: 0.0000e+00 - val_loss: 0.7246
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - accuracy: 1.0000 - loss: 0.6606 - val_accuracy: 0.0000e+00 - val_loss: 0.7354
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 1.0000 - loss: 0.6488 - val_accuracy: 0.0000e+00 - val_loss: 0.7452
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 1.0000 - loss: 0.6355 - val_accuracy: 0.0000e+00 - val_loss: 0.7563
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - accuracy: 1.0000 - loss: 0.6209 - val_accuracy: 0.0000e+00 - val_loss: 0.7687
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 1.0000 - loss: 0.6079 - val_accuracy: 0.0000e+00 - val_loss: 0.7818
Epoch 7/10
[1m2/2[0m [32m━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
