In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.svm import OneClassSVM
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

# Anomaly-detection pipeline:
#   1. load the CSV and keep its numeric columns,
#   2. min-max scale them and train a dense autoencoder to reconstruct them,
#   3. derive pseudo-labels from the per-row reconstruction error,
#   4. fit a One-Class SVM on the reconstruction error and score it.

# Load dataset
data = pd.read_csv(r"C:\Users\Martine\Downloads\archive (4)\twitter_training.csv")

# Preprocess data: only numeric columns can be scaled and fed to the network.
numeric_data = data.select_dtypes(include=[np.number])

# Scale every numeric feature to [0, 1]; this matches the sigmoid output
# layer of the autoencoder below, whose range is also [0, 1].
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(numeric_data)

n_features = data_scaled.shape[1]

# Define Autoencoder model: symmetric 64-32-16-32-64 stack around a
# 16-unit bottleneck, reconstructing the original feature vector.
autoencoder = Sequential([
    Dense(64, activation="relu", input_shape=(n_features,)),
    Dense(32, activation="relu"),
    Dense(16, activation="relu"),
    Dense(32, activation="relu"),
    Dense(64, activation="relu"),
    Dense(n_features, activation="sigmoid"),
])

autoencoder.compile(optimizer="adam", loss="mse")

# Train the autoencoder to reproduce its own input (input == target).
autoencoder.fit(
    data_scaled,
    data_scaled,
    epochs=50,
    batch_size=32,
    validation_split=0.1,
)

# Reconstructed data
reconstructed_data = autoencoder.predict(data_scaled)

# Per-row reconstruction error (mean absolute error across features).
reconstruction_error = np.mean(np.abs(data_scaled - reconstructed_data), axis=1)

# Rows whose error lies above the 95th percentile are treated as anomalies.
threshold = np.percentile(reconstruction_error, 95)

# Create labels (0 = normal, 1 = anomaly).
# NOTE: these are pseudo-labels derived from the reconstruction error itself,
# not an independent ground truth.
# The encoding must match the one applied to `y_pred` below: the sklearn
# metrics treat label 1 as the positive class by default, so both vectors
# have to use 1 for "anomaly". The previous `<= threshold` form encoded
# 1 = normal, which inverted the labels relative to `y_pred` and made the
# reported precision/recall/F1 meaningless.
y_true = (reconstruction_error > threshold).astype(int)

# Split the 1-D reconstruction error (reshaped to a single-feature matrix)
# and the labels into train/test parts.
X_train, X_test, y_train, y_test = train_test_split(
    reconstruction_error.reshape(-1, 1),
    y_true,
    test_size=0.3,
    random_state=42,
)

# Train One-Class SVM (unsupervised: only the features are used for fitting).
svm_model = OneClassSVM(kernel="rbf", gamma=0.001, nu=0.05)
svm_model.fit(X_train)

# Predict anomalies. OneClassSVM.predict returns +1 for inliers and -1 for
# outliers; convert that to the same binary encoding as `y_true`
# (0 = normal, 1 = anomaly).
y_pred = svm_model.predict(X_test)
y_pred = np.where(y_pred == 1, 0, 1)

# Calculate metrics with "anomaly" (label 1) as the positive class.
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Precision: {precision}, Recall: {recall}, F1-score: {f1}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
2101/2101 ━━━━━━━━━━━━━━━━━━━━ 9s 3ms/step - loss: 0.0089 - val_loss: 2.6185e-05
Epoch 2/50
2101/2101 ━━━━━━━━━━━━━━━━━━━━ 5s 2ms/step - loss: 2.6632e-05 - val_loss: 3.0433e-05
Epoch 3/50
2101/2101 ━━━━━━━━━━━━━━━━━━━━ 5s 2ms/step - loss: 2.1485e-05 - val_loss: 3.0894e-05
Epoch 4/50
2101/2101 ━━━━━━━━━━━━━━━━━━━━ 5s 2ms/step - loss: 2.0647e-05 - val_loss: 2.3651e-06
Epoch 5/50
2101/2101 ━━━━━━━━━━━━━━━━━━━━ 5s 2ms/step - loss: 1.4349e-05 - val_loss: 7.5544e-06
Epoch 6/50
2101/2101 ━━━━━━━━━━━━━━━━━━━━ 5s 2ms/step - loss: 7.9676e-06 - val_loss: 1.6053e-05
Epoch 7/50
2101/2101 ━━━━━━━━━━━━━━━━━━━━ 5s 2ms/step - loss: 9.1905e-06 - val_loss: 9.2094e-06
[1m2101/2101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - loss