In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load data from output.csv
data = pd.read_csv('../output/output.csv')

# Split features and labels

feature_df= data.drop(data.columns[:2], axis=1)
X = np.asarray(feature_df)
y=np.asarray(data['pii_exist'])


# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Convert labels to one-hot encoding
num_classes = len(np.unique(y))
y_train_one_hot = tf.one_hot(y_train, depth=num_classes).numpy()
y_test_one_hot = tf.one_hot(y_test, depth=num_classes).numpy()

# Define SVM model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_hinge',
              metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train_one_hot, epochs=10, batch_size=32, validation_data=(X_test, y_test_one_hot))

# Predict classes
y_pred = np.argmax(model.predict(X_test), axis=1)

# Calculate confusion matrix and F1 score
conf_matrix = confusion_matrix(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')

print("Confusion Matrix:")
print(conf_matrix)
print("F1 Score:", f1)


Epoch 1/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step - accuracy: 0.6289 - loss: 0.7001 - val_accuracy: 0.9254 - val_loss: 0.1671
Epoch 2/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9370 - loss: 0.1270 - val_accuracy: 0.9337 - val_loss: 0.1447
Epoch 3/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9463 - loss: 0.1045 - val_accuracy: 0.9420 - val_loss: 0.1284
Epoch 4/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9439 - loss: 0.1097 - val_accuracy: 0.9475 - val_loss: 0.1218
Epoch 5/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9527 - loss: 0.0946 - val_accuracy: 0.9448 - val_loss: 0.1206
Epoch 6/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9484 - loss: 0.1026 - val_accuracy: 0.9448 - val_loss: 0.1199
Epoch 7/10
[1m46/46[0m [32m━━━━━━━━━