In [1]:
from scipy.sparse import linalg
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [3]:
# Load the labelled corpus; the 'text' column is used as the model input and
# the 'label' column as the classification target.
dark_patterns_df = pd.read_csv('data/darkpatterns.csv')

# Hold out 20% of the rows for evaluation. The fixed random_state keeps the
# split (and therefore the reported metrics) reproducible across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    dark_patterns_df['text'],
    dark_patterns_df['label'],
    test_size=0.2,
    random_state=42,
)

In [4]:
# Turn the raw text into TF-IDF feature matrices (default vectorizer settings).
vectorizer = TfidfVectorizer()

# Fit on the training split only, then reuse the fitted vectorizer to
# transform the test split, so nothing is learned from the held-out rows.
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

In [5]:
# Train a linear-kernel SVM (C=2) on the TF-IDF features of the training split.
# fit() returns the estimator itself, so construction and training are chained.
model = SVC(kernel='linear', C=2).fit(X_train_vectorized, y_train)

# Predict labels for the held-out split; these are scored in the next cell.
y_pred = model.predict(X_test_vectorized)

In [6]:
# Score the held-out predictions: overall accuracy plus the per-class
# precision / recall / F1 breakdown.
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(classification_rep)

# Persist the fitted vectorizer alongside the classifier. The SVC was trained
# on this vectorizer's output, so without it a reloaded model has no way to
# turn raw text into the feature space it expects.
joblib.dump(vectorizer, 'vectorizer_tfidf.pkl')

# Kept as the last expression so the cell still displays the model artifact
# path, and the existing 'model_svm.pkl' file is written exactly as before.
joblib.dump(model, 'model_svm.pkl')

Accuracy: 0.9216101694915254
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.91      0.92       244
           1       0.90      0.94      0.92       228

    accuracy                           0.92       472
   macro avg       0.92      0.92      0.92       472
weighted avg       0.92      0.92      0.92       472



['model_svm.pkl']