In [1]:
from google.colab import drive
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

In [2]:
# Mount Google Drive at /content/drive; the dataset path used by the next
# cell lives underneath this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Location of the labelled CSV on the mounted Drive.
file_path = '/content/drive/MyDrive/scom/admin.csv'

# Read the whole CSV into a DataFrame; later cells pull the 'text' and
# 'label' columns out of it.
data = pd.read_csv(filepath_or_buffer=file_path)


In [4]:
# Extract the raw documents and their class labels as plain Python lists,
# keeping both in the same row order.
texts, labels = (data[column].tolist() for column in ('text', 'label'))

In [5]:
# Both splits share the same hold-out fraction and seed.
split_kwargs = dict(test_size=0.2, random_state=42)

# First carve off 20% of all samples as the test set ...
train_texts, test_texts, train_labels, test_labels = train_test_split(
    texts, labels, **split_kwargs
)
# ... then take 20% of what remains as the validation set
# (roughly a 64% / 16% / 20% train / validation / test partition).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, **split_kwargs
)


In [6]:
# TF-IDF vectoriser with the vocabulary capped at 5000 features.
tfidf_vectorizer = TfidfVectorizer(max_features=5000)

# Learn the vocabulary/IDF weights from the training texts only ...
train_features = tfidf_vectorizer.fit_transform(train_texts)

# ... and reuse the fitted vectoriser (no refitting) for the held-out splits.
val_features, test_features = (
    tfidf_vectorizer.transform(documents)
    for documents in (val_texts, test_texts)
)

In [7]:
# Support vector classifier with a linear kernel, trained on the TF-IDF
# features of the training split.
svm_classifier = SVC(kernel='linear')
# Left as the cell's last expression so the fitted estimator is displayed.
svm_classifier.fit(X=train_features, y=train_labels)

In [8]:
# Predict labels for every split with the fitted classifier.
train_predictions, val_predictions, test_predictions = (
    svm_classifier.predict(features)
    for features in (train_features, val_features, test_features)
)

In [9]:
# Accuracy of the predictions against the ground-truth labels, per split.
train_accuracy, val_accuracy, test_accuracy = (
    accuracy_score(expected, predicted)
    for expected, predicted in (
        (train_labels, train_predictions),
        (val_labels, val_predictions),
        (test_labels, test_predictions),
    )
)

# Report each score rounded to two decimals.
print(f"Train Accuracy: {train_accuracy:.2f}")
print(f"Validation Accuracy: {val_accuracy:.2f}")
print(f"Test Accuracy: {test_accuracy:.2f}")



Train Accuracy: 1.00
Validation Accuracy: 0.69
Test Accuracy: 0.70


In [10]:
# Per-class precision / recall / F1 for each split, printed under its heading
# in train -> validation -> test order.
for heading, expected, predicted in (
    ("Train Classification Report:", train_labels, train_predictions),
    ("Validation Classification Report:", val_labels, val_predictions),
    ("Test Classification Report:", test_labels, test_predictions),
):
    print(heading)
    print(classification_report(expected, predicted))


Train Classification Report:
              precision    recall  f1-score   support

      normal       0.99      0.99      0.99       163
      racism       1.00      1.00      1.00       160
      sexism       1.00      0.99      1.00       157
       toxic       0.99      1.00      0.99       156

    accuracy                           1.00       636
   macro avg       1.00      1.00      1.00       636
weighted avg       1.00      1.00      1.00       636

Validation Classification Report:
              precision    recall  f1-score   support

      normal       0.49      0.46      0.47        39
      racism       0.71      0.79      0.75        38
      sexism       0.72      0.68      0.70        38
       toxic       0.80      0.80      0.80        45

    accuracy                           0.69       160
   macro avg       0.68      0.68      0.68       160
weighted avg       0.68      0.69      0.69       160

Test Classification Report:
              precision    recall  f1-s