In [15]:
from pathlib import Path

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

# SVM Implementation


In [None]:
"""
    Train an SVM model on a dataset and evaluate its performance.

    Parameters:
        X (pd.DataFrame or np.array): Features of the dataset
        y (pd.DataFrame or np.array): Target labels (hate speech vs no hate speech)
        test_size (float): Proportion of the dataset to be included in the test split
        kernel (str): Specifies the kernel type to be used in the SVM 
        C (float): Regularization parameter
        random_state (int, optional): Controls the shuffling applied to the data before applying the split

    Output:
        dict: A dictionary containing the trained model, accuracy score, and classification report
    """

def train_svm(X, y, test_size=0.2, kernel='linear', C=1.0, random_state=None):

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    model = SVC(kernel=kernel, C=C, random_state=random_state)

    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')  # 'weighted' -> handle class imbalance
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    report = classification_report(y_test, y_pred)

    return {
        'model': model,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'classification_report': report
    }


# SVM Usage

In [16]:
# Build the path with pathlib so the notebook also runs outside Windows;
# a backslash-separated literal is only understood as a path on Windows.
DATA_PATH = Path("..", "..", "data", "twitter_hate-speech", "train_cleaned.csv")
data = pd.read_csv(DATA_PATH)

# Every column except 'label' is used as a feature, including the raw tweet text.
# NOTE(review): if the CSV also carries an identifier column (e.g. an id),
# drop it here as well — confirm against the file's schema.
X = data.drop(columns=['label'])
y = data['label']

# Fixed random_state keeps the train/test split reproducible across runs.
result = train_svm(X, y, test_size=0.25, kernel='rbf', C=1.0, random_state=42)

print(f"Accuracy: {result['accuracy']}")
print(f"Precision: {result['precision']}")
print(f"Recall: {result['recall']}")
print(f"F1-score: {result['f1_score']}")
print("Classification Report:")
print(result['classification_report'])


ValueError: could not convert string to float: 'sometimes the simple things are the best! #tea #cake   #simplicity '