In [12]:
from sklearn.svm import SVC
import os 
import pandas as pd
import pickle
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

In [13]:
# Load data
# Reads the four CSVs under data_path, in order: train features, train labels,
# test features, test labels.
data_path = "../../data"
X_train, y_train_org, X_test, y_test_org = (
    pd.read_csv(os.path.join(data_path, csv_name))
    for csv_name in (
        "X_train_pca.csv",
        "y_train_pca.csv",
        "X_test_pca.csv",
        "y_test_pca.csv",
    )
)

In [14]:
# Traits to model, one classifier each. The label column for a trait is
# looked up as "<trait>_bin" in the label tables.
traits = [
    'Extraversion',
    'Agreeableness',
    'Conscientiousness',
    'Emotional Stability',
    'Openness',
]

In [15]:
# Create results directory
results_path = "../../results"
# Derive the model-specific folder from results_path so the root is defined
# in exactly one place.
specific_results_path = os.path.join(results_path, "svm_classification")
# os.makedirs creates any missing parent directories, so this single call
# also creates results_path; exist_ok=True makes the cell safe to re-run.
os.makedirs(specific_results_path, exist_ok=True)

In [16]:
def calc_roc_auc(y_true, y_pred):
    """Compute a one-vs-rest ROC AUC for every class present in ``y_true``.

    Both inputs are converted to NumPy arrays. For each distinct label in
    ``y_true`` the true and predicted labels are binarised (1 where equal to
    that label, 0 otherwise) and passed to ``roc_auc_score``.

    Note: the "scores" handed to ``roc_auc_score`` are 0/1 indicators rather
    than continuous scores, so each value is the AUC of a single operating
    point, i.e. (true-positive rate + true-negative rate) / 2 for that class.

    Args:
        y_true: Sequence of ground-truth class labels.
        y_pred: Sequence of predicted class labels, aligned with ``y_true``.

    Returns:
        list: One ROC AUC value per distinct label in ``y_true``, ordered as
        ``np.unique(y_true)`` returns them (sorted).

    Raises:
        Whatever ``roc_auc_score`` raises; no extra handling is done here
        (e.g. for a ``y_true`` that contains a single class).
    """
    truth = np.array(y_true)
    predicted = np.array(y_pred)

    # One binary (current class vs. all others) problem per label in y_true.
    return [
        roc_auc_score(
            (truth == class_label).astype(int),
            (predicted == class_label).astype(int),
        )
        for class_label in np.unique(truth)
    ]

In [17]:
# Cycle through traits
# For each trait: encode the "<trait>_bin" labels as integers, fit an SVM on
# the training split, score it on the test split, print the metrics, and
# pickle both the fitted model and the metrics under
# <specific_results_path>/<trait>/.

# Loop-invariant encoding of the string bin labels; defined once, outside the loop.
label_mapping = {'negative': 0, 'neutral': 1, 'positive': 2}

for trait in traits:
    print(f"Processing {trait}")
    trait_bin = trait + "_bin"
    # A label that is not a key of label_mapping raises KeyError here.
    y_train = [label_mapping[label] for label in y_train_org[trait_bin]]
    y_test = [label_mapping[label] for label in y_test_org[trait_bin]]

    # NOTE: per the scikit-learn docs, random_state only affects SVC when
    # probability=True; it is kept so the saved model parameters are unchanged.
    svm_classifier = SVC(kernel='sigmoid', C=1.0, random_state=27)
    svm_classifier.fit(X_train, y_train)

    y_pred = svm_classifier.predict(X_test)
    y_true = y_test

    # Compute metrics
    accuracy = accuracy_score(y_true, y_pred)
    # Weighted averaging: per-class scores weighted by class support.
    precision = precision_score(y_true, y_pred, average="weighted")
    recall = recall_score(y_true, y_pred, average="weighted")
    f1 = f1_score(y_true, y_pred, average="weighted")
    # NOTE(review): calc_roc_auc is given hard class predictions, not
    # continuous scores, so each value is a single-threshold AUC.
    # TODO: consider scoring with svm_classifier.decision_function instead.
    roc_auc = calc_roc_auc(y_true, y_pred)
    conf_matrix = confusion_matrix(y_true, y_pred)

    print(f'Accuracy: {accuracy}')
    print(f'Precision: {precision}')
    print(f'Recall: {recall}')
    print(f'F1-Score: {f1}')
    print(f'ROC AUC: {roc_auc}')
    print(f'Confusion Matrix:\n{conf_matrix}')
    print("\n\n")
    metrics = {"accuracy": accuracy, "precision": precision, "recall": recall, "f1_score": f1, "roc_auc": roc_auc, "conf_matrix": conf_matrix}

    # Save model and metrics
    curr_result_path = os.path.join(specific_results_path, trait)
    os.makedirs(curr_result_path, exist_ok=True)
    # Plain string literals: these file names have no placeholders to format.
    with open(os.path.join(curr_result_path, 'svm_model.pkl'), 'wb') as file:
        pickle.dump(svm_classifier, file)
    with open(os.path.join(curr_result_path, 'perf_metrics.pkl'), 'wb') as file:
        pickle.dump(metrics, file)

Processing Extraversion
Accuracy: 0.3605769230769231
Precision: 0.3933491627163862
Recall: 0.3605769230769231
F1-Score: 0.37298548823787847
ROC AUC: [0.4896383186705767, 0.544868238557559, 0.46948728087518116]
Confusion Matrix:
[[20 23 32]
 [53 95 58]
 [45 55 35]]



Processing Agreeableness
Accuracy: 0.5024038461538461
Precision: 0.5003152509452913
Recall: 0.5024038461538461
F1-Score: 0.5012893658145285
ROC AUC: [0.48582268845595755, 0.48582268845595755]
Confusion Matrix:
[[ 67 106]
 [101 142]]



Processing Conscientiousness
Accuracy: 0.53125
Precision: 0.5274894578313253
Recall: 0.53125
F1-Score: 0.529005839846894
ROC AUC: [0.516570928775737, 0.516570928775737]
Confusion Matrix:
[[ 74 103]
 [ 92 147]]



Processing Emotional Stability
Accuracy: 0.5697115384615384
Precision: 0.506632423751989
Recall: 0.5697115384615384
F1-Score: 0.528687769632098
ROC AUC: [0.4770897972031116, 0.4981327358087922, 0.4972016873407676]
Confusion Matrix:
[[  8  51   4]
 [ 49 227   8]
 [ 12  55   2]]



Pr

In [18]:
# Test load
# Read back the metrics saved for the last trait to check that the pickle
# round-trips. The path is rebuilt from specific_results_path and traits
# instead of relying on a loop variable left over from the training cell.
# NOTE: pickle.load can execute arbitrary code; only load files written by
# this notebook.
with open(os.path.join(specific_results_path, traits[-1], 'perf_metrics.pkl'), 'rb') as file:
    metrics = pickle.load(file)
