In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score, confusion_matrix, classification_report, log_loss
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from scipy.stats import pearsonr
from sklearn.preprocessing import LabelEncoder
import time

In [2]:
def esa_score(phi, alpha):
    return np.dot(alpha, phi)

def threshold_crossing_rate(esa_baseline, esa_moral, tau):
    crossed = (esa_baseline < tau) & (esa_moral >= tau)
    return np.mean(crossed)

def moral_win_rate(esa_baseline, esa_moral):
    return np.mean(esa_moral > esa_baseline)

def esa_difference(esa_baseline, esa_moral):
    return np.mean(esa_moral - esa_baseline)

In [3]:
def evaluate_classification(y_true, y_pred):
    metrics = {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred),
        'recall': recall_score(y_true, y_pred),
        'f1_score': f1_score(y_true, y_pred),
        'roc_auc': roc_auc_score(y_true, y_pred)
    }
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
    return metrics

In [4]:
df = pd.read_csv('grad admission - ethics.csv')
    
y = df['accept_status_moral']
X = df.drop(columns=['accept_status', 'ESA', 'CST', 'severity_cons','dur_cons','util_cons','prin_up','prin_vi','moral_int', 'accept_status_moral'])

esa_features = df[['severity_cons','dur_cons','util_cons','prin_up','prin_vi','moral_int']].values
tau_values = df['CST'].values
alpha = np.array([0.4, 0.2, 0.3, 0.0, 0.0, 0.1])

scaler = StandardScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

svm_baseline_preds = np.zeros(len(X))
svm_override_preds = np.zeros(len(X))
svm_penalized_preds = np.zeros(len(X))

b_time = 0
o_time = 0
p_time = 0

for fold, (train_idx, val_idx) in enumerate(skf.split(X_scaled, y)):
    X_train, X_val = X_scaled.iloc[train_idx], X_scaled.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    b_time_s = time.perf_counter()
    # -------- Baseline --------
    model_baseline = SVC(probability=True)
    model_baseline.fit(X_train, y_train)
    y_val_pred_baseline = model_baseline.predict(X_val)
    svm_baseline_preds[val_idx] = y_val_pred_baseline
    print(f"Fold {fold+1} - SVM Baseline:")
    evaluate_classification(y_val, y_val_pred_baseline)
    b_time_e = time.perf_counter()
    b_time = b_time + (b_time_e - b_time_s)
    
    o_time_s = time.perf_counter()
    # -------- Override --------
    phi_val = esa_features[val_idx]
    tau_val = tau_values[val_idx]
    esa_vals = np.array([esa_score(phi, alpha) for phi in phi_val])
    moral_preds = (esa_vals >= tau_val).astype(int)
    svm_override_preds[val_idx] = moral_preds
    print(f"Fold {fold+1} - SVM ESA Override:")
    evaluate_classification(y_val, moral_preds)
    o_time_e = time.perf_counter()
    o_time = o_time + (o_time_e - o_time_s)
    
    
    p_time_s = time.perf_counter()
    # -------- Penalized --------
    phi_train = esa_features[train_idx]
    tau_train = tau_values[train_idx]
    moral_penalty = np.array([(tau - esa_score(phi, alpha))**2 for phi, tau in zip(phi_train, tau_train)])
    sample_weights = np.clip(1 + 5 * moral_penalty, 1, 10)

    model_penalized = SVC(probability=True)
    model_penalized.fit(X_train, y_train, sample_weight=sample_weights)
    y_val_pred_penalized = model_penalized.predict(X_val)
    svm_penalized_preds[val_idx] = y_val_pred_penalized
    print(f"Fold {fold+1} - SVM ESA Penalized:")
    evaluate_classification(y_val, y_val_pred_penalized)
    p_time_e = time.perf_counter()
    p_time = p_time + (p_time_e - p_time_s)

print("\n--- Final Evaluation (SVM Baseline) ---")
#evaluate_classification(y, svm_baseline_preds)
#print("Accuracy:", accuracy_score(y, svm_baseline_preds))
metrics = evaluate_classification(y, svm_baseline_preds)
print(metrics)

print("\n--- Final Evaluation (SVM ESA Override) ---")
#evaluate_classification(y, svm_override_preds)
#print("Accuracy:", accuracy_score(y, svm_override_preds))
metrics = evaluate_classification(y, svm_override_preds)
print(metrics)

print("\n--- Final Evaluation (SVM ESA Penalized) ---")
#evaluate_classification(y, svm_penalized_preds)
#print("Accuracy:", accuracy_score(y, svm_baseline_preds))
metrics = evaluate_classification(y, svm_penalized_preds)
print(metrics)

print("time baseline:", b_time)
print("time override:", o_time)
print("time penalized:", p_time)

Fold 1 - SVM Baseline:

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.97      0.97        65
           1       0.98      0.98      0.98        89

    accuracy                           0.97       154
   macro avg       0.97      0.97      0.97       154
weighted avg       0.97      0.97      0.97       154


Confusion Matrix:
[[63  2]
 [ 2 87]]
Fold 1 - SVM ESA Override:

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        65
           1       0.58      1.00      0.73        89

    accuracy                           0.58       154
   macro avg       0.29      0.50      0.37       154
weighted avg       0.33      0.58      0.42       154


Confusion Matrix:
[[ 0 65]
 [ 0 89]]
Fold 1 - SVM ESA Penalized:

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.94      0.95        65
           

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Fold 5 - SVM Baseline:

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.97      0.94        64
           1       0.98      0.93      0.95        89

    accuracy                           0.95       153
   macro avg       0.94      0.95      0.95       153
weighted avg       0.95      0.95      0.95       153


Confusion Matrix:
[[62  2]
 [ 6 83]]
Fold 5 - SVM ESA Override:

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        64
           1       0.58      1.00      0.74        89

    accuracy                           0.58       153
   macro avg       0.29      0.50      0.37       153
weighted avg       0.34      0.58      0.43       153


Confusion Matrix:
[[ 0 64]
 [ 0 89]]
Fold 5 - SVM ESA Penalized:

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.97      0.96        64
           

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
