In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score, confusion_matrix, classification_report, log_loss
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from scipy.stats import pearsonr
from sklearn.preprocessing import LabelEncoder
import time

In [2]:
def esa_score(phi, alpha):
    return np.dot(alpha, phi)

def threshold_crossing_rate(esa_baseline, esa_moral, tau):
    crossed = (esa_baseline < tau) & (esa_moral >= tau)
    return np.mean(crossed)

def moral_win_rate(esa_baseline, esa_moral):
    return np.mean(esa_moral > esa_baseline)

def esa_difference(esa_baseline, esa_moral):
    return np.mean(esa_moral - esa_baseline)

In [3]:
def evaluate_classification(y_true, y_pred):
    metrics = {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred),
        'recall': recall_score(y_true, y_pred),
        'f1_score': f1_score(y_true, y_pred),
        'roc_auc': roc_auc_score(y_true, y_pred)
    }
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
    return metrics

In [4]:
if __name__ == '__main__':
    df = pd.read_csv('grad admission.csv')
    
    y = df['accept_status']
    X = df.drop(columns=['accept_status'])

    n_splits = 5
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    rf_preds = np.zeros(len(X))

    
    #start timer
    start_time = time.perf_counter()
    
    for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
        rf_model.fit(X_train, y_train)

        y_val_pred = rf_model.predict(X_val)
        rf_preds[val_idx] = y_val_pred

        print(f"Fold {fold+1} - Random Forest Baseline:")
        evaluate_classification(y_val, y_val_pred)
        
    
    #end time
    end_time = time.perf_counter()
    
    elapsed_time = end_time - start_time

    print("\n--- Final Evaluation (Random Forest Baseline) ---")
    evaluate_classification(y, rf_preds)
    
    metrics = evaluate_classification(y, rf_preds)
    print(metrics)


Fold 1 - Random Forest Baseline:

Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.96      0.96        55
           1       0.98      0.98      0.98        99

    accuracy                           0.97       154
   macro avg       0.97      0.97      0.97       154
weighted avg       0.97      0.97      0.97       154


Confusion Matrix:
[[53  2]
 [ 2 97]]
Fold 2 - Random Forest Baseline:

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.85      0.91        55
           1       0.92      0.99      0.96        99

    accuracy                           0.94       154
   macro avg       0.95      0.92      0.93       154
weighted avg       0.94      0.94      0.94       154


Confusion Matrix:
[[47  8]
 [ 1 98]]
Fold 3 - Random Forest Baseline:

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.93      0.96 

In [5]:
elapsed_time


0.5694664996117353

# ESA-augment

In [8]:
if __name__ == '__main__':
    df3 = pd.read_csv('moral_loan_status.csv')
    
    # Apply label encoding
    df3['person_home_ownership'] = label_encoder.fit_transform(df3['person_home_ownership'])
    df3['loan_intent'] = label_encoder.fit_transform(df3['loan_intent'])
    df3['loan_grade'] = label_encoder.fit_transform(df3['loan_grade'])
    df3['cb_person_default_on_file'] = label_encoder.fit_transform(df3['cb_person_default_on_file'])
    
    y = df3['loan_status']
    X = df3.drop(columns=['loan_status'])

    n_splits = 5
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    rf_preds = np.zeros(len(X))

    for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
        rf_model.fit(X_train, y_train)

        y_val_pred = rf_model.predict(X_val)
        rf_preds[val_idx] = y_val_pred

        print(f"Fold {fold+1} - Random Forest Baseline:")
        evaluate_classification(y_val, y_val_pred)

    print("\n--- Final Evaluation (Random Forest Baseline) ---")
    evaluate_classification(y, rf_preds)
    
    metrics = evaluate_classification(y, rf_preds)
    print(metrics)

Fold 1 - Random Forest Baseline:

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      6246
           1       0.98      0.93      0.96       271

    accuracy                           1.00      6517
   macro avg       0.99      0.97      0.98      6517
weighted avg       1.00      1.00      1.00      6517


Confusion Matrix:
[[6241    5]
 [  18  253]]
Fold 2 - Random Forest Baseline:

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      6245
           1       0.97      0.94      0.95       271

    accuracy                           1.00      6516
   macro avg       0.98      0.97      0.97      6516
weighted avg       1.00      1.00      1.00      6516


Confusion Matrix:
[[6236    9]
 [  17  254]]
Fold 3 - Random Forest Baseline:

Classification Report:
              precision    recall  f1-score   support

           0       1.00     

# Override

In [6]:
alpha = np.array([0.4, 0.2, 0.3, 0.0, 0.0, 0.1])
    
df5 = pd.read_csv('grad admission - ethics.csv')
            
esa_features = df5[['severity_cons','dur_cons','util_cons','prin_up','prin_vi','moral_int']].values
tau_values = df5['CST'].values
y_override = df5['accept_status_moral']
X_override = df5.drop(columns=['accept_status','accept_status_moral', 'ESA', 'CST', 'severity_cons','dur_cons','util_cons','prin_up','prin_vi','moral_int'])    

rf_override_preds = np.zeros(len(X_override))

start_time = time.perf_counter()
    
for fold, (train_idx, val_idx) in enumerate(skf.split(X_override, y_override)):
    X_train, X_val = X_override.iloc[train_idx], X_override.iloc[val_idx]
    y_train, y_val = y_override.iloc[train_idx], y_override.iloc[val_idx]
    phi_val = esa_features[val_idx]
    tau_val = tau_values[val_idx]

    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)

    y_pred_val = rf_model.predict(X_val)
    esa_vals = np.array([esa_score(phi, alpha) for phi in phi_val])
    moral_preds = (esa_vals >= tau_val).astype(int)
    rf_override_preds[val_idx] = moral_preds

    print(f"Fold {fold+1} - Random Forest ESA Override:")
    evaluate_classification(y_val, moral_preds)


#end time
end_time = time.perf_counter()
    
elapsed_time = end_time - start_time
    
print("\n--- Final Evaluation (Random Forest ESA Override) ---")
evaluate_classification(y_override, rf_override_preds)
print("\n--- Final Evaluation (Random Forests - ESA Override) ---")
print("Accuracy:", accuracy_score(y, rf_override_preds))
elapsed_time

Fold 1 - Random Forest ESA Override:

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        65
           1       0.58      1.00      0.73        89

    accuracy                           0.58       154
   macro avg       0.29      0.50      0.37       154
weighted avg       0.33      0.58      0.42       154


Confusion Matrix:
[[ 0 65]
 [ 0 89]]
Fold 2 - Random Forest ESA Override:

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        65
           1       0.58      1.00      0.73        89

    accuracy                           0.58       154
   macro avg       0.29      0.50      0.37       154
weighted avg       0.33      0.58      0.42       154


Confusion Matrix:
[[ 0 65]
 [ 0 89]]
Fold 3 - Random Forest ESA Override:

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Fold 4 - Random Forest ESA Override:

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        65
           1       0.58      1.00      0.73        89

    accuracy                           0.58       154
   macro avg       0.29      0.50      0.37       154
weighted avg       0.33      0.58      0.42       154


Confusion Matrix:
[[ 0 65]
 [ 0 89]]
Fold 5 - Random Forest ESA Override:

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        64
           1       0.58      1.00      0.74        89

    accuracy                           0.58       153
   macro avg       0.29      0.50      0.37       153
weighted avg       0.34      0.58      0.43       153


Confusion Matrix:
[[ 0 64]
 [ 0 89]]

--- Final Evaluation (Random Forest ESA Override) ---

Classification Report:
              precision    recall  f1-score   support

           0  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0.5007782997563481

In [10]:
metrics = evaluate_classification(y, rf_override_preds)
print(metrics)


Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.81      0.89     31229
           1       0.18      1.00      0.31      1352

    accuracy                           0.82     32581
   macro avg       0.59      0.90      0.60     32581
weighted avg       0.97      0.82      0.87     32581


Confusion Matrix:
[[25229  6000]
 [    0  1352]]
{'accuracy': 0.8158435898222891, 'precision': 0.1838955386289445, 'recall': 1.0, 'f1_score': 0.31066176470588236, 'roc_auc': 0.9039354446187838}


In [7]:
rf_penalized_preds = np.zeros(len(X))

start_time = time.perf_counter()
    
for fold, (train_idx, val_idx) in enumerate(skf.split(X_override, y_override)):
    X_train, X_val = X_override.iloc[train_idx], X_override.iloc[val_idx]
    y_train, y_val = y_override.iloc[train_idx], y_override.iloc[val_idx]
    phi_train = esa_features[train_idx]
    tau_train = tau_values[train_idx]

    moral_penalty = np.array([(tau - esa_score(phi, alpha))**2 for phi, tau in zip(phi_train, tau_train)])
    sample_weights = np.clip(1 + 5 * moral_penalty, 1, 10)

    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train, sample_weight=sample_weights)

    y_val_pred = rf_model.predict(X_val)
    rf_penalized_preds[val_idx] = y_val_pred

    print(f"Fold {fold+1} - Random Forest ESA Penalized:")
    evaluate_classification(y_val, y_val_pred)

#end time
end_time = time.perf_counter()
    
elapsed_time = end_time - start_time

print("\n--- Final Evaluation (Random Forest ESA Penalized) ---")
evaluate_classification(y, rf_penalized_preds)

metrics = evaluate_classification(y, rf_penalized_preds)
print(metrics)
elapsed_time

Fold 1 - Random Forest ESA Penalized:

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        65
           1       1.00      1.00      1.00        89

    accuracy                           1.00       154
   macro avg       1.00      1.00      1.00       154
weighted avg       1.00      1.00      1.00       154


Confusion Matrix:
[[65  0]
 [ 0 89]]
Fold 2 - Random Forest ESA Penalized:

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        65
           1       1.00      1.00      1.00        89

    accuracy                           1.00       154
   macro avg       1.00      1.00      1.00       154
weighted avg       1.00      1.00      1.00       154


Confusion Matrix:
[[65  0]
 [ 0 89]]
Fold 3 - Random Forest ESA Penalized:

Classification Report:
              precision    recall  f1-score   support

           0       1.00      

0.5047963997349143