In [1]:
import numpy as np

# Input files parsed by read_file: one whitespace-separated record per line,
# whose first field is an integer label and whose last field is a float score.
# Both paths are relative, so they resolve against the notebook's working directory.
mic_scores_path = "pretrained_mic_pairs_scores.txt"
accel_scores_path = "pretrained_accel_pairs_scores.txt"


def read_file(path):
    """Parse a whitespace-separated scores file into labels and scores.

    Each non-blank line is split on whitespace; the first field is parsed as
    the integer class label and the last field as the float score. Any fields
    in between are ignored.

    Args:
        path: Path of the text file to read.

    Returns:
        A ``(labels, scores)`` tuple of two equally long lists (``int`` labels
        and ``float`` scores), in file order.

    Raises:
        ValueError: If the first field of a line is not an integer or the last
            field is not a float.
    """
    labels, scores = [], []
    with open(path, "r") as file:
        for line in file:
            fields = line.split()
            # Blank or whitespace-only lines (e.g. a trailing empty line)
            # produce no fields; skip them instead of failing on fields[0].
            if not fields:
                continue

            labels.append(int(fields[0]))
            scores.append(float(fields[-1]))

    return labels, scores

# Load both score files. Only the labels from the mic file are kept.
# NOTE(review): the labels read from the accel file are discarded, so this
# assumes both files list the same trials in the same order - confirm.
labels, mic_scores = read_file(mic_scores_path)
_, accel_scores = read_file(accel_scores_path)

# Quick sanity check: the three lengths should match.
print(len(labels), len(mic_scores), len(accel_scores))

# Convert the score lists to numpy arrays for the vectorised arithmetic below.
mic_scores, accel_scores = np.array(mic_scores), np.array(accel_scores)

print(mic_scores, accel_scores)

15170 15170 15170
[0.70339489 0.76720452 0.73567629 ... 0.33624983 0.30147082 0.13339698] [0.63835269 0.76861578 0.68000686 ... 0.40162069 0.36670646 0.40017611]


In [2]:
from sklearn.preprocessing import StandardScaler

# Mean-centre each modality's scores; with_std=False means no variance scaling.
# The scaler is fed a single-column 2-D array, so reshape on the way in and
# squeeze that column back out to 1-D afterwards.
scaler = StandardScaler(with_std=False)

mic_centered = scaler.fit_transform(mic_scores.reshape(-1, 1))
mic_scores_normalized = mic_centered.squeeze(1)

# The same scaler object is refit here, so after this cell it is fitted on
# the accel scores.
accel_centered = scaler.fit_transform(accel_scores.reshape(-1, 1))
accel_scores_scores_normalized = accel_centered.squeeze(1)


In [32]:
from sklearn.metrics import roc_curve
import numpy as np

import matplotlib.pyplot as plt
def plot_frr_far_vs_threshold(labels, scores, file_path='frr_far_plot.png'):
    """Plot FRR and FAR against the decision threshold and save the figure.

    The rates come from ``roc_curve(labels, scores, pos_label=1)``: FAR is the
    false-positive rate and FRR is ``1 - TPR``. The point where the two rates
    are closest is marked and annotated as the EER.

    Args:
        labels: Ground-truth labels; label 1 is treated as the positive class.
        scores: Scores passed to ``roc_curve`` together with ``labels``.
        file_path: Where the figure is written.

    Returns:
        ``(eer_value, eer_threshold)``: the FAR at the marked point and the
        threshold at which it occurs.
    """
    far, tpr, thresholds = roc_curve(labels, scores, pos_label=1)
    frr = 1 - tpr  # a rejected positive is a missed true positive

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(thresholds, frr, label='FRR (1-TPR)', color='red')
    ax.plot(thresholds, far, label='FAR (FPR)', color='blue')
    ax.set_xlabel('Decision Threshold')
    ax.set_ylabel('Error Rate')
    ax.set_title('FRR and FAR vs. Threshold')
    ax.legend(loc="upper right")
    ax.grid(True)

    # EER point: the threshold index where |FRR - FAR| is smallest.
    crossing = np.nanargmin(np.abs(frr - far))
    eer_threshold = thresholds[crossing]
    eer_value = far[crossing]
    ax.scatter(eer_threshold, eer_value, color='green')
    ax.annotate(
        f'EER = {eer_value:.2f}',
        (eer_threshold, eer_value),
        textcoords="offset points",
        xytext=(0, 10),
        ha='center',
    )

    fig.savefig(file_path)
    plt.close(fig)

    return eer_value, eer_threshold


def plot_roc_curve(fpr, tpr, label=None, file_path='roc_curve.png'):
    """Plot a ROC curve, mark its equal-error-rate point, and save the figure.

    Args:
        fpr: False-positive rates; must support element-wise arithmetic
            (e.g. a numpy array).
        tpr: True-positive rates, aligned element-wise with ``fpr``.
        label: Optional legend entry for the curve. When ``None`` no legend
            is drawn.
        file_path: Where the figure is written.

    Returns:
        None. The figure is saved to ``file_path`` and then closed.
    """
    plt.figure(figsize=(10, 6))
    plt.plot(fpr, tpr, linewidth=2, label=label)
    plt.plot([0, 1], [0, 1], 'k--')  # Dashed diagonal

    # EER point: the index where FPR and FNR (1 - TPR) are closest.
    eer_index = np.nanargmin(np.abs(fpr - (1 - tpr)))
    eer = fpr[eer_index]
    plt.scatter(fpr[eer_index], tpr[eer_index], color='red')  # EER point
    plt.annotate(f'EER = {eer:.2f}', (fpr[eer_index], tpr[eer_index]), textcoords="offset points", xytext=(10,-10), ha='center')

    plt.axis([0, 1, 0, 1])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve with EER Point')
    # Only draw a legend when there is a labelled curve; calling legend() with
    # no labelled artists makes matplotlib emit a warning.
    if label is not None:
        plt.legend(loc="lower right")
    plt.grid(True)
    plt.savefig(file_path)
    plt.close()

def plot_det_curve(fpr, fnr, label=None, file_path='det_curve.png'):
    """Plot false-negative rate against false-positive rate and save the figure.

    Args:
        fpr: False-positive rates (x axis).
        fnr: False-negative rates (y axis), aligned element-wise with ``fpr``.
        label: Optional legend entry for the curve. When ``None`` no legend
            is drawn.
        file_path: Where the figure is written.

    Returns:
        None. The figure is saved to ``file_path`` and then closed.
    """
    plt.figure(figsize=(10, 6))
    plt.plot(fpr, fnr, linewidth=2, label=label)
    plt.xlabel('False Positive Rate')
    plt.ylabel('False Negative Rate')
    plt.title('DET Curve')
    # Axes are linear; the log-scale variant is left disabled.
    # plt.xscale('log')
    # plt.yscale('log')
    # The label was previously attached to the line but never shown because no
    # legend was drawn; show it when one is supplied.
    if label is not None:
        plt.legend(loc="upper right")
    plt.grid(True)
    plt.savefig(file_path)
    plt.close()


def calculate_eer_and_curves(labels, scores):
    """Compute the EER and write the ROC, DET and FRR/FAR plots to disk.

    The plots are saved through ``plot_roc_curve`` ("roc_curve.png"),
    ``plot_det_curve`` ("det_curve.png") and ``plot_frr_far_vs_threshold``
    (its default file path).

    Args:
        labels: Ground-truth labels; label 1 is treated as the positive class.
        scores: Scores passed to ``roc_curve`` together with ``labels``.

    Returns:
        ``(EER, eer_threshold)``: the false-positive rate at the point where
        FPR and FNR are closest, and the threshold at that point.
    """
    fpr, tpr, threshold = roc_curve(labels, scores, pos_label=1)
    fnr = 1 - tpr

    # Index of the operating point where |FNR - FPR| is smallest.
    crossing = np.nanargmin(np.absolute(fnr - fpr))
    EER, eer_threshold = fpr[crossing], threshold[crossing]

    plot_roc_curve(fpr, tpr, "ROC Curve", "roc_curve.png")
    plot_det_curve(fpr, fnr, "DET Curve", "det_curve.png")
    # The values returned by the plot helper are not needed here.
    plot_frr_far_vs_threshold(labels, scores)

    return EER, eer_threshold


def calculate_eer(labels, scores):
    """Compute the equal error rate and its threshold without plotting.

    Args:
        labels: Ground-truth labels; label 1 is treated as the positive class.
        scores: Scores passed to ``roc_curve`` together with ``labels``.

    Returns:
        ``(EER, eer_threshold)``: the false-positive rate at the point where
        FPR and FNR are closest, and the threshold at that point.
    """
    fpr, tpr, threshold = roc_curve(labels, scores, pos_label=1)
    fnr = 1 - tpr

    # Index of the operating point where |FNR - FPR| is smallest.
    crossing = np.nanargmin(np.absolute(fnr - fpr))

    return fpr[crossing], threshold[crossing]

# Fixed-weight average (0.95 mic / 0.05 accel)


In [21]:

# Fuse the two score arrays with fixed weights: 0.95 for mic, 0.05 for accel.
mic_weight, accel_weight = 0.95, 0.05
avg_scores = mic_weight * mic_scores + accel_weight * accel_scores

# EER of the fused scores and the threshold at which it occurs.
eer, thresh = calculate_eer(labels, avg_scores)
print(eer, thresh)


0.03611111111111111 0.47419234514236447


# Weighted Average

In [40]:
from sklearn.metrics import roc_curve
from scipy.optimize import brentq
from scipy.interpolate import interp1d

# Grid-search the mic weight w1 over [0, 1] in steps of 0.01 (the accel weight
# is w2 = 1 - w1) and keep the pair with the lowest EER.
# np.linspace includes the w1 = 1.0 endpoint (mic only). The previous
# np.arange(0, 1, 0.01) stopped at 0.99, so the mic-only system was never
# evaluated even though the accel-only one (w1 = 0) was.
steps = np.linspace(0., 1., 101)

# Start from +inf so the first candidate always initialises the best_* values
# and the final print can never hit an unbound name.
best_eer = np.inf
best_thresh = w1_best = w2_best = None

for w1 in steps:
    w2 = 1. - w1
    avg_scores = w1 * mic_scores + w2 * accel_scores
    eer, thresh = calculate_eer(labels, avg_scores)

    # Strict '<' keeps the first (smallest w1) candidate on ties.
    if eer < best_eer:
        best_eer = eer
        best_thresh = thresh
        w1_best = w1
        w2_best = w2


print(w1_best, w2_best, best_eer, best_thresh)




0.76 0.24 0.03340840840840841 0.49923916339874264


In [57]:
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV, SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score


# Stack the two score arrays into an (n_samples, 2) feature matrix.
concat_scores = np.vstack((mic_scores, accel_scores)).T
# Mean-centred variant of the same matrix; it is not used in this cell.
scores_normalized = np.vstack((mic_scores_normalized, accel_scores_scores_normalized)).T

# Hold out 20% of the pairs for evaluation; the seed keeps the split repeatable.
# NOTE(review): the split is not stratified - confirm that is intended if the
# classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(concat_scores, labels, test_size=0.2, random_state=2123)

# Create and train logistic regression model.
# class_weight gives class 1 four times the weight of class 0 (0.8 vs 0.2).
model = LogisticRegression(class_weight={1: 0.8, 0: 0.2})
model.fit(X_train, y_train)


# Predict probabilities on the test set (column 1 = probability of class 1)
y_prob = model.predict_proba(X_test)[:, 1]


print(y_prob)
# y_test is a plain Python list, so printing it directly dumps every element.
# Printing it as a numpy array gives the same summarised preview as y_prob.
print(np.asarray(y_test))
# Calculate ROC AUC
auc = roc_auc_score(y_test, y_prob)
print("ROC AUC Score:", auc)


# EER of the fused probabilities on the held-out split
eer, thresh = calculate_eer(y_test, y_prob)
print(eer, thresh)

[0.02604713 0.00258776 0.00107086 ... 0.96825653 0.05371785 0.00482883]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [None]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve
from sklearn.preprocessing import StandardScaler
from scipy.optimize import brentq
from scipy.interpolate import interp1d



# Split data into training and testing sets (10% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(scores_normalized, labels, test_size=0.1, random_state=42)

# Create and train Random Forest model with 200 trees.
# The forest is seeded as well as the split; without random_state each run
# builds different trees and reports a different EER.
rf_model = RandomForestClassifier(n_estimators=200, random_state=42)
rf_model.fit(X_train, y_train)

# Predict probabilities on the test set (column 1 = probability of class 1)
y_prob = rf_model.predict_proba(X_test)[:, 1]

# Calculate ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_prob)

# Calculate the EER: the false-positive rate x at which 1 - x equals the
# interpolated TPR(x), i.e. where FPR == FNR. The interpolator is built once
# rather than on every evaluation that brentq makes.
tpr_at = interp1d(fpr, tpr)
eer = brentq(lambda x: 1. - x - tpr_at(x), 0., 1.)
# NOTE(review): the first entry of `thresholds` is a sentinel above every
# score (infinite in some scikit-learn versions - confirm), so the threshold
# may be non-finite if the EER falls in the first ROC segment.
threshold_at_eer = interp1d(fpr, thresholds)(eer)

print(f"EER: {eer:.2f} at threshold: {threshold_at_eer}")
