In [None]:
import os
print(os.getcwd())
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import make_scorer
import xgboost as xgb
import numpy as np
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc
import pandas as pd
from sklearn.linear_model import LogisticRegression
from FeatBoost.feat_selector import FeatureSelector
import json
import gc
from sklearn.model_selection import StratifiedKFold, ParameterGrid
import json
import csv
import uncertainty
from scipy.stats import beta


In [None]:
# NOTE(review): this cell repeats the `import os` / working-directory print that the
# first cell already performs; it is a candidate for deletion.
import os
print(os.getcwd())


In [None]:
# Read the three cohort tables plus the two lookup tables used for the fairness features.
# All paths are relative to the kernel's working directory.
gim_cohort = pd.read_parquet("./DataProcessing/Sep_24_gim_icd10.parquet")
sbk_gim = pd.read_parquet("./DataProcessing/Sep_24_sbk_gim_icd10.parquet")

non_gim_cohort = pd.read_parquet("./DataProcessing/Sep_24_non_gim_icd10.parquet")
locality = pd.read_csv("./fair_interpretable/fair_inter_locality_v2_update.csv")
statcan = pd.read_csv('./fair_interpretable/statcan_table.csv')

# Columns whose values sum to zero in the main GIM cohort; the same column list is then
# removed from every cohort so the three tables keep matching feature sets.
zero_sum_columns = [name for name, column in gim_cohort.items() if column.sum() == 0]
gim_cohort, sbk_gim, non_gim_cohort = (
    frame.drop(columns=zero_sum_columns)
    for frame in (gim_cohort, sbk_gim, non_gim_cohort)
)


In [None]:
# Left-join the statcan table onto the locality table by `da21uid`, keeping every locality row.
fairness = locality.merge(statcan, how='left', on='da21uid')
print(fairness.shape)

# Keep the encounter id plus the four `_q_DA21` columns, then give them short names.
fairness = fairness.loc[:, [
    'genc_id',
    'households_dwellings_q_DA21',
    'material_resources_q_DA21',
    'age_labourforce_q_DA21',
    'racialized_NC_pop_q_DA21',
]]
fairness.columns = [
    'genc_id',
    'households_dwellings',
    'material_resources',
    'age_labourforce',
    'racialized',
]

# The lookup tables are no longer needed once merged; release them.
del locality, statcan
gc.collect()

# Every column except the leading `genc_id` key is a fairness feature.
fairness_columns = fairness.columns[1:].tolist()
print(fairness_columns)

In [None]:
# De-duplicate the GIM cohort, then append the sbk_gim rows under a fresh 0..n-1 index.
# NOTE(review): this cell rebinds `gim_cohort`, so running it a second time appends
# `sbk_gim` again; re-run the loading cell first if it must be repeated.
gim_cohort = pd.concat(
    [gim_cohort.drop_duplicates().reset_index(drop=True), sbk_gim],
    ignore_index=True,
)

In [None]:
# Row/column count of the cohort after sbk_gim was appended to the de-duplicated GIM rows.
gim_cohort.shape

In [None]:
def prevalence_rate(y, fairness_features_df, culmulative=False):
    """
    Calculate the prevalence rate of the positive label across fairness feature groups.

    Every fairness feature is turned into one or more binary partitions (group 1 vs.
    group 0) and the mean of `y` is reported for each side of each partition.

    :param y: Array-like of binary labels (0 or 1), one per row of `fairness_features_df`
        and in the same row order.
    :param fairness_features_df: DataFrame with one column per fairness feature.
    :param culmulative: If True, group 1 is `feature > split`, using split 0 for the
        "gender_F" column and splits 1, 2, 3, 4 for every other column. If False,
        group 1 is `feature == split` for each distinct value found in the column.
    :return: DataFrame with one row per (fairness_feature, split) and the columns
        `fairness_feature`, `split`, `group_0_size`, `group_1_size`,
        `prevalence_rate0`, `prevalence_rate1`. The rate of an empty group is NaN.
    """
    labels = np.asarray(y)
    prevalence_rates = []

    def _group_rate(mask):
        # Mean label inside the group; undefined (NaN) when the group has no rows.
        return float(labels[mask].mean()) if mask.any() else np.nan

    for fairness_feature in fairness_features_df.columns:
        values = fairness_features_df[fairness_feature]
        if culmulative:
            if fairness_feature == "gender_F":
                splits = [0]
            else:
                splits = [1, 2, 3, 4]
        else:
            splits = values.unique()

        for split in splits:
            if culmulative:
                group1_mask = (values > split).to_numpy()
            else:
                group1_mask = (values == split).to_numpy()
            # Rows that fail the comparison (including missing values) fall in group 0.
            group0_mask = ~group1_mask

            prevalence_rates.append({
                'fairness_feature': fairness_feature,
                'split': split,
                'group_0_size': int(group0_mask.sum()),
                'group_1_size': int(group1_mask.sum()),
                'prevalence_rate0': _group_rate(group0_mask),
                'prevalence_rate1': _group_rate(group1_mask),
            })

    return pd.DataFrame(prevalence_rates)
               
    

In [1]:
def apk_binary(y_true, y_pred_probs, k):
    """
    Computes the Average Precision at K (AP@K) for binary predictions.

    Items are ranked by descending predicted probability; precision is accumulated at
    every rank (within the top k) that holds a positive label, and the total is divided
    by `min(number of positives, k)`.

    :param y_true: List or array of ground truth binary labels (0 or 1).
    :param y_pred_probs: List or array of predicted probabilities, aligned with y_true.
    :param k: The number of top predictions to consider.
    :return: Average Precision at K (AP@K); 0.0 when there are no positive labels or
        when k is not positive.
    """
    labels = np.asarray(y_true)
    scores = np.asarray(y_pred_probs)

    # Count positives once; a non-positive k leaves nothing to rank and would
    # otherwise make the normaliser below zero (or negative).
    n_relevant = labels.sum()
    if k <= 0 or n_relevant <= 0:
        return 0.0

    # Reverse an ascending argsort to rank by descending probability.
    ranked_labels = labels[np.argsort(scores)[::-1]]

    num_hits = 0.0
    score = 0.0
    for rank, label in enumerate(ranked_labels[:k], start=1):
        if label == 1:  # Relevant item
            num_hits += 1.0
            score += num_hits / rank  # Precision at this rank

    # Normalize by the number of relevant items or k, whichever is smaller
    return score / min(n_relevant, k)



def calc_metrics(prediction, prediction_prob, labels, k=10):
    """
    Score hard predictions and predicted probabilities against binary labels.

    :param prediction: Hard 0/1 predictions.
    :param prediction_prob: Predicted probabilities (used for ROC AUC and the ranking).
    :param labels: Ground-truth binary labels.
    :param k: Size of the top-ranked shortlist used for precision@k.
    :return: Tuple (accuracy, f1, precision, recall, roc_auc, precision_at_k).
    """
    # 'binary' averaging scores the positive class only.
    scores = [
        accuracy_score(labels, prediction),
        f1_score(labels, prediction, average='binary'),
        precision_score(labels, prediction, average='binary'),
        recall_score(labels, prediction, average='binary'),
        roc_auc_score(labels, prediction_prob),
    ]

    # Precision@k: share of positives among the k highest-probability rows.
    # The divisor is always k, even when fewer than k rows exist.
    ranking = np.argsort(prediction_prob)[::-1]
    shortlist_labels = np.array(labels)[ranking[:k]]
    scores.append(np.sum(shortlist_labels) / k)

    return tuple(scores)




def compute_group_metrics(y_true, y_pred):
    """
    Confusion-matrix rates for one group.

    :param y_true: Array-like of ground-truth labels (1 = positive, 0 = negative).
    :param y_pred: Array-like of predicted labels (1 = positive, 0 = negative).
    :return: Tuple (TPR, FPR, precision, recall); a rate whose denominator is zero is NaN.
    """
    truth = np.array(y_true)
    guess = np.array(y_pred)

    actual_pos, actual_neg = truth == 1, truth == 0
    flagged, unflagged = guess == 1, guess == 0

    TP = np.sum(actual_pos & flagged)
    FP = np.sum(actual_neg & flagged)
    FN = np.sum(actual_pos & unflagged)
    TN = np.sum(actual_neg & unflagged)

    def _ratio(numerator, denominator):
        # An undefined rate is reported as NaN rather than raising or returning 0.
        if denominator > 0:
            return numerator / denominator
        return np.nan

    TPR = _ratio(TP, TP + FN)
    FPR = _ratio(FP, FP + TN)
    precision = _ratio(TP, TP + FP)

    # Recall is by definition the true-positive rate.
    return TPR, FPR, precision, TPR


def calc_metrics_at_thresholds(num_features, prediction_prob, labels, fairness_features_df, thresholds=[0.5], k=10, culmulative=False):
    """
    Compute performance and fairness metrics for every (threshold, fairness feature, split).

    For each threshold the probabilities are binarised, and for each fairness feature the
    rows are partitioned into group 1 / group 0 at every split point. One result row is
    produced per combination; combinations where either group is empty are skipped.

    :param num_features: Value copied verbatim into the `num_features` column of each row.
    :param prediction_prob: Predicted probabilities. Must support `>=` and `.astype`
        (e.g. a numpy array).
    :param labels: Ground-truth binary labels. Must expose `.shape` and accept boolean-mask
        indexing (e.g. a numpy array); assumed to be row-aligned with
        `fairness_features_df` — TODO confirm for pandas inputs with a custom index.
    :param fairness_features_df: DataFrame with one column per fairness feature.
    :param thresholds: Iterable of decision thresholds; the default list is only read.
    :param k: Size of the top-ranked shortlist used by every `@k` metric.
    :param culmulative: If True, group 1 is `feature <= split` (split 0 for "gender_F",
        splits 1-4 otherwise); if False, group 1 is `feature == split` for each distinct
        value of the feature.
    :return: DataFrame with one row per retained (threshold, fairness_feature, split).
    """
    results = []

    # --- Threshold-independent quantities: computed once ---------------------------
    roc_auc = roc_auc_score(labels, prediction_prob)

    # Rank rows by descending probability and mark the k best as the shortlist.
    sorted_indices = np.argsort(prediction_prob)[::-1]
    top_k_indices = sorted_indices[:k]
    top_k_mask = np.zeros(len(labels), dtype=bool)
    top_k_mask[top_k_indices] = True

    top_k_labels = np.array(labels)[top_k_indices]
    precision_at_k = np.sum(top_k_labels) / k

    # Average precision over the full ranking (k = number of rows) and the base rate
    # depend only on the scores and labels, so they are hoisted out of the loop.
    map_k = apk_binary(labels, prediction_prob, k=labels.shape[0])
    true_positive_percentage = np.mean(labels)

    for threshold in thresholds:
        # Apply the threshold to generate predictions
        prediction = (prediction_prob >= threshold).astype(int)

        # Overall metrics at this threshold
        correct_prediction = labels == prediction
        acc = np.mean(correct_prediction)
        f1 = f1_score(labels, prediction, average='binary')
        precision = precision_score(labels, prediction, average='binary')
        recall = recall_score(labels, prediction, average='binary')

        # Share of rows predicted positive
        pred_positive_percentage = np.mean(prediction)

        for fairness_feature in list(fairness_features_df.columns):
            if culmulative:
                # Cumulative split points for fairness features
                if fairness_feature == "gender_F":
                    splits = [0]
                else:
                    splits = [1,2,3,4]
                col_name_split = "culmulative_split_point(<= split)"
            else:
                # Exact split points for fairness features
                splits = fairness_features_df[fairness_feature].unique()
                col_name_split = "exact_split_point(== split)"

            for split in splits:
                if culmulative:
                    # (<= split) -> 1, (> split) -> 0
                    fairness_binary = (fairness_features_df[fairness_feature]<=split).astype(int)
                else:
                    # (== split) -> 1, (!= split) -> 0
                    fairness_binary = (fairness_features_df[fairness_feature] == split).astype(int)
                group0_mask = fairness_binary == 0
                group1_mask = fairness_binary == 1

                # A one-sided partition carries no between-group comparison; skip it
                # before doing any per-group work.
                if group0_mask.sum() == 0 or group1_mask.sum() == 0:
                    continue

                # Prevalence rate: how common label = 1 is inside each group.
                prevalence_rate1 = (labels[group1_mask].sum() / len(labels[group1_mask])) if len(labels[group1_mask]) > 0 else 0
                prevalence_rate0 = (labels[group0_mask].sum() / len(labels[group0_mask])) if len(labels[group0_mask]) > 0 else 0

                # Prevalence@k: among a group's members that made the top-k shortlist,
                # the fraction that are truly positive (0 when none made the shortlist).
                prevalence_rate1_k = (labels[group1_mask&top_k_mask].sum() / len(labels[group1_mask&top_k_mask])) if len(labels[group1_mask&top_k_mask]) > 0 else 0
                prevalence_rate0_k = (labels[group0_mask&top_k_mask].sum() / len(labels[group0_mask&top_k_mask])) if len(labels[group0_mask&top_k_mask]) > 0 else 0

                # Treatment rate: share of each group predicted positive at this threshold.
                treatment_rate1 = (prediction[group1_mask].sum() / len(prediction[group1_mask])) if len(prediction[group1_mask]) > 0 else 0
                treatment_rate0 = (prediction[group0_mask].sum() / len(prediction[group0_mask])) if len(prediction[group0_mask]) > 0 else 0

                # Treatment rate@k: share of the whole group that lands in the top-k.
                treatment_rate1_k = (group1_mask & top_k_mask).sum() / group1_mask.sum() if group1_mask.sum() > 0 else 0
                treatment_rate0_k = (group0_mask & top_k_mask).sum() / group0_mask.sum() if group0_mask.sum() > 0 else 0

                group0_labels = np.array(labels)[group0_mask]
                group0_preds = np.array(prediction)[group0_mask]

                group1_labels = np.array(labels)[group1_mask]
                group1_preds = np.array(prediction)[group1_mask]

                TPR_0, FPR_0, precision_0, recall_0 = compute_group_metrics(group0_labels, group0_preds)
                TPR_1, FPR_1, precision_1, recall_1 = compute_group_metrics(group1_labels, group1_preds)

                # All gaps are signed as (group 1 - group 0).
                tpr_diff = TPR_1 - TPR_0
                fpr_diff = FPR_1 - FPR_0
                prec_diff = precision_1 - precision_0
                rec_diff = recall_1 - recall_0

                # Same rates @k: "predicted positive" means "row is in the top-k shortlist".
                TPR_0_k, FPR_0_k, precision_0_k, recall_0_k = compute_group_metrics(labels[group0_mask],
                                                                                    top_k_mask[group0_mask])
                TPR_1_k, FPR_1_k, precision_1_k, recall_1_k = compute_group_metrics(labels[group1_mask],
                                                                                    top_k_mask[group1_mask])
                tpr_diff_k = TPR_1_k - TPR_0_k
                fpr_diff_k = FPR_1_k - FPR_0_k
                prec_diff_k = precision_1_k - precision_0_k
                rec_diff_k = recall_1_k - recall_0_k

                # Bayesian disparity / uncertainty come from the project-local `uncertainty`
                # module, with "prediction was correct" as the favourable outcome E1.
                # NOTE(review): the helper's exact semantics are not visible in this notebook.
                E1 = correct_prediction
                E2 = True
                bayesian_disparity = uncertainty.bayesian_disparity(group0_mask, group1_mask, E1, E2)
                bayesian_disparity_abs = np.abs(bayesian_disparity)
                uncertainty_value = uncertainty.uncertainty(group0_mask, group1_mask, E1, E2)

                # Same quantities restricted to the top-k rows. The groups are passed in the
                # same (group0, group1) order as above so the signed @k disparity is
                # directly comparable with the overall one.
                E1_k = np.array(labels)[top_k_mask] == prediction[top_k_mask]
                E2_k = True
                bayesian_disparity_k = uncertainty.bayesian_disparity(group0_mask[top_k_mask], group1_mask[top_k_mask], E1_k, E2_k)
                bayesian_disparity_abs_k = np.abs(bayesian_disparity_k)
                uncertainty_value_k = uncertainty.uncertainty(group0_mask[top_k_mask], group1_mask[top_k_mask], E1_k, E2_k)

                results.append({
                        'num_features': num_features,
                        'threshold': threshold,
                        'fairness_feature': fairness_feature,
                        col_name_split: split,
                        'group_0_size': int((fairness_binary == 0).sum()),
                        'group_1_size': int((fairness_binary == 1).sum()),
                        'accuracy': acc,
                        'f1_score': f1,
                        'precision': precision,
                        'recall': recall,
                        'roc_auc': roc_auc,
                        'precision_at_k': precision_at_k,
                        'map@k': map_k,
                        'pred_positive_percentage': pred_positive_percentage,
                        'true_positive_percentage': true_positive_percentage,
                        'tpr_diff_abs': abs(tpr_diff),
                        'fpr_diff_abs': abs(fpr_diff),
                        'tpr_diff_raw': tpr_diff,
                        'fpr_diff_raw': fpr_diff,
                        'precision_diff_abs': abs(prec_diff),
                        'recall_diff_abs': abs(rec_diff),
                        'equalized_odds_max': max(abs(tpr_diff), abs(fpr_diff)),
                        'equalized_odds': 0.5*(abs(tpr_diff) + abs(fpr_diff)),
                        'bayesian_disparity': bayesian_disparity,
                        'bayesian_disparity_abs':bayesian_disparity_abs,
                        'bayesian_uncertainty': uncertainty_value,
                        'prevalence_rate1': prevalence_rate1,
                        'prevalence_rate0': prevalence_rate0,
                        'treatment_rate1': treatment_rate1,
                        'treatment_rate0': treatment_rate0,
                        'prevalence_rate1@k': prevalence_rate1_k,
                        'prevalence_rate0@k': prevalence_rate0_k,
                        'treatment_rate1@k': treatment_rate1_k,
                        'treatment_rate0@k': treatment_rate0_k,
                        'tpr_0@k': TPR_0_k,
                        'fpr_0@k': FPR_0_k,
                        'precision_0@k': precision_0_k,
                        'recall_0@k': recall_0_k,
                        'tpr_1@k': TPR_1_k,
                        'fpr_1@k': FPR_1_k,
                        'precision_1@k': precision_1_k,
                        'recall_1@k': recall_1_k,
                        'tpr_diff@k': tpr_diff_k,
                        'fpr_diff@k': fpr_diff_k,
                        'precision_diff@k': prec_diff_k,
                        'recall_diff@k': rec_diff_k,
                        'bayesian_disparity@k': bayesian_disparity_k,
                        'bayesian_disparity_abs@k': bayesian_disparity_abs_k,
                        'bayesian_uncertainty@k': uncertainty_value_k,
                        'equalized_odds_max@k': max(abs(tpr_diff_k), abs(fpr_diff_k)),
                        'equalized_odds@k': 0.5*(abs(tpr_diff_k) + abs(fpr_diff_k))
                        })

    df = pd.DataFrame(results)
    return df


def bootstrap_sample(data, proportion=0.8, random_state=42):
    """
    Bootstrap sampling function (sampling rows with replacement).

    :param data: DataFrame to sample from.
    :param proportion: Proportion of the data to sample; the sample holds
        `int(len(data) * proportion)` rows.
    :param random_state: Seed or random-state object forwarded to `DataFrame.sample`.
        With the default (42) every call returns the same rows, so callers that need
        independent resamples must pass a different value per call (or a shared
        `numpy.random.RandomState`, which advances between calls).
    :return: Sampled DataFrame.
    """
    n_samples = int(len(data) * proportion)
    return data.sample(n_samples, replace=True, random_state=random_state)




 




def dist_plot_top_k(prediction_prob, fairness_feature, k=10):
    """
    Plot two histograms describing the k highest-scoring rows.

    The left panel shows the predicted probabilities of the top-k rows; the right panel
    shows the values of one fairness feature for those same rows.

    :param prediction_prob: Array-like of predicted probabilities.
    :param fairness_feature: Array-like of fairness feature values, row-aligned with
        `prediction_prob`.
    :param k: Number of top-ranked rows to plot.
    :return: None; the figure is shown with `plt.show()`.
    """
    # Work on plain arrays so both inputs are indexed by position. A pandas Series
    # would otherwise be indexed by label, and a list cannot take an index array.
    prediction_prob = np.asarray(prediction_prob)
    fairness_feature = np.asarray(fairness_feature)

    # Sort by prediction probabilities in descending order and take the top k predictions
    sorted_indices = np.argsort(prediction_prob)[::-1]
    top_k_indices = sorted_indices[:k]
    top_k_fairness_feature = fairness_feature[top_k_indices]

    fig, ax = plt.subplots(1, 2, figsize=(12, 6))

    # Plot the distribution of the top k predictions
    ax[0].hist(prediction_prob[top_k_indices], bins=20, color='skyblue', edgecolor='black')
    ax[0].set_title(f'Distribution of Top {k} Predictions')
    ax[0].set_xlabel('Prediction Probability')
    ax[0].set_ylabel('Count')

    # Plot the distribution of the fairness feature in the top k predictions
    ax[1].hist(top_k_fairness_feature, bins=20, color='lightcoral', edgecolor='black')
    ax[1].set_title(f'Distribution of Fairness Feature in Top {k} Predictions')
    ax[1].set_xlabel('Fairness Feature Value')
    ax[1].set_ylabel('Count')

    plt.tight_layout()
    plt.show()




In [None]:

def bootstrap_sample(data, proportion=0.8, random_state=42):
    """
    Bootstrap sampling function (sampling rows with replacement).

    :param data: DataFrame to sample from.
    :param proportion: Proportion of the data to sample; the sample holds
        `int(len(data) * proportion)` rows.
    :param random_state: Seed or random-state object forwarded to `DataFrame.sample`.
        With the default (42) every call returns the same rows; pass a shared
        `numpy.random.RandomState` to obtain a different sample on each call.
    :return: Sampled DataFrame.
    """
    n_samples = int(len(data) * proportion)
    return data.sample(n_samples, replace=True, random_state=random_state)

def bootstrap_confidence_interval(data, metric_func, n_iterations=1000, alpha=0.05, random_state=42):
    """
    Calculate the percentile confidence interval for a metric using bootstrap sampling.

    :param data: DataFrame containing the data.
    :param metric_func: Function that takes a resampled DataFrame and returns the metric:
        a number, a numeric array-like, or a DataFrame (only its numeric columns are
        used). The result must have the same shape on every iteration.
    :param n_iterations: Number of bootstrap iterations (at least 1).
    :param alpha: Significance level for the confidence interval, strictly between 0 and 1.
    :param random_state: Seed for the single random generator shared by all iterations,
        which makes the interval reproducible while every resample differs.
    :return: Tuple (lower, upper) holding the alpha/2 and 1 - alpha/2 percentiles of the
        bootstrap estimates. Each bound has the shape of one metric result; DataFrame
        metrics yield DataFrames labelled like the first iteration's numeric columns.
    :raises ValueError: If `n_iterations` < 1, `alpha` is outside (0, 1), or the metric
        changes shape between iterations.
    """
    if n_iterations < 1:
        raise ValueError("n_iterations must be at least 1")
    if not 0 < alpha < 1:
        raise ValueError("alpha must be strictly between 0 and 1")

    # One generator drives every resample: its state advances on each draw, so the
    # iterations are distinct yet the whole run is reproducible from `random_state`.
    rng = np.random.RandomState(random_state)

    estimates = []
    template = None  # first DataFrame result, kept to label the returned bounds
    for _ in range(n_iterations):
        sample = bootstrap_sample(data, random_state=rng)
        metric = metric_func(sample)
        if isinstance(metric, pd.DataFrame):
            # Percentiles are only defined for numeric columns.
            metric = metric.select_dtypes(include="number")
            if template is None:
                template = metric
        estimates.append(np.asarray(metric, dtype=float))

    stacked = np.stack(estimates)
    # NaN-aware percentiles: an estimate that is undefined in one resample is ignored
    # for that entry instead of turning the whole bound into NaN.
    lower = np.nanpercentile(stacked, 100 * alpha / 2, axis=0)
    upper = np.nanpercentile(stacked, 100 * (1 - alpha / 2), axis=0)

    if template is not None:
        lower = pd.DataFrame(lower, index=template.index, columns=template.columns)
        upper = pd.DataFrame(upper, index=template.index, columns=template.columns)
    return lower, upper
        
        


In [None]:
def plot_bootstrap_distribution():
    """
    Placeholder for a plot of the bootstrap distribution of a metric.

    The function has no implementation yet; it raises instead of silently doing
    nothing so that an accidental call is noticed.

    :raises NotImplementedError: Always.
    """
    raise NotImplementedError("plot_bootstrap_distribution is not implemented yet")

In [3]:
import torch

In [4]:
# A 2 x 3 float tensor built from a nested Python list.
a_tensor = torch.tensor(
    [
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
    ]
)
# Cell output: a 3 x 4 tensor filled with ones.
torch.ones((3, 4))

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [5]:
# 3 x 4 tensor of standard-normal draws. No seed is set, so the values change on every run.
torch.randn(3,4)

tensor([[-0.1090, -0.7893,  1.3153,  0.9808],
        [-0.2056,  0.2684, -0.0027,  1.8391],
        [ 1.4302,  0.5655,  0.7846,  0.1274]])

In [6]:
# Zero-filled tensor with the same shape and dtype as `a_tensor` (defined in an earlier cell).
torch.zeros_like(a_tensor)

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [12]:
# Integers 0..23 arranged as a (2, 3, 4) tensor.
x = torch.arange(24).reshape(2,3,4)
z = x.reshape(6,4)                       # merges first two dims
# NOTE(review): only the last expression of a cell is displayed, so the bare `z` below
# produces no output; the rendered result is `x`.
z
x

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])

In [11]:
# `x` is the 3-D tensor of shape (2, 3, 4) from the reshape cell; permute(0, 2, 1) swaps
# its last two dims, giving shape (2, 4, 3).
# NOTE(review): this is not the (N, C, H, W) → (N, H*W, C) conversion, which needs a
# 4-D input whose H and W dims are flattened before the permute.
out = x.permute(0,2,1)
out

tensor([[[ 0,  4,  8],
         [ 1,  5,  9],
         [ 2,  6, 10],
         [ 3,  7, 11]],

        [[12, 16, 20],
         [13, 17, 21],
         [14, 18, 22],
         [15, 19, 23]]])

In [None]:
def __init__(self, num_features):
    """
    Store `num_features`, create a LogisticRegression model and configure a FeatureSelector.

    NOTE(review): this is defined at module level, not inside a class body, so it is a
    plain function that happens to take `self`; it looks like a fragment of a class that
    is not present in this notebook — confirm where it belongs.

    :param num_features: Value stored on `self` and forwarded to the feature selector.
    """
    self.num_features = num_features
    self.model = LogisticRegression(max_iter=1000)
    # The selector is created with its defaults and configured afterwards.
    # NOTE(review): assumes FeatureSelector.set_params accepts `num_features` and
    # `model` keywords — verify against the FeatBoost API.
    self.feature_selector = FeatureSelector()
    self.feature_selector.set_params(num_features=num_features)
    self.feature_selector.set_params(model=self.model)

def __iter__(self):
    """Return `self`, i.e. the object acts as its own iterator."""
    return self
def __next__(self):
    """
    Return the next slice of `self.data` of length up to `self.batch_size`.

    Reads `self.current_index`, `self.data` and `self.batch_size`, then advances
    `self.current_index` by `self.batch_size`.

    NOTE(review): none of these three attributes is set by the `__init__` defined just
    above in this cell, and both functions live at module level rather than in a class —
    presumably fragments of a batching iterator class; confirm before use.

    :raises StopIteration: When `self.current_index` has reached `len(self.data)`.
    """
    if self.current_index >= len(self.data):
        raise StopIteration
    # The final batch may be shorter than batch_size: slicing stops at the end of data.
    batch = self.data[self.current_index:self.current_index + self.batch_size]
    self.current_index += self.batch_size
    return batch
