# Importing necessary libraries

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import numpy as np

# CP method metrices(specific to Breast Cancer Dataset):- 

In [56]:
def avg_set_size_metric(conformal_set):
    lengths = torch.sum(conformal_set, dim=1)
    avg_set_size_len = torch.sum(lengths)/conformal_set.shape[0]
    return avg_set_size_len




def breast_cancer_class_Overlap_metric(conformal_set, label):

    overlap_count = 0

    for i in range(conformal_set.shape[0]):
        current_set = conformal_set[i]

        if (label[i] == 0 or label[i] == 1 or label[i] == 2 or label[i] == 3 ) and (current_set[4] == 1 or current_set[5] == 1 or current_set[6] == 1 or current_set[7] == 1):
            overlap_count += 1
        
        elif (label[i] == 4 or label[i] == 5 or label[i] == 6 or label[i] == 7 ) and (current_set[0] == 1 or current_set[1] == 1 or current_set[2] == 1 or current_set[3] == 1):
            overlap_count += 1


    perecentage_of_overlap  = (overlap_count/conformal_set.shape[0])*100

    return perecentage_of_overlap




def breast_cancer_confusion_set_Overlap_metric(conformal_set, label):

    overlap_count = 0

    for i in range(conformal_set.shape[0]):
        current_set = conformal_set[i]

        if (label[i] == 1) and (current_set[2] == 1):
            overlap_count += 1

        elif (label[i] == 2) and (current_set[1] == 1):
            overlap_count += 1

        

        elif (label[i] == 4) and (current_set[5] == 1):
            overlap_count += 1

        elif (label[i] == 5) and (current_set[4] == 1):
            overlap_count += 1
            
            
    perecentage_of_overlap  = (overlap_count/conformal_set.shape[0])*100

    return perecentage_of_overlap

# RAPS class :- 

In [57]:
class RAPS():
    def __init__(self, softmax, true_class, alpha, k_reg, lambd, rand=True):
        self.prob_output = softmax
        self.true_class = true_class
        self.alpha = (1 - alpha) * (1 + (1 / softmax.shape[0]))
        self.k_reg = k_reg
        self.lambd = lambd
        self.rand = rand
        
        
        
    def conformal_score(self):
        conformal_score = []
        for i in range(self.prob_output.shape[0]):
            true_class_prob = self.prob_output[i][self.true_class[i]]
            current_class_prob = self.prob_output[i]
            sorted_class_prob, _ = torch.sort(current_class_prob, descending=True)
            index = torch.nonzero(sorted_class_prob == true_class_prob).item()
            cumulative_sum = torch.sum(sorted_class_prob[:index + 1])
            
            if index - self.k_reg > 0:
               cumulative_sum = cumulative_sum + self.lambd*(index - self.k_reg)
            
            if self.rand:
                U = torch.rand(1).item()
                cumulative_sum = cumulative_sum - U*sorted_class_prob[index]

            conformal_score.append(cumulative_sum)
        
        conformal_score = torch.tensor(conformal_score)
        
        return conformal_score
    
    
    
    def quantile(self):
        conformal_scores = self.conformal_score()
        quantile_value = torch.quantile(conformal_scores, self.alpha)
        return quantile_value
    
    
    
    def prediction(self, softmax, quantile_value):
        prob_output = softmax
        prediction = torch.zeros(prob_output.shape[0], prob_output.shape[1])
        for i in range(prob_output.shape[0]):
            current_class_prob = prob_output[i]
            sorted_class_prob, _ = torch.sort(current_class_prob, descending=True)
            sum = 0
            j = 0
            for idx in range(len(sorted_class_prob)):
                if sum <= quantile_value:
                    sum += sorted_class_prob[idx]
                    if idx - self.k_reg > 0:
                        sum = sum + self.lambd*(idx - self.k_reg)
                    j += 1
                else:
                    break
                    
            """
            
            if self.rand:
                U = torch.rand(1).item()
                if j != prob_output.shape[1]:
                    N = torch.sum(sorted_class_prob[:j + 1]) - quantile_value
                else:
                    N = torch.sum(sorted_class_prob[:j]) - quantile_value
                if idx - self.k_reg > 0:
                    N += self.lambd*(j - self.k_reg)

                if j != prob_output.shape[1]:
                    D = sorted_class_prob[j]
                else:
                    D = sorted_class_prob[j-1]
                if idx - self.k_reg > 0:
                    D += self.lambd

                if N/D <= U:
                    j = j -1
                    
                """ 
        
            
            
            
                    
            for idx in range(j):
                index = torch.nonzero(current_class_prob == sorted_class_prob[idx]).item()
                prediction[i][index] = 1.0
                
        return prediction

# Main function :- 

In [58]:
def main(expt_no, Trials, alpha,k_reg, lambd, rand):
    
    avg_set_size_len_for_T_trials = []
    avg_coverage_gap_for_T_trials = []
    avg_coverage_for_T_trials = []

    normal_avg_set_size_len_for_T_trials = []
    abnormal_avg_set_size_len_for_T_trials = []


    perecentage_of_overlap_for_T_trials = []

    confusion_set_Overlap_metric_for_T_trials = []
    
    
    
    for t in range(Trials):
        
        # loading the annotation file :-
        #Expt1:-
        #path = ''
        #df = pd.read_csv(path)
        
        #Expt2:-
        #path = ''
        #df = pd.read_csv(path)
        
        #Expt3:-
        #path = ''
        #df = pd.read_csv(path)
        
        #Expt4:-
        #path = ''
        #df = pd.read_csv(path)
        
        
        # calib-test split :- 
        feature_test, feature_calib = train_test_split(df, test_size = 0.3, stratify=df['Label'], random_state=42)

        feature_test = feature_test.reset_index(drop=True)
        feature_calib = feature_calib.reset_index(drop=True)

        prob_output = feature_calib.iloc[:,:-1]
        df_np = prob_output.values 
        df_prob_output_calib = torch.tensor(df_np, dtype=torch.float32)

        prob_output = feature_test.iloc[:,:-1]
        df_np = prob_output.values
        df_prob_output_test = torch.tensor(df_np, dtype=torch.float32)


        true_class = feature_calib.iloc[:,-1]
        df_np = true_class.values
        df_true_class_calib = torch.tensor(df_np, dtype=torch.int)


        true_class = feature_test.iloc[:,-1]
        df_np = true_class.values
        df_true_class_test = torch.tensor(df_np, dtype=torch.int)
        
        conformal_wrapper = RAPS(df_prob_output_calib, df_true_class_calib, alpha, k_reg, lambd, rand)
        quantile_value = conformal_wrapper.quantile()
        
        conformal_set = conformal_wrapper.prediction(df_prob_output_test, quantile_value)
        
        
        
        
        if expt_no == 1:
            avg_set_size = avg_set_size_metric(conformal_set)
            #print(f'avg_set_size:- {avg_set_size}')
            avg_set_size_len_for_T_trials.append(avg_set_size)
        
        
        elif expt_no == 2:
            label = df_true_class_test
            indices_0 = torch.nonzero(label == 0).squeeze()
            indices_1 = torch.nonzero(label == 1).squeeze()
            indices_2 = torch.nonzero(label == 2).squeeze()
            indices_3 = torch.nonzero(label == 3).squeeze()
            indices_4 = torch.nonzero(label == 4).squeeze()
            indices_5 = torch.nonzero(label == 5).squeeze()
            indices_6 = torch.nonzero(label == 6).squeeze()
            indices_7 = torch.nonzero(label == 7).squeeze()
            
            
            Normal_idx = torch.cat((indices_0, indices_1, indices_2, indices_3))
            Abnormal_idx = torch.cat((indices_4, indices_5, indices_6, indices_7))

            normal_conformal_prediction_set = conformal_set[Normal_idx, :]
            abnormal_conformal_prediction_set = conformal_set[Abnormal_idx, :]

            normal_avg_set_size_len = avg_set_size_metric(normal_conformal_prediction_set)
            abnormal_avg_set_size_len = avg_set_size_metric(abnormal_conformal_prediction_set)

            normal_avg_set_size_len_for_T_trials.append(normal_avg_set_size_len)
            abnormal_avg_set_size_len_for_T_trials.append(abnormal_avg_set_size_len)
            
            
        
        elif expt_no == 3:
            perecentage_of_overlap = breast_cancer_class_Overlap_metric(conformal_set, df_true_class_test)
            
            perecentage_of_overlap_for_T_trials.append(perecentage_of_overlap)


        elif expt_no == 4:
            perecentage_of_confusion = breast_cancer_confusion_set_Overlap_metric(conformal_set, df_true_class_test)
            #print(f'perecentage_of_confusion :- {perecentage_of_confusion}')

            confusion_set_Overlap_metric_for_T_trials.append(perecentage_of_confusion)
            
            
            
    
    
    
    
    if expt_no == 1:
        avg_set_size_len_for_T_trials = np.array(avg_set_size_len_for_T_trials)
        average = np.mean(avg_set_size_len_for_T_trials)
        std_dev = np.std(avg_set_size_len_for_T_trials, ddof=1)

        
        #print(f"Average set_size_len_for_T_trials: {average}")
        #print(f"Standard Deviation set_size_len_for_T_trials: {std_dev}")
        
        return average, std_dev
        
        
    
    
    elif expt_no == 2:
        

        normal_avg_set_size_len_for_T_trials = np.array(normal_avg_set_size_len_for_T_trials)
        normal_average_set_size_len = np.mean(normal_avg_set_size_len_for_T_trials)
        normal_std_dev_set_size_len = np.std(normal_avg_set_size_len_for_T_trials, ddof=1)

        



        abnormal_avg_set_size_len_for_T_trials = np.array(abnormal_avg_set_size_len_for_T_trials)
        abnormal_average_set_size_len = np.mean(abnormal_avg_set_size_len_for_T_trials)
        abnormal_std_dev_set_size_len = np.std(abnormal_avg_set_size_len_for_T_trials, ddof=1)

        
        
        return normal_average_set_size_len, normal_std_dev_set_size_len, abnormal_average_set_size_len, abnormal_std_dev_set_size_len

        
        
        

    elif expt_no == 3:
        perecentage_of_overlap_for_T_trials = np.array(perecentage_of_overlap_for_T_trials)
        average_perecentage_of_overlap_for_T_trials = np.mean(perecentage_of_overlap_for_T_trials)
        std_dev_perecentage_of_overlap_for_T_trials = np.std(perecentage_of_overlap_for_T_trials, ddof=1)

       
        
        return average_perecentage_of_overlap_for_T_trials, std_dev_perecentage_of_overlap_for_T_trials

        
        
        
        

    elif expt_no == 4:
        confusion_set_Overlap_metric_for_T_trials = np.array(confusion_set_Overlap_metric_for_T_trials)
        average_confusion_set_Overlap_metric_for_T_trials = np.mean(confusion_set_Overlap_metric_for_T_trials)
        std_dev_confusion_set_Overlap_metric_for_T_trials = np.std(confusion_set_Overlap_metric_for_T_trials, ddof=1)

     
        
        
        return average_confusion_set_Overlap_metric_for_T_trials, std_dev_confusion_set_Overlap_metric_for_T_trials
        

# Grid search for hyperparameter tuning(k_reg, lambd) for RAPS method

In [62]:
expt_no = 4
Trials = 10
alpha = 0.05
k_reg = [1, 2, 3, 4, 5, 6, 7]
lambd = [0, 0.0001, 0.001, 0.01, 0.02, 0.05, 0.2, 0.5, 0.7, 1.0]
rand = True



dic_expt_1 = {
    'average_len' : [],
    'std_dev_len' : [],
 }


dic_expt_3 = {
    'average_perecentage_of_overlap_for_T_trials' : [],
    'std_dev_perecentage_of_overlap_for_T_trials' : []
}


dic_expt_4 = {
    'average_confusion_set_Overlap_metric_for_T_trials' : [],
    'std_dev_confusion_set_Overlap_metric_for_T_trials' : []
   
}



dic_expt_2 = {
    'normal_average_set_size_len' : [],
    'normal_std_dev_set_size_len' : [],
    'abnormal_average_set_size_len' : [],
    'abnormal_std_dev_set_size_len' : []
}
    
    
    
    
for i in range(len(k_reg)):
    for j in range(len(lambd)):
        print(f'k_reg :- {k_reg[i]}, lambd :- {lambd[j]}')
        
        main(expt_no, Trials, alpha, k_reg[i], lambd[j], rand)
        
        if expt_no == 1:
            average_len, std_dev_len = main(expt_no, Trials, alpha, k_reg[i], lambd[j], rand)
            dic_expt_1['average_len'].append(average_len)
            dic_expt_1['std_dev_len'].append(std_dev_len)
            
        elif expt_no == 2:
            normal_average_set_size_len, normal_std_dev_set_size_len, abnormal_average_set_size_len, abnormal_std_dev_set_size_len = main(expt_no, Trials, alpha, k_reg[i], lambd[j], rand)
            dic_expt_2['normal_average_set_size_len'].append(normal_average_set_size_len)
            dic_expt_2['normal_std_dev_set_size_len'].append(normal_std_dev_set_size_len)
            dic_expt_2['abnormal_average_set_size_len'].append(abnormal_average_set_size_len)
            dic_expt_2['abnormal_std_dev_set_size_len'].append(abnormal_std_dev_set_size_len)
            
            
        elif expt_no == 3:
            average_perecentage_of_overlap_for_T_trials, std_dev_perecentage_of_overlap_for_T_trials = main(expt_no, Trials, alpha, k_reg[i], lambd[j], rand) 
            dic_expt_3['average_perecentage_of_overlap_for_T_trials'].append(average_perecentage_of_overlap_for_T_trials)
            dic_expt_3['std_dev_perecentage_of_overlap_for_T_trials'].append(std_dev_perecentage_of_overlap_for_T_trials)
            
            
        elif expt_no == 4:
            average_confusion_set_Overlap_metric_for_T_trials, std_dev_confusion_set_Overlap_metric_for_T_trials = main(expt_no, Trials, alpha, k_reg[i], lambd[j], rand)
            dic_expt_4['average_confusion_set_Overlap_metric_for_T_trials'].append(average_confusion_set_Overlap_metric_for_T_trials)
            dic_expt_4['std_dev_confusion_set_Overlap_metric_for_T_trials'].append(std_dev_confusion_set_Overlap_metric_for_T_trials)
            
        

k_reg :- 1, lambd :- 0
k_reg :- 1, lambd :- 0.0001
k_reg :- 1, lambd :- 0.001
k_reg :- 1, lambd :- 0.01
k_reg :- 1, lambd :- 0.02
k_reg :- 1, lambd :- 0.05
k_reg :- 1, lambd :- 0.2
k_reg :- 1, lambd :- 0.5
k_reg :- 1, lambd :- 0.7
k_reg :- 1, lambd :- 1.0
k_reg :- 2, lambd :- 0
k_reg :- 2, lambd :- 0.0001
k_reg :- 2, lambd :- 0.001
k_reg :- 2, lambd :- 0.01
k_reg :- 2, lambd :- 0.02
k_reg :- 2, lambd :- 0.05
k_reg :- 2, lambd :- 0.2
k_reg :- 2, lambd :- 0.5
k_reg :- 2, lambd :- 0.7
k_reg :- 2, lambd :- 1.0
k_reg :- 3, lambd :- 0
k_reg :- 3, lambd :- 0.0001
k_reg :- 3, lambd :- 0.001
k_reg :- 3, lambd :- 0.01
k_reg :- 3, lambd :- 0.02
k_reg :- 3, lambd :- 0.05
k_reg :- 3, lambd :- 0.2
k_reg :- 3, lambd :- 0.5
k_reg :- 3, lambd :- 0.7
k_reg :- 3, lambd :- 1.0
k_reg :- 4, lambd :- 0
k_reg :- 4, lambd :- 0.0001
k_reg :- 4, lambd :- 0.001
k_reg :- 4, lambd :- 0.01
k_reg :- 4, lambd :- 0.02
k_reg :- 4, lambd :- 0.05
k_reg :- 4, lambd :- 0.2
k_reg :- 4, lambd :- 0.5
k_reg :- 4, lambd :- 0.7
k

In [63]:
df = pd.DataFrame(dic_expt_4)
df

Unnamed: 0,average_confusion_set_Overlap_metric_for_T_trials,std_dev_confusion_set_Overlap_metric_for_T_trials
0,23.142685,1.592158
1,22.071042,1.795899
2,22.221553,1.725506
3,23.251054,1.821859
4,24.388922,1.749151
...,...,...
65,22.600843,1.829228
66,22.835641,1.729143
67,23.594220,2.247185
68,22.552679,1.502627


In [64]:
min_index = df['average_confusion_set_Overlap_metric_for_T_trials'].idxmin()
min_row = df.loc[min_index]
min_row

average_confusion_set_Overlap_metric_for_T_trials    21.366647
std_dev_confusion_set_Overlap_metric_for_T_trials     0.875421
Name: 39, dtype: float64