## Libraries

In [1]:
import pandas as pd
import numpy as np
from crepes import ConformalClassifier, ConformalPredictiveSystem
from crepes.extras import hinge, margin, binning, DifficultyEstimator

## Data

### Calibration 

In [6]:
data_dir = "calibration_and_test.xlsx" #directory
df_cal=pd.read_excel(data_dir,sheet_name='conf_score_25') #cal dataset with conf=0.25
df_cal_tp = df_cal[df_cal['TP']==1] # True Positive Predictions Only

# 43 samples from each class
sample_size = 43
df_sampled = df_cal_tp.groupby('pred_obj_id').apply(lambda x: x.sample(sample_size))
df_sampled = df_sampled.reset_index(drop=True)

df_test=pd.read_excel(data_dir,sheet_name='test_conf_score_25') #test dataset with conf=0.25
df_test_filtered = df_test[df_test['pred_obj_id'] != '-']
df_calibration=df_sampled
df_calibration.drop(['ascaris_sig'] ,axis=1,inplace=True)
df_testing=df_test_filtered

In [21]:
df_calibration['background_conf_score'] = 1 -df_calibration['objectness_score']
df_testing['background_conf_score'] = 1 -df_testing['objectness_score']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_testing['background_conf_score'] = 1 -df_testing['objectness_score']


In [10]:
from my_functions import (softmax, 
                          softmax_df, 
                          calculate_logit, 
                          calculate_sigmoid, 
                          calculate_softmax,
                          calculate_hinge_scores,
                          calculate_p_values,
                          calculate_prediction_sets)

## Calibration stage

In [17]:
alphas_cal = hinge(
    df_calibration[[
        'ascaris_conf_score',
        'trichuris_conf_score',
        'hookworm_conf_score',
        'schistosoma_conf_score',
        'background_conf_score']].values,
    [0,1,2,3],df_calibration[['pred_obj_id']].values
)
cc_std = ConformalClassifier()
cc_std.fit(alphas_cal)
display(cc_std)

ConformalClassifier(fitted=True, mondrian=False)

## Testing stage

### Applying the hinge on the testing dataset

In [22]:
conf_score_columns = [
    'ascaris_conf_score',
    'trichuris_conf_score',
    'hookworm_conf_score',
    'schistosoma_conf_score',
    'background_conf_score']
hinge_score_cols = [
    'ascaris_hinge_score', 
    'trichuris_hinge_score', 
    'hookworm_hinge_score', 
    'schistosoma_hinge_score',
    'background_hinge_score']
df_testing = calculate_hinge_scores(
    df_testing, 
    conf_score_columns, 
    hinge_score_cols)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [23]:
p_value_cols = [
    'ascaris_p_value', 
    'trichuris_p_value', 
    'hookworm_p_value', 
    'schistosoma_p_value',
    'background_p_value']
df_testing = calculate_p_values(
    df_testing, 
    hinge_score_cols, 
    p_value_cols, 
    cc_std)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


In [24]:
p_set_cols = [
    'ascaris_p_set', 
    'trichuris_p_set', 
    'hookworm_p_set', 
    'schistosoma_p_set',
    'background_p_set']
df_testing = calculate_prediction_sets(
    df_testing, 
    hinge_score_cols, 
    p_set_cols, 
    cc_std,
    confidence=0.99)

In [25]:
df_testing

Unnamed: 0,filename,image_width,image_height,gt_obj_id,gt_x_center,gt_y_center,gt_obj_width,gt_obj_height,pred_obj_id,pred_x_center,...,ascaris_p_value,trichuris_p_value,hookworm_p_value,schistosoma_p_value,background_p_value,ascaris_p_set,trichuris_p_set,hookworm_p_set,schistosoma_p_set,background_p_set
0,11184288_1608_36_af7b24d7-fd97-42a0-acc9-1b7a9...,1280,720,3,0.9605,0.6972,0.0773,0.1583,3,0.961328,...,0.002729,0.000820,0.002253,0.202315,0.060503,0,0,0,1,1
1,11184288_1608_36_af7b24d7-fd97-42a0-acc9-1b7a9...,1280,720,0,0.4059,0.4347,0.0586,0.1278,2,0.406641,...,0.001614,0.004178,0.058210,0.001404,0.048762,0,0,1,0,1
2,11184288_1608_36_af7b24d7-fd97-42a0-acc9-1b7a9...,1280,720,-,-,-,-,-,1,0.405078,...,0.002574,0.166411,0.002001,0.002868,0.002908,0,1,0,0,0
3,11184394_1608_36_af7b24d7-fd97-42a0-acc9-1b7a9...,1280,720,3,0.1703,0.7111,0.0984,0.125,3,0.16875,...,0.000350,0.004011,0.000485,0.396645,0.017952,0,0,0,1,1
4,11184394_1608_36_af7b24d7-fd97-42a0-acc9-1b7a9...,1280,720,-,-,-,-,-,0,0.692187,...,0.433057,0.001937,0.004992,0.001965,0.006169,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11869,9354474_1379_31_75602595-167c-4758-8f96-e8ee5e...,1280,720,0,0.6988,0.5833,0.0602,0.0833,0,0.698828,...,0.863311,0.004978,0.000099,0.000475,0.005004,1,0,0,0,0
11870,9354474_1379_31_75602595-167c-4758-8f96-e8ee5e...,1280,720,0,0.4684,0.2347,0.0586,0.1028,0,0.46875,...,0.954962,0.000772,0.002853,0.000987,0.000262,1,0,0,0,0
11871,9357486_1384_31_941af06b-54a3-4552-b75f-4e6e72...,1280,720,0,0.0187,0.7007,0.0375,0.0958,0,0.019141,...,0.710480,0.003843,0.001716,0.003081,0.005080,1,0,0,0,0
11873,9419118_1393_31_55d950b3-e8d5-48b7-b7a7-30014f...,1280,720,-,-,-,-,-,3,0.970312,...,0.001782,0.005379,0.000774,0.072923,0.179330,0,0,0,1,1


In [29]:
# Create a boolean mask for each condition
mask1 = (df_testing['gt_obj_id'] == any) & (df_testing['pred_obj_id'] == any)
mask2 = df_testing['ascaris_p_set'] == 0
mask3 = (df_testing['trichuris_p_set'] == 0) & (df_testing['hookworm_p_set'] == 0) & (df_testing['schistosoma_p_set'] == 0)

# Combine the masks
mask = mask1 & mask2 & mask3

# Filter the DataFrame
df_filtered = df_testing[mask]

# Count the number of rows
count = len(df_filtered)

print(count)

# Create a boolean mask for each condition
mask1 = (df_testing['gt_obj_id'] == 1) & (df_testing['pred_obj_id'] != 1)
mask2 = df_testing['trichuris_p_set'] == 0
mask3 = (df_testing['ascaris_p_set'] == 0) & (df_testing['hookworm_p_set'] == 0) & (df_testing['schistosoma_p_set'] == 0)

# Combine the masks
mask = mask1 & mask2 & mask3

# Filter the DataFrame
df_filtered = df_testing[mask]

# Count the number of rows
count = len(df_filtered)

print(count)

# Create a boolean mask for each condition
mask1 = (df_testing['gt_obj_id'] == 2) & (df_testing['pred_obj_id'] != 2)
mask2 = df_testing['hookworm_p_set'] == 0
mask3 = (df_testing['ascaris_p_set'] == 0) & (df_testing['trichuris_p_set'] == 0) & (df_testing['schistosoma_p_set'] == 0)

# Combine the masks
mask = mask1 & mask2 & mask3

# Filter the DataFrame
df_filtered = df_testing[mask]

# Count the number of rows
count = len(df_filtered)

print(count)

# Create a boolean mask for each condition
mask1 = (df_testing['gt_obj_id'] == 3) & (df_testing['pred_obj_id'] != 3)
mask2 = df_testing['schistosoma_p_set'] == 0
mask3 = (df_testing['ascaris_p_set'] == 0) & (df_testing['trichuris_p_set'] == 0) & (df_testing['hookworm_p_set'] == 0)

# Combine the masks
mask = mask1 & mask2 & mask3

# Filter the DataFrame
df_filtered = df_testing[mask]

# Count the number of rows
count = len(df_filtered)

print(count)

0
19
7
2


### One Class Per Set with TP

In [73]:
def count_rows(df, obj_id, p_set_one, p_set_zero):
    mask1 = (df['gt_obj_id'] == obj_id) & (df['pred_obj_id'] == obj_id)
    mask2 = df[p_set_one] == 1
    mask3 = df[p_set_zero].apply(lambda x: all(val == 0 for val in x), axis=1)
    mask = mask1 & mask2 & mask3
    df_filtered = df[mask]
    count = len(df_filtered)
    return count

p_set_zero = ['ascaris_p_set', 'trichuris_p_set', 'hookworm_p_set', 'schistosoma_p_set']

CPTP_1 = []
CPTP_1.append(count_rows(df_testing, 0, 'ascaris_p_set', [col for col in p_set_zero if col != 'ascaris_p_set']))
CPTP_1.append(count_rows(df_testing, 1, 'trichuris_p_set', [col for col in p_set_zero if col != 'trichuris_p_set']))
CPTP_1.append(count_rows(df_testing, 2, 'hookworm_p_set', [col for col in p_set_zero if col != 'hookworm_p_set']))
CPTP_1.append(count_rows(df_testing, 3, 'schistosoma_p_set', [col for col in p_set_zero if col != 'schistosoma_p_set']))

print(CPTP_1)

[8846, 454, 16, 129]


### One Class Per Set with FP

In [74]:
def count_rows(df, obj_id, p_set_one, p_set_zero):
    mask1 = (df['gt_obj_id'] != obj_id) & (df['pred_obj_id'] == obj_id)
    mask2 = df[p_set_one] == 1
    mask3 = df[p_set_zero].apply(lambda x: all(val == 0 for val in x), axis=1)
    mask = mask1 & mask2 & mask3
    df_filtered = df[mask]
    count = len(df_filtered)
    return count

p_set_zero = ['ascaris_p_set', 'trichuris_p_set', 'hookworm_p_set', 'schistosoma_p_set']

CPFP = []
CPFP.append(count_rows(df_testing, 0, 'ascaris_p_set', [col for col in p_set_zero if col != 'ascaris_p_set']))
CPFP.append(count_rows(df_testing, 1, 'trichuris_p_set', [col for col in p_set_zero if col != 'trichuris_p_set']))
CPFP.append(count_rows(df_testing, 2, 'hookworm_p_set', [col for col in p_set_zero if col != 'hookworm_p_set']))
CPFP.append(count_rows(df_testing, 3, 'schistosoma_p_set', [col for col in p_set_zero if col != 'schistosoma_p_set']))

print(CPFP)

[1002, 83, 80, 246]


In [60]:
# Create a boolean mask for each condition
mask1 = (df_testing['ascaris_p_set'] == 0)
mask2 = (df_testing['trichuris_p_set'] == 0)
mask3 = (df_testing['hookworm_p_set'] == 0)
mask4 = (df_testing['schistosoma_p_set'] == 0)

# Combine the masks
mask = mask1 & mask2 & mask3 & mask4

# Filter the DataFrame
df_filtered = df_testing[mask]

# Count the number of rows
count = len(df_filtered)

print(count)

205


In [37]:
# Create a boolean mask for each condition
mask1 = df_testing['gt_obj_id'].isin(['-', '0', '1', '2', '3'])
mask2 = (df_testing['pred_obj_id'] == 0)
mask3 = (df_testing['ascaris_p_set'] == 0)
mask4 = (df_testing['trichuris_p_set'] == 0)
mask5 = (df_testing['hookworm_p_set'] == 0)
mask6 = (df_testing['schistosoma_p_set'] == 0)

# Combine the masks
mask = mask1 & mask2 & mask3 & mask4 & mask5 & mask6

# Filter the DataFrame
df_filtered = df_testing[mask]

# Count the number of rows
count = len(df_filtered)

print(count)

# Create a boolean mask for each condition
mask1 = df_testing['gt_obj_id'].isin(['-', '0', '1', '2', '3'])
mask2 = (df_testing['pred_obj_id'] == 1)
mask3 = (df_testing['ascaris_p_set'] == 0)
mask4 = (df_testing['trichuris_p_set'] == 0)
mask5 = (df_testing['hookworm_p_set'] == 0)
mask6 = (df_testing['schistosoma_p_set'] == 0)

# Combine the masks
mask = mask1 & mask2 & mask3 & mask4 & mask5 & mask6

# Filter the DataFrame
df_filtered = df_testing[mask]

# Count the number of rows
count = len(df_filtered)

print(count)

# Create a boolean mask for each condition
mask1 = df_testing['gt_obj_id'].isin(['-', '0', '1', '2', '3'])
mask2 = (df_testing['pred_obj_id'] == 2)
mask3 = (df_testing['ascaris_p_set'] == 0)
mask4 = (df_testing['trichuris_p_set'] == 0)
mask5 = (df_testing['hookworm_p_set'] == 0)
mask6 = (df_testing['schistosoma_p_set'] == 0)

# Combine the masks
mask = mask1 & mask2 & mask3 & mask4 & mask5 & mask6

# Filter the DataFrame
df_filtered = df_testing[mask]

# Count the number of rows
count = len(df_filtered)

print(count)

# Create a boolean mask for each condition
mask1 = df_testing['gt_obj_id'].isin(['-', '0', '1', '2', '3'])
mask2 = (df_testing['pred_obj_id'] == 3)
mask3 = (df_testing['ascaris_p_set'] == 0)
mask4 = (df_testing['trichuris_p_set'] == 0)
mask5 = (df_testing['hookworm_p_set'] == 0)
mask6 = (df_testing['schistosoma_p_set'] == 0)

# Combine the masks
mask = mask1 & mask2 & mask3 & mask4 & mask5 & mask6

# Filter the DataFrame
df_filtered = df_testing[mask]

# Count the number of rows
count = len(df_filtered)

print(count)

492
57
40
181


In [26]:
def count_rows(df, pred_obj_id):
    mask1 = df['gt_obj_id'].isin(['-', '0', '1', '2', '3'])
    mask2 = (df['pred_obj_id'] == pred_obj_id)
    mask3 = (df['ascaris_p_set'] == 0)
    mask4 = (df['trichuris_p_set'] == 0)
    mask5 = (df['hookworm_p_set'] == 0)
    mask6 = (df['schistosoma_p_set'] == 0)
    mask7 = (df['background_p_set'] == 0)
    mask = mask1 & mask2 & mask3 & mask4 & mask5 & mask6 & mask7
    df_filtered = df[mask]
    count = len(df_filtered)
    return count

print(count_rows(df_testing, 0))
print(count_rows(df_testing, 1))
print(count_rows(df_testing, 2))
print(count_rows(df_testing, 3))

0
0
0
0


In [70]:
p_set_cols = ['ascaris_p_set', 'trichuris_p_set', 'hookworm_p_set', 'schistosoma_p_set']

# Create a mask where each value is True if the corresponding value in p_set_cols is not 0
mask = (df_testing[p_set_cols] != 0)

# Sum the True values along the rows. This gives the number of non-zero values in each row.
non_zero_counts = mask.sum(axis=1)

# Create a mask where each value is True if the corresponding value in non_zero_counts is more than 1
mask = (non_zero_counts > 1)

# Use the mask to filter the DataFrame
df_filtered = df_testing[mask]
df_filtered[['gt_obj_id', 'pred_obj_id', 
             'ascaris_conf_score',
             'trichuris_conf_score',
             'hookworm_conf_score',
             'schistosoma_conf_score', 
             'ascaris_p_set', 
             'trichuris_p_set', 
             'hookworm_p_set', 
             'schistosoma_p_set']]

Unnamed: 0,gt_obj_id,pred_obj_id,ascaris_conf_score,trichuris_conf_score,hookworm_conf_score,schistosoma_conf_score,ascaris_p_set,trichuris_p_set,hookworm_p_set,schistosoma_p_set
902,1,1,0.301193,0.351152,0.005601,0.001631,1,1,0,0
2092,-,1,0.355089,0.458751,0.005392,0.001316,1,1,0,0
2155,1,0,0.431732,0.378634,0.003548,0.001008,1,1,0,0
2477,-,1,0.298776,0.34148,0.004575,0.001411,1,1,0,0
2642,1,0,0.393372,0.279044,0.015524,0.001046,1,1,0,0
2894,0,2,0.341559,0.003329,0.424501,0.011766,1,0,1,0
3935,-,2,0.000512,0.371302,0.468278,0.008835,0,1,1,0
3953,-,2,0.321308,0.000991,0.339459,0.011739,1,0,1,0
4387,2,2,0.327979,0.007173,0.338701,0.006878,1,0,1,0
4401,-,2,0.283149,0.007673,0.546001,0.007348,1,0,1,0


In [30]:
p_set_cols = ['ascaris_p_set', 'trichuris_p_set', 'hookworm_p_set', 'schistosoma_p_set', 'background_p_set']

# Create a mask where each value is True if the corresponding value in p_set_cols is 0
mask = (df_testing[p_set_cols] == 0)

# Use the all function to check if all values in each row are True
mask_all_zero = mask.all(axis=1)

# Use the mask to filter the DataFrame
df_filtered_all_zero = df_testing[mask_all_zero]
df_filtered_all_zero[['gt_obj_id', 'pred_obj_id', 
                      'ascaris_conf_score',
                      'trichuris_conf_score',
                      'hookworm_conf_score',
                      'schistosoma_conf_score', 
                      'background_conf_score',
                      'ascaris_p_set', 
                      'trichuris_p_set', 
                      'hookworm_p_set', 
                      'schistosoma_p_set',
                      'background_p_set']]

Unnamed: 0,gt_obj_id,pred_obj_id,ascaris_conf_score,trichuris_conf_score,hookworm_conf_score,schistosoma_conf_score,background_conf_score,ascaris_p_set,trichuris_p_set,hookworm_p_set,schistosoma_p_set,background_p_set


In [62]:
df_testing.to_csv('hinge_conf_score.csv',index=False)