In [1]:
import pandas as pd
import numpy as np

# Load both score CSVs (the file names indicate DeiT and ResNet-34 outputs).
# Each file is expected to provide 'isic_id', 'prediction' and 'target' columns,
# since those are the columns used below.
df1 = pd.read_csv('./LORA/scores_deit_ISIC_2024_Training_Input.csv')
df2 = pd.read_csv('./LORA/scores_resnet34_ISIC_2024_Training_Input.csv')

# Stack the two tables row-wise; ignore_index avoids duplicated index labels
# in the stacked frame (the grouping below is by 'isic_id', not by index).
combined = pd.concat([df1, df2], ignore_index=True)

# Clip predictions to avoid log(0) or log(∞)
eps = 1e-7
combined['prediction_clipped'] = combined['prediction'].clip(eps, 1 - eps)

# Convert to logits: log(p / (1 - p))
combined['logit'] = np.log(combined['prediction_clipped'] / (1 - combined['prediction_clipped']))

# Group by isic_id and average logits
logit_avg = combined.groupby('isic_id', as_index=False).agg({
    'logit': 'mean',
    'target': 'first'  # assuming target is consistent across both files
})

# Convert averaged logits back to probabilities (sigmoid)
logit_avg['prediction'] = 1 / (1 + np.exp(-logit_avg['logit']))

# Keep only required columns. Take an explicit copy so that later cells can add
# columns to `result` without triggering pandas' SettingWithCopyWarning and
# without any ambiguity about whether `logit_avg` is affected.
result = logit_avg[['isic_id', 'prediction', 'target']].copy()

# Save the ensembled predictions to a new CSV in the working directory
result.to_csv('combined_avg_predictions.csv', index=False)

In [11]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score

# Decision threshold applied to the ensembled probability
threshold = 0.97

# Step 1: Binarize predictions at the threshold.
# `assign` returns a new frame instead of writing a column into a frame that
# was obtained by slicing another one (avoids SettingWithCopyWarning) and keeps
# this cell safe to re-run.
result = result.assign(predicted_label=(result['prediction'] >= threshold).astype(int))

# Step 2: Calculate F1 score
f1 = f1_score(result['target'], result['predicted_label'])
print(f"F1 Score: {f1:.4f}")

# Step 3: Confusion matrix. labels=[0, 1] pins the matrix to 2x2 so the
# four-way unpacking still works when one class is absent from both columns
# (e.g. a threshold so high that nothing is predicted positive on an
# all-negative subset).
tn, fp, fn, tp = confusion_matrix(
    result['target'], result['predicted_label'], labels=[0, 1]
).ravel()
# NOTE: values are printed in the order TN, FN, TP, FP, which differs from the
# unpacking order above (TN, FP, FN, TP).
print(tn,fn,tp,fp)


F1 Score: 0.0699
79356 48 31 777


In [None]:
import torch
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import time
from sklearn.metrics import roc_auc_score
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
import numpy as np
import pandas as pd
import pandas.api.types
from sklearn.metrics import roc_curve, auc, roc_auc_score
import torchvision
import timm
def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str, min_tpr: float=0.80) -> float:
    '''
    2024 ISIC Challenge metric: pAUC

    Given a solution file and submission file, this function returns the
    partial area under the receiver operating characteristic (pAUC)
    above a given true positive rate (TPR), by default 0.80.
    https://en.wikipedia.org/wiki/Partial_Area_Under_the_ROC_Curve.

    (c) 2024 Nicholas R Kurtansky, MSKCC

    Args:
        solution: ground truth pd.DataFrame of 1s and 0s; only the
            'is_malignant' column is used.
        submission: pd.DataFrame of predicted scores ranging [0, 1]; only the
            'prediction' column is used.
        row_id_column_name: accepted for interface compatibility; it is not
            referenced by this implementation.
        min_tpr: TPR above which the area is measured. The area is computed
            over FPR in [0, max_fpr] of the label-flipped problem, where
            max_fpr = abs(1 - min_tpr).

    Returns:
        Float value range [0, max_fpr]

    Raises:
        ParticipantVisibleError: if the submission values are not numeric.
        ValueError: if abs(1 - min_tpr) is not in (0, 1].

    Neither input DataFrame is modified.
    '''
    # Select the relevant columns into new frames instead of deleting the other
    # columns from the caller's DataFrames, so the inputs stay intact and the
    # function can be called repeatedly on the same frames.
    solution = solution.loc[:, solution.columns == 'is_malignant']
    submission = submission.loc[:, submission.columns == 'prediction']

    # check submission is numeric
    if not pandas.api.types.is_numeric_dtype(submission.values):
        # NOTE(review): ParticipantVisibleError is not defined in the visible
        # part of this file -- confirm it is defined/imported elsewhere,
        # otherwise this line raises NameError instead.
        raise ParticipantVisibleError('Submission target column must be numeric')

    max_fpr = abs(1-min_tpr)
    # Validate before doing any ROC work so a bad min_tpr fails fast.
    if max_fpr <= 0 or max_fpr > 1:
        raise ValueError("Expected min_tpr in range [0, 1), got: %r" % min_tpr)

    # rescale the target. set 0s to 1s and 1s to 0s (since sklearn only has max_fpr)
    v_gt = abs(solution.values.ravel()-1)

    # flip the submissions to their complements (negation reverses the ranking)
    v_pred = -1.0*submission.values.ravel()

    # using sklearn.metric functions: (1) roc_curve and (2) auc
    fpr, tpr, _ = roc_curve(v_gt, v_pred, sample_weight=None)
    if max_fpr == 1:
        # min_tpr == 0 (or 2): no truncation, return the full area
        return auc(fpr, tpr)

    # Add a single point at max_fpr by linear interpolation
    stop = np.searchsorted(fpr, max_fpr, "right")
    x_interp = [fpr[stop - 1], fpr[stop]]
    y_interp = [tpr[stop - 1], tpr[stop]]
    tpr = np.append(tpr[:stop], np.interp(max_fpr, x_interp, y_interp))
    fpr = np.append(fpr[:stop], max_fpr)
    partial_auc = auc(fpr, tpr)

#     # Equivalent code that uses sklearn's roc_auc_score
#     v_gt = abs(np.asarray(solution.values)-1)
#     v_pred = np.array([1.0 - x for x in submission.values])
#     max_fpr = abs(1-min_tpr)
#     partial_auc_scaled = roc_auc_score(v_gt, v_pred, max_fpr=max_fpr)
#     # change scale from [0.5, 1.0] to [0.5 * max_fpr**2, max_fpr]
#     # https://math.stackexchange.com/questions/914823/shift-numbers-into-a-different-range
#     partial_auc = 0.5 * max_fpr**2 + (max_fpr - 0.5 * max_fpr**2) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)

    return(partial_auc)