In [1]:
from transformers import ViTImageProcessor, ViTForImageClassification, get_linear_schedule_with_warmup
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from PIL import Image
import torch
import pandas as pd
import os
from tqdm import tqdm
import numpy as np 
from libauc.losses import pAUC_DRO_Loss
from libauc.optimizers import SOPAs
import random 
from libauc.sampler import DualSampler
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Call this function at the start of your main function
set_seed()


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

class ImageDataset(Dataset):
    def __init__(self, csv_file, img_dir, feature_extractor, transform=None):
        # If the input is a DataFrame, use it directly; otherwise, load the CSV file
        if isinstance(csv_file, pd.DataFrame):
            self.data = csv_file
        else:
            self.data = pd.read_csv(csv_file)
        
        self.img_dir = img_dir
        self.image_paths = self.data['image'].values
        self.targets = self.data['target'].values
        self.feature_extractor = feature_extractor
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        # Load the image
        img_path = os.path.join(self.img_dir, self.image_paths[idx] + '.jpg')
        image = Image.open(img_path).convert('RGB')
        
        # Apply custom transforms (if any)
        if self.transform:
            image = self.transform(image)

        # Preprocess the image using the feature extractor
        image = self.feature_extractor(images=image, return_tensors="pt")['pixel_values'].squeeze(0)
        
        # Get the label
        target = torch.tensor(self.targets[idx], dtype=torch.long)
        
        # Return the image tensor and label
        return image, target, idx
        


def train_model(model, model_name, train_dataloader, val_dataloader, optimizer, loss_fn, device, num_epochs=3):
    model.train()
    best_roc_auc = 0
    for epoch in range(num_epochs):
        train_loss = []
        optimizer.update_lr(decay_factor=10)

        for image, targets, index in tqdm(train_dataloader, desc=f"Epoch {epoch + 1}/{num_epochs}", unit="batch"):
            optimizer.zero_grad()
            pixel_values = image
            labels = targets
            idx = index
            
            outputs = model(pixel_values=pixel_values, labels=labels)
            logits = outputs.logits
            probabilities = torch.softmax(logits, dim=-1)
            loss = loss_fn(probabilities, labels, idx)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())

        avg_loss = np.mean(train_loss)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}")
        
        # Validation
        roc_auc = evaluate_model(model, val_dataloader, device)
        if roc_auc > best_roc_auc:
            best_roc_auc = roc_auc
            torch.save(model.state_dict(), f"best_model_{model_name.replace('/', '_')}.pth")
        
    print(f"Best pAUC Score: {best_roc_auc:.4f}")
    return best_roc_auc


def comp_score(solution: pd.DataFrame, submission: pd.DataFrame, min_tpr: float=0.80):
    v_gt = abs(np.asarray(solution.values)-1)
    v_pred = np.array([1.0 - x for x in submission.values])
    max_fpr = abs(1-min_tpr)
    partial_auc_scaled = roc_auc_score(v_gt, v_pred, max_fpr=max_fpr)
    partial_auc = 0.5 * max_fpr**2 + (max_fpr - 0.5 * max_fpr**2) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)
    return partial_auc


def collate_fn(batch):
    images, labels, indices = zip(*batch)  # Unpack the batch into images, labels, and indices
    return {
        'pixel_values': torch.stack(images),  # Stack the image tensors into a batch
        'labels': torch.tensor(labels),       # Convert the labels into a tensor
        'idx': torch.tensor(indices)          # Convert the indices into a tensor
    }

def evaluate_model(model, dataloader, device):
    model.eval()
    all_preds = []
    all_labels = []
    # Wrap the dataloader with tqdm to show a progress bar
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Fine Tuning", unit="batch"):
            pixel_values = batch['pixel_values'].to(device)
            labels = batch['labels'].to(device)
            
            # Forward pass
            outputs = model(pixel_values=pixel_values)
            logits = outputs.logits
            preds = torch.argmax(logits, dim=-1)
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    roc_auc = comp_score(pd.Series(all_labels), pd.Series(all_preds))
    print(f"pAUC Score: {roc_auc:.4f}")
    return roc_auc

In [3]:
df = pd.read_csv('/Users/jimmyhe/Desktop/KaggleCompetitions/ISISCANCER/MetaDataPlusProprocessed/ISIC_2019_Training_GroundTruth.csv')
df['target'] = df[['BCC', 'SCC', 'MEL']].eq(1.0).any(axis=1).astype(int)
models = [
    ('google/vit-base-patch16-224'),    # ViT-Base with 224x224 input size
    #('google/vit-large-patch16-224', 224, 2048),    # ViT-Large with 224x224 input size
    #('facebook/deit-base-patch16-224', 224, 1024),  # DeiT-Base with 224x224 input size (Data-efficient Image Transformer)
    ('facebook/deit-small-patch16-224'), # DeiT-Small with 224x224 input size (Smaller variant of DeiT)
    #('microsoft/swin-base-patch4-window7-224', 224, 1024), # Swin Transformer Base model with 224x224 input size
    #('microsoft/swin-large-patch4-window7-224', 224, 2048), # Swin Transformer Large model with 224x224 input size
]

df = df.head(300)
lr = 1e-3
margin = 0.6
gamma = 0.1
Lambda = 1.0
weight_decay = 1e-5
total_epoch = 60
decay_epoch = [20,40]
batch_size = 32
sampling_rate = 0.5
num_epochs = 3 

FileNotFoundError: [Errno 2] No such file or directory: '/Users/jimmyhe/Desktop/KaggleCompetitions/ISISCANCER/MetaDataPlusProprocessed/ISIC_2019_Training_GroundTruth.csv'

In [4]:
def main():
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    

    for model_name in models: 
        feature_extractor = ViTImageProcessor.from_pretrained(model_name)
        
        # Load the model with ignore_mismatched_sizes=True
        model = ViTForImageClassification.from_pretrained(
            model_name,
            num_labels=2,  
            ignore_mismatched_sizes=True
        ).to(DEVICE)
        model.config
        # Load and split the dataset
        train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

        # Create datasets and dataloaders
        train_dataset = ImageDataset(csv_file=train_df, img_dir='./train-image/image/', feature_extractor=feature_extractor)
        val_dataset = ImageDataset(csv_file=val_df, img_dir='./train-image/image/', feature_extractor=feature_extractor)

    # Create samplers and dataloaders
        train_sampler = DualSampler(train_dataset, batch_size, sampling_rate=sampling_rate)
        train_dataloader = DataLoader(train_dataset, sampler=train_sampler, collate_fn=collate_fn)
        val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)


        loss_fn = pAUC_DRO_Loss(data_len=len(train_dataset), margin=margin, gamma=gamma)
        optimizer = SOPAs(model.parameters(), lr=lr, mode='adam', weight_decay=weight_decay)
        train_model(model, model_name, train_dataloader,val_dataloader, optimizer,loss_fn,DEVICE, num_epochs=3)

        # Evaluate the model
        evaluate_model(model, val_dataloader, DEVICE)

        # Save the finetuned model
        save_dir = f"finetuned_model_{model_name.replace('/', '_')}"
        os.makedirs(save_dir, exist_ok=True)
        model.save_pretrained(save_dir)
        feature_extractor.save_pretrained(save_dir)

In [5]:


### REMEMBER TO SET SEEDS 
if __name__ == "__main__":
    main()


NameError: name 'models' is not defined

In [117]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import WeightedRandomSampler
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from PIL import Image
import pandas as pd
import os
from tqdm import tqdm
import numpy as np
from libauc.losses import pAUC_DRO_Loss
from libauc.optimizers import SOPAs
import random
from transformers import ViTImageProcessor, ViTForImageClassification

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

class ImageDataset(Dataset):
    def __init__(self, csv_file, img_dir, feature_extractor, transform=None):
        self.data = pd.read_csv(csv_file) if isinstance(csv_file, str) else csv_file
        self.img_dir = img_dir
        self.image_paths = self.data['image'].values
        self.labels = self.data['target'].values
        self.feature_extractor = feature_extractor
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.image_paths[idx] + '.jpg')
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)

        inputs = self.feature_extractor(images=image, return_tensors="pt")['pixel_values'].squeeze(0)
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        
        return inputs, label, idx

def train_model(model, model_name, train_dataloader, val_dataloader, optimizer, loss_fn, device, num_epochs=3):
    model.train()
    best_roc_auc = 0
    for epoch in range(num_epochs):
        train_loss = []
        optimizer.update_lr(decay_factor=10)

        for pixel_values, labels, indices in tqdm(train_dataloader, desc=f"Epoch {epoch + 1}/{num_epochs}", unit="batch"):
            optimizer.zero_grad()
            pixel_values = pixel_values.to(device)
            labels = labels.to(device)
            indices = indices.to(device)
            
            outputs = model(pixel_values=pixel_values, labels=labels)
            logits = outputs.logits
            loss = loss_fn(logits, labels, indices)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())

        avg_loss = np.mean(train_loss)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}")
        
        roc_auc = evaluate_model(model, val_dataloader, device)
        if roc_auc > best_roc_auc:
            best_roc_auc = roc_auc
            torch.save(model.state_dict(), f"best_model_{model_name.replace('/', '_')}.pth")
        
    print(f"Best pAUC Score: {best_roc_auc:.4f}")
    return best_roc_auc

def comp_score(solution: pd.DataFrame, submission: pd.DataFrame, min_tpr: float=0.80):
    v_gt = abs(np.asarray(solution.values)-1)
    v_pred = np.array([1.0 - x for x in submission.values])
    max_fpr = abs(1-min_tpr)
    partial_auc_scaled = roc_auc_score(v_gt, v_pred, max_fpr=max_fpr)
    partial_auc = 0.5 * max_fpr**2 + (max_fpr - 0.5 * max_fpr**2) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)
    return partial_auc

def evaluate_model(model, dataloader, device):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for pixel_values, labels, _ in tqdm(dataloader, desc="Evaluating", unit="batch"):
            pixel_values = pixel_values.to(device)
            labels = labels.to(device)
            
            outputs = model(pixel_values=pixel_values)
            logits = outputs.logits
            preds = torch.argmax(logits, dim=-1)
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    roc_auc = comp_score(pd.Series(all_labels), pd.Series(all_preds))
    print(f"pAUC Score: {roc_auc:.4f}")
    return roc_auc

def main():
    set_seed()

    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    
    df = pd.read_csv('/Users/jimmyhe/Desktop/KaggleCompetitions/ISISCANCER/MetaDataPlusProprocessed/ISIC_2019_Training_GroundTruth.csv')
    df['target'] = df[['BCC', 'SCC', 'MEL']].eq(1.0).any(axis=1).astype(int)
    
    models = [
        ('google/vit-base-patch16-224'),
        ('facebook/deit-small-patch16-224'),
    ]

    df = df.head(300)
    lr = 1e-3
    margin = 0.6
    gamma = 0.1
    Lambda = 1.0
    weight_decay = 1e-5
    batch_size = 32
    num_epochs = 3 

    for model_name in models: 
        feature_extractor = ViTImageProcessor.from_pretrained(model_name)
        
        model = ViTForImageClassification.from_pretrained(
            model_name,
            num_labels=2,  
            ignore_mismatched_sizes=True
        ).to(DEVICE)
        
        train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

        train_dataset = ImageDataset(csv_file=train_df, img_dir='./train-image/image/', feature_extractor=feature_extractor)
        val_dataset = ImageDataset(csv_file=val_df, img_dir='./train-image/image/', feature_extractor=feature_extractor)

        # Calculate weights for the sampler
        class_sample_count = np.array([len(np.where(train_dataset.labels == t)[0]) for t in np.unique(train_dataset.labels)])
        weight = 1. / class_sample_count
        samples_weight = np.array([weight[t] for t in train_dataset.labels])
        samples_weight = torch.from_numpy(samples_weight)
        sampler = WeightedRandomSampler(samples_weight.type('torch.DoubleTensor'), len(samples_weight))

        train_dataloader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler)
        val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        loss_fn = pAUC_DRO_Loss(data_len=len(train_dataset), margin=margin, gamma=gamma)
        optimizer = SOPAs(model.parameters(), lr=lr, mode='adam', weight_decay=weight_decay)
        train_model(model, model_name, train_dataloader, val_dataloader, optimizer, loss_fn, DEVICE, num_epochs=num_epochs)

        evaluate_model(model, val_dataloader, DEVICE)

        save_dir = f"finetuned_model_{model_name.replace('/', '_')}"
        os.makedirs(save_dir, exist_ok=True)
        model.save_pretrained(save_dir)
        feature_extractor.save_pretrained(save_dir)

if __name__ == "__main__":
    main()

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([2]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Reducing learning rate to 0.00010 @ T=0!


Epoch 1/3:   0%|          | 0/8 [00:01<?, ?batch/s]


RuntimeError: The size of tensor a (11) must match the size of tensor b (21) at non-singleton dimension 0