In [1]:
from transformers import ViTImageProcessor, ViTForImageClassification, get_linear_schedule_with_warmup
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from PIL import Image
import torch
import pandas as pd
import os
from tqdm import tqdm
import numpy as np 
from transformers import get_linear_schedule_with_warmup
import random
import numpy as np
import torch

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Call this function at the start of your main function
set_seed()


In [2]:
groun_truths = pd.read_csv('/Users/jimmyhe/Desktop/KaggleCompetitions/ISISCANCER/MetaDataPlusProprocessed/ISIC_2019_Training_GroundTruth.csv')
groun_truths.columns

Index(['image', 'MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC', 'UNK'], dtype='object')

In [3]:
groun_truths[groun_truths['BCC'] == 1.0].value_counts().sum(),groun_truths[groun_truths['SCC'] == 1.0].value_counts().sum(),groun_truths[groun_truths['MEL'] == 1.0].value_counts().sum()
train_metadata_corr = groun_truths.copy()

(3323, 628, 4522)

In [7]:
train_metadata_corr = train_metadata_corr.head(300)

In [8]:
models = [
    ('google/vit-base-patch16-224'),    # ViT-Base with 224x224 input size
    #('google/vit-large-patch16-224', 224, 2048),    # ViT-Large with 224x224 input size
    #('facebook/deit-base-patch16-224', 224, 1024),  # DeiT-Base with 224x224 input size (Data-efficient Image Transformer)
    ('facebook/deit-small-patch16-224'), # DeiT-Small with 224x224 input size (Smaller variant of DeiT)
    #('microsoft/swin-base-patch4-window7-224', 224, 1024), # Swin Transformer Base model with 224x224 input size
    #('microsoft/swin-large-patch4-window7-224', 224, 2048), # Swin Transformer Large model with 224x224 input size
]

def comp_score(solution: pd.DataFrame, submission: pd.DataFrame, min_tpr: float=0.80):
    v_gt = abs(np.asarray(solution.values)-1)
    v_pred = np.array([1.0 - x for x in submission.values])
    max_fpr = abs(1-min_tpr)
    partial_auc_scaled = roc_auc_score(v_gt, v_pred, max_fpr=max_fpr)
    partial_auc = 0.5 * max_fpr**2 + (max_fpr - 0.5 * max_fpr**2) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)
    return partial_auc

num_epochs = 3

In [11]:

class ImageDataset(Dataset):
    def __init__(self, csv_file, img_dir, feature_extractor, transform=None):
        # If the input is a DataFrame, use it directly; otherwise, load the CSV file
        if isinstance(csv_file, pd.DataFrame):
            self.data = csv_file
        else:
            self.data = pd.read_csv(csv_file)
        
        self.img_dir = img_dir
        self.image_paths = self.data['image'].values
        self.labels = self.data['target'].values
        self.feature_extractor = feature_extractor
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        # Load the image
        img_path = os.path.join(self.img_dir, self.image_paths[idx]+'.jpg')
        image = Image.open(img_path).convert('RGB')
        
        # Apply custom transforms (if any)
        if self.transform:
            image = self.transform(image)

        # Preprocess the image using the feature extractor
        inputs = self.feature_extractor(images=image, return_tensors="pt")
        inputs = {k: v.squeeze(0) for k, v in inputs.items()}
        
        # Add the label
        inputs['target'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return inputs

# Collate function for DataLoader
def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['target'] for x in batch])
    }

# Training Function with Progress Bar
def train_model(model,model_name, train_dataloader, val_dataloader, optimizer, scheduler, device, num_epochs=3):
    model.train()
    best_roc_auc = 0
    for epoch in range(num_epochs):
        total_loss = 0
        for batch in tqdm(train_dataloader, desc=f"Epoch {epoch + 1}/{num_epochs}", unit="batch"):
            optimizer.zero_grad()
            pixel_values = batch['pixel_values'].to(device)
            labels = batch['labels'].to(device)
            
            outputs = model(pixel_values=pixel_values, labels=labels)
            loss = outputs.loss
            
            loss.backward()
            optimizer.step()
            scheduler.step()
            
            total_loss += loss.item()

        avg_loss = total_loss / len(train_dataloader)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}")
        
        # Validation
        roc_auc = evaluate_model(model, val_dataloader, device)
        if roc_auc > best_roc_auc:
            best_roc_auc = roc_auc
            torch.save(model.state_dict(), f"best_model_{model_name.replace('/', '_')}.pth")
        
    print(f"Best pAUC Score: {best_roc_auc:.4f}")
    return best_roc_auc

# Evaluation Function with Progress Bar
def evaluate_model(model, dataloader, device):
    model.eval()
    all_preds = []
    all_labels = []
    # Wrap the dataloader with tqdm to show a progress bar
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating", unit="batch"):
            pixel_values = batch['pixel_values'].to(device)
            labels = batch['labels'].to(device)
            
            # Forward pass
            outputs = model(pixel_values=pixel_values)
            logits = outputs.logits
            preds = torch.argmax(logits, dim=-1)
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    roc_auc = comp_score(pd.Series(all_labels), pd.Series(all_preds))
    print(f"pAUC Score: {roc_auc:.4f}")
    return roc_auc

# Main function to run the training and evaluation
def main():
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    
    for model_name in models: 
        feature_extractor = ViTImageProcessor.from_pretrained(model_name)
        
        # Load the model with ignore_mismatched_sizes=True
        model = ViTForImageClassification.from_pretrained(
            model_name,
            num_labels=2,  
            ignore_mismatched_sizes=True
        ).to(DEVICE)
        model.config
        # Load and split the dataset
        df = train_metadata_corr.copy()
        train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

        # Create datasets and dataloaders
        train_dataset = ImageDataset(csv_file=train_df, img_dir='/Users/jimmyhe/Desktop/KaggleCompetitions/ISISCANCER/MetaDataPlusProprocessed/train-image/image/', feature_extractor=feature_extractor)
        val_dataset = ImageDataset(csv_file=val_df, img_dir='/Users/jimmyhe/Desktop/KaggleCompetitions/ISISCANCER/MetaDataPlusProprocessed/train-image/image/', feature_extractor=feature_extractor)

        train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
        val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

        # Optimizer
        optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
        num_training_steps = len(train_dataloader) * num_epochs
        num_warmup_steps = int(0.1 * num_training_steps)  # 10% of total steps for warmup

        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=num_warmup_steps,
            num_training_steps=num_training_steps
        )
        # Train the model
        train_model(model, model_name, train_dataloader,val_dataloader, optimizer,scheduler, DEVICE, num_epochs=3)

        # Evaluate the model
        evaluate_model(model, val_dataloader, DEVICE)

        # Save the finetuned model
        save_dir = f"finetuned_model_{model_name.replace('/', '_')}"
        os.makedirs(save_dir, exist_ok=True)
        model.save_pretrained(save_dir)
        feature_extractor.save_pretrained(save_dir)

In [12]:


### REMEMBER TO SET SEEDS 
if __name__ == "__main__":
    main()


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([2]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 8/8 [00:40<00:00,  5.02s/batch]


Epoch 1/3, Loss: 0.6360


Evaluating: 100%|██████████| 2/2 [00:02<00:00,  1.22s/batch]


pAUC Score: 0.0261


Epoch 2/3: 100%|██████████| 8/8 [00:34<00:00,  4.30s/batch]


Epoch 2/3, Loss: 0.3814


Evaluating: 100%|██████████| 2/2 [00:02<00:00,  1.23s/batch]


pAUC Score: 0.0261


Epoch 3/3: 100%|██████████| 8/8 [00:35<00:00,  4.42s/batch]


Epoch 3/3, Loss: 0.2971


Evaluating: 100%|██████████| 2/2 [00:02<00:00,  1.16s/batch]


pAUC Score: 0.0261
Best pAUC Score: 0.0261


Evaluating: 100%|██████████| 2/2 [00:02<00:00,  1.11s/batch]


pAUC Score: 0.0261


Some weights of ViTForImageClassification were not initialized from the model checkpoint at facebook/deit-small-patch16-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 384]) in the checkpoint and torch.Size([2, 384]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 8/8 [00:11<00:00,  1.38s/batch]


Epoch 1/3, Loss: 0.5914


Evaluating: 100%|██████████| 2/2 [00:00<00:00,  2.09batch/s]


pAUC Score: 0.0240


Epoch 2/3: 100%|██████████| 8/8 [00:11<00:00,  1.47s/batch]


Epoch 2/3, Loss: 0.4318


Evaluating: 100%|██████████| 2/2 [00:01<00:00,  1.96batch/s]


pAUC Score: 0.0235


Epoch 3/3: 100%|██████████| 8/8 [00:11<00:00,  1.43s/batch]


Epoch 3/3, Loss: 0.3094


Evaluating: 100%|██████████| 2/2 [00:00<00:00,  2.02batch/s]


pAUC Score: 0.0235
Best pAUC Score: 0.0240


Evaluating: 100%|██████████| 2/2 [00:00<00:00,  2.10batch/s]

pAUC Score: 0.0235



