In [21]:
from datasets import load_dataset
from datasets import load_metric

from sklearn.metrics import accuracy_score

from transformers import TrainingArguments
from transformers import ViTFeatureExtractor
from transformers import ViTForImageClassification

import torch

from PIL import Image
import requests
import numpy as np


In [4]:
ds = load_dataset("imagefolder", data_dir="../input/cell-images-for-detecting-malaria/cell_images")
data = ds['train'].train_test_split(test_size=0.1)
labels = data["train"].features["label"].names
label2id, id2label = dict(), dict()

for i, label in enumerate(labels):
    label2id[label] = i
    id2label[i] = label

metric = load_metric('accuracy')
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')


Resolving data files:   0%|          | 0/55120 [00:00<?, ?it/s]

Downloading and preparing dataset image_folder/default to /root/.cache/huggingface/datasets/image_folder/default-4767780be8639a06/0.0.0/ee92df8e96c6907f3c851a987be3fd03d4b93b247e727b69a8e23ac94392a091...
    

Downloading data files #0:   0%|          | 0/3445 [00:00<?, ?obj/s]

Downloading data files #3:   0%|          | 0/3445 [00:00<?, ?obj/s]

 

Downloading data files #2:   0%|          | 0/3445 [00:00<?, ?obj/s]

Downloading data files #1:   0%|          | 0/3445 [00:00<?, ?obj/s]

     

Downloading data files #4:   0%|          | 0/3445 [00:00<?, ?obj/s]

  

Downloading data files #6:   0%|          | 0/3445 [00:00<?, ?obj/s]

Downloading data files #7:   0%|          | 0/3445 [00:00<?, ?obj/s]

    

Downloading data files #11:   0%|          | 0/3445 [00:00<?, ?obj/s]

Downloading data files #8:   0%|          | 0/3445 [00:00<?, ?obj/s]

Downloading data files #10:   0%|          | 0/3445 [00:00<?, ?obj/s]

Downloading data files #15:   0%|          | 0/3444 [00:00<?, ?obj/s]

Downloading data files #5:   0%|          | 0/3445 [00:00<?, ?obj/s]

Downloading data files #12:   0%|          | 0/3444 [00:00<?, ?obj/s]

Downloading data files #14:   0%|          | 0/3444 [00:00<?, ?obj/s]

Downloading data files #9:   0%|          | 0/3445 [00:00<?, ?obj/s]

Downloading data files #13:   0%|          | 0/3444 [00:00<?, ?obj/s]

Downloading data files:   0%|          | 0/4 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/4 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset image_folder downloaded and prepared to /root/.cache/huggingface/datasets/image_folder/default-4767780be8639a06/0.0.0/ee92df8e96c6907f3c851a987be3fd03d4b93b247e727b69a8e23ac94392a091. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

Downloading builder script:   0%|          | 0.00/1.41k [00:00<?, ?B/s]

Downloading (…)rocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]



In [5]:
from torchvision.transforms import (
    CenterCrop,
    Compose,
    Normalize,
    RandomHorizontalFlip,
    RandomResizedCrop,
    Resize,
    ToTensor,
)
# Manually set image_mean and image_std based on ViT model
normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

train_transforms = Compose(
    [
        RandomResizedCrop(224),  # Manually set size to 224
        RandomHorizontalFlip(),
        ToTensor(),
        normalize,
    ]
)

val_transforms = Compose(
    [
        Resize(224),  # Manually set size to 224
        CenterCrop(224),  # Manually set size to 224
        ToTensor(),
        normalize,
    ]
)


In [6]:
def preprocess_train(example_batch):
    example_batch["pixel_values"] = [
        train_transforms(image.convert("RGB")) for image in example_batch["image"]
    ]
    return example_batch

def preprocess_val(example_batch):
    example_batch["pixel_values"] = [val_transforms(image.convert("RGB")) for image in example_batch["image"]]
    return example_batch


In [7]:
train_ds = data['train']
val_ds = data['test']
test_ds = data['test']

train_ds.set_transform(preprocess_train)
val_ds.set_transform(preprocess_val)


In [8]:
model_name_or_path = 'google/vit-base-patch16-224-in21k'
model = ViTForImageClassification.from_pretrained(
    model_name_or_path, 
    num_labels=len(labels),
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)}
)


Downloading (…)lve/main/config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
training_args = TrainingArguments(
    'finetuned-malaria-detection',
    per_device_train_batch_size=16,
    evaluation_strategy="steps",
    num_train_epochs=4,
    fp16=True,
    save_steps=100,
    eval_steps=100,
    logging_steps=10,
    learning_rate=2e-4,
    save_total_limit=2,
    remove_unused_columns=False,
    report_to='tensorboard',
    load_best_model_at_end=True,
    hub_strategy="end"
)


In [14]:
# Define calculate_dice_coefficient function
def calculate_dice_coefficient(predictions, references):
    # Implement the Dice coefficient calculation here
    pass  # Placeholder, replace with actual code


In [18]:
from sklearn.metrics import precision_recall_fscore_support


def compute_metrics(eval_pred):
    predictions = np.argmax(eval_pred.predictions, axis=1)
    references = eval_pred.label_ids

    # Calculate F1 score
    precision, recall, f1, _ = precision_recall_fscore_support(references, predictions, average='weighted')

    # Calculate Dice coefficient
    dice_coefficient = calculate_dice_coefficient(predictions, references)

    return {
        'accuracy': accuracy_score(references, predictions),
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'dice_coefficient': dice_coefficient
    }



def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['label'] for x in batch])
    }


In [19]:
from transformers import Trainer

trainer = Trainer(
    model,
    training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=feature_extractor,
    compute_metrics=compute_metrics,
    data_collator=collate_fn,
)


In [22]:
train_results = trainer.train()


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Dice Coefficient
100,0.3453,0.225032,0.919448,0.91921,0.922676,0.919448,
200,0.4077,0.242527,0.939768,0.939761,0.93979,0.939768,
300,0.3647,0.165504,0.938316,0.938272,0.938847,0.938316,
400,0.3676,0.178958,0.949383,0.949388,0.949452,0.949383,
500,0.3757,0.165566,0.953556,0.953561,0.953824,0.953556,
600,0.3665,0.197266,0.935414,0.93533,0.936602,0.935414,
700,0.3453,0.214287,0.932329,0.932217,0.933954,0.932329,
800,0.3158,0.211714,0.920356,0.920121,0.923565,0.920356,
900,0.3838,0.182658,0.950835,0.950837,0.950854,0.950835,
1000,0.3587,0.198008,0.944666,0.944645,0.944872,0.944666,


Trainer is attempting to log a value of "None" of type <class 'NoneType'> for key "eval/dice_coefficient" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "None" of type <class 'NoneType'> for key "eval/dice_coefficient" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "None" of type <class 'NoneType'> for key "eval/dice_coefficient" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "None" of type <class 'NoneType'> for key "eval/dice_coefficient" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "None" of type <class 'NoneType'> for key "eval/dice_coefficient" as a scalar. This invocation of Te

In [25]:
torch.save(model.state_dict(), "model.pth")
trainer.save_model('model.h5')
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()
metrics = trainer.evaluate()
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)


***** train metrics *****
  epoch                    =           4.0
  total_flos               = 14319690513GF
  train_loss               =        0.2943
  train_runtime            =    3:23:49.73
  train_samples_per_second =        16.224
  train_steps_per_second   =         1.014


Trainer is attempting to log a value of "None" of type <class 'NoneType'> for key "eval/dice_coefficient" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


***** eval metrics *****
  epoch                   =        4.0
  eval_accuracy           =     0.9634


TypeError: unsupported format string passed to NoneType.__format__

In [26]:
outputs = trainer.predict(test_ds)
print(outputs.metrics)

{'test_loss': 0.10912443697452545, 'test_accuracy': 0.9633526850507983, 'test_f1': 0.9633562465776424, 'test_precision': 0.9634419995144686, 'test_recall': 0.9633526850507983, 'test_dice_coefficient': None, 'test_runtime': 55.0872, 'test_samples_per_second': 100.059, 'test_steps_per_second': 12.507}


In [27]:
torch.cuda.is_available = lambda : False
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [28]:
url = '/kaggle/input/cell-images-for-detecting-malaria/cell_images/Uninfected/C100P61ThinF_IMG_20150918_144104_cell_166.png'

image = Image.open(url)
inputs = feature_extractor(images=image, return_tensors="pt")
inputs = inputs.to(device)
outputs = model(**inputs)
logits = outputs.logits
predicted_class_idx = logits.argmax(-1).item()
print("Predicted class:", id2label[predicted_class_idx])


Predicted class: Uninfected
