In [33]:
from transformers import AutoImageProcessor, AutoModelForImageClassification
from PIL import Image
import requests

# Preprocessing configuration published with the pretrained Swin checkpoint; later cells read
# its `size`, `image_mean` and `image_std` and call it directly on PIL images at inference time.
image_processor = AutoImageProcessor.from_pretrained("microsoft/swin-base-patch4-window7-224-in22k")

Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


In [None]:
# Start from the pretrained Swin weights and request a 10-way classification head.
# ignore_mismatched_sizes=True lets loading proceed when the checkpoint's head has a different
# number of outputs — presumably the case for this "in22k" checkpoint; TODO confirm.
finetune_model = AutoModelForImageClassification.from_pretrained("microsoft/swin-base-patch4-window7-224-in22k", num_labels=10, ignore_mismatched_sizes=True)

### Prepare dataset

In [35]:
import os
def load_image_data(data_dir):
    """Collect every image under ``data_dir`` together with its integer class label.

    The directory is expected to contain one sub-directory per class whose name is an
    integer; the label stored for an image is that integer minus one.

    Args:
        data_dir: Path of the directory that holds the per-class sub-directories.

    Returns:
        A tuple ``(images, labels)`` of equal-length lists: images opened with
        ``Image.open`` and their ``int`` labels.

    Raises:
        ValueError: If a non-hidden class directory that contains images does not have
            an integer name.
    """
    images = []
    labels = []

    # Sorted so that the example order is reproducible across machines / file systems.
    for class_dir in sorted(os.listdir(data_dir)):
        class_path = os.path.join(data_dir, class_dir)

        # Skip stray files and hidden folders (e.g. ".DS_Store", ".ipynb_checkpoints")
        # instead of crashing on them.
        if class_dir.startswith(".") or not os.path.isdir(class_path):
            continue

        for image_file in sorted(os.listdir(class_path)):
            if image_file.startswith("."):
                continue
            image_path = os.path.join(class_path, image_file)
            # NOTE(review): Image.open is lazy, so each image keeps its file open until the
            # pixel data is first read; confirm this is acceptable for the dataset size.
            images.append(Image.open(image_path))
            # Directory names are shifted down by one to obtain the stored label.
            labels.append(int(class_dir) - 1)

    return images, labels

In [36]:
from PIL import Image
import os
import numpy as np
from datasets import Dataset


# Load the labeled training set. Despite its name, `image_paths` holds the opened PIL images
# returned by load_image_data, not path strings; `labels` holds the matching integer labels.
image_paths, labels = load_image_data('./dataset/CSE164_2023/Train_set/')
data = {"image": image_paths, "label": labels}
from datasets import load_dataset

# Wrap the two parallel lists in a Hugging Face Dataset with "image" and "label" columns.
dataset = Dataset.from_dict(data)

In [38]:
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor

# Crop size comes from the image processor: either a single "shortest_edge" value or an
# explicit (height, width) pair, depending on which keys the processor config provides.
if "shortest_edge" in image_processor.size:
    size = image_processor.size["shortest_edge"]
else:
    size = (image_processor.size["height"], image_processor.size["width"])

# Normalize with the same per-channel statistics the image processor uses.
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)

# Training-time pipeline: random resized crop -> tensor -> normalization.
_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])

In [39]:
def transforms(examples):
    """Turn a batch's ``"image"`` column into a ``"pixel_values"`` column.

    Each image is converted to RGB and passed through ``_transforms``. The batch is
    modified in place: ``"pixel_values"`` is added and ``"image"`` is removed.

    Args:
        examples: Mapping with an ``"image"`` entry that holds a list of PIL images.

    Returns:
        The same ``examples`` object, after modification.
    """
    pixel_values = []
    for img in examples["image"]:
        rgb_img = img.convert("RGB")
        pixel_values.append(_transforms(rgb_img))

    examples["pixel_values"] = pixel_values
    del examples["image"]
    return examples

In [40]:
# Attach `transforms` so it is applied when examples are read from the dataset, rather than
# being precomputed once; the random crop inside `_transforms` is therefore re-drawn on access.
train_ds = dataset.with_transform(transforms)

In [41]:
from transformers import DefaultDataCollator

# Collator handed to both Trainer instances below to assemble per-example features into batches.
data_collator = DefaultDataCollator()

In [42]:
import evaluate

# Accuracy metric from the `evaluate` library; compute_metrics calls its `.compute(...)`.
accuracy = evaluate.load("accuracy")

In [43]:
import numpy as np


def compute_metrics(eval_pred):
    """Compute accuracy for one evaluation pass.

    Args:
        eval_pred: A pair ``(predictions, labels)``. The predictions are per-class
            scores, reduced to class ids with an arg-max over axis 1.

    Returns:
        The result of ``accuracy.compute`` for the arg-max predictions against the labels.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    return accuracy.compute(predictions=predicted_ids, references=references)

In [44]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

## Supervised training using the labeled data

In [None]:
# Supervised fine-tuning on the labeled training set.
training_args = TrainingArguments(
    output_dir="./swin_t_output/final_test/",
    # Keep the raw "image" column: `transforms` reads it when examples are fetched.
    remove_unused_columns=False,
    # Evaluate and checkpoint once per epoch so the best epoch can be restored afterwards.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    # 16 examples per device x 4 accumulation steps = 64 examples per optimizer step per device.
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=16,
    num_train_epochs=20,
    warmup_ratio=0.1,
    logging_steps=10,
    # Reload the checkpoint with the highest "accuracy" (from compute_metrics) when training ends.
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    push_to_hub=False,
)

# NOTE(review): eval_dataset is the same object as train_dataset, so the accuracy used to pick
# the best checkpoint is measured on the training images (with the random crops from
# `_transforms` applied) rather than on held-out data. Consider a separate validation split.
trainer = Trainer(
    model=finetune_model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_ds,
    eval_dataset=train_ds,
    compute_metrics=compute_metrics,
)

trainer.train()

## Test-set predictions using the model trained only on labeled data

In [None]:
import os
import torch
# Predict a class id for every file in the test directory with the supervised model.
test_dir = "./dataset/CSE164_2023/test_set/Test_set/"
image_labels = []
count = 0

# Inference only: put the model in eval mode so stochastic layers (e.g. dropout) are disabled.
finetune_model.eval()
# Run on whichever device the model already lives on instead of assuming CUDA is available.
model_device = next(finetune_model.parameters()).device

# Sorted so the rows of the submission file come out in a reproducible order.
test_files = sorted(os.listdir(test_dir))

# No gradients are needed for prediction; this avoids building the autograd graph.
with torch.no_grad():
    for file in test_files:
        file_path = os.path.join(test_dir, file)
        # Close the file promptly and convert to RGB, matching the training preprocessing
        # (which also calls .convert("RGB")) so non-RGB test images are handled consistently.
        with Image.open(file_path) as raw_image:
            image = raw_image.convert("RGB")
        inputs = image_processor(image, return_tensors="pt")
        inputs['pixel_values'] = inputs['pixel_values'].to(model_device)
        logits = finetune_model(**inputs).logits
        predicted_class_id = int(torch.argmax(logits, axis=-1)[0])
        # NOTE(review): class ids are the shifted labels used for training (directory name - 1);
        # confirm this is the numbering the submission format expects.
        image_labels.append([file, predicted_class_id])
        count += 1
        # Periodic progress line instead of one output line per image.
        if count % 100 == 0:
            print(f"{count}/{len(test_files)} images classified")

In [50]:
import csv
# Destination of the submission file.
file_path = "submission.csv"

# Header row first, then one [file name, predicted class id] row per test image.
with open(file_path, mode="w", newline="") as file:
    writer = csv.writer(file)
    writer.writerows([["Image_id", "label"], *image_labels])

## Pseudo Labeling and semi-supervised training

In [52]:
import torch
data_dir = "./dataset/CSE164_2023/unlabeled_data/Unlabeled_data/"
def load_test_set(unlabled_data):
    """Open unlabeled images and assign each one a pseudo-label from ``finetune_model``.

    Args:
        unlabled_data: File names (relative to the module-level ``data_dir``) to label.

    Returns:
        A tuple ``(images, labels)`` of equal-length lists: the images as opened with
        ``Image.open`` and the arg-max class id predicted for each of them.
    """
    images = []
    labels = []
    if not unlabled_data:
        # Nothing to label; return without touching the model.
        return images, labels

    # Inference only: eval mode disables stochastic layers such as dropout. A subsequent
    # Trainer.train() call puts the model back into training mode itself.
    finetune_model.eval()
    # Run on whichever device the model already lives on instead of assuming CUDA is available.
    model_device = next(finetune_model.parameters()).device

    # No gradients are needed for prediction; this avoids building the autograd graph.
    with torch.no_grad():
        for count, image_file in enumerate(unlabled_data, start=1):
            image_path = os.path.join(data_dir, image_file)
            img = Image.open(image_path)
            # Predict on an RGB view, matching the training preprocessing in `transforms`.
            inputs = image_processor(img.convert("RGB"), return_tensors="pt")
            inputs['pixel_values'] = inputs['pixel_values'].to(model_device)

            logits = finetune_model(**inputs).logits
            predicted_class_id = int(torch.argmax(logits, axis=-1)[0])

            # Keep the originally opened image, exactly as before this function was revised.
            images.append(img)
            labels.append(predicted_class_id)
            # Periodic progress line instead of one output line per image.
            if count % 100 == 0:
                print(f"{count} images pseudo-labeled")

    return images, labels

In [20]:
def prepare_dataset(curr_data):
    """Build a training dataset from pseudo-labeled images.

    Args:
        curr_data: File names of the unlabeled images to include; forwarded unchanged
            to ``load_test_set``.

    Returns:
        A ``Dataset`` with ``"image"`` and ``"label"`` columns that has ``transforms``
        attached via ``with_transform``.
    """
    images, predicted_labels = load_test_set(curr_data)
    pseudo_labeled = Dataset.from_dict({"image": images, "label": predicted_labels})
    return pseudo_labeled.with_transform(transforms)

In [None]:
# Number of unlabeled images that are pseudo-labeled and trained on in each round.
PSEUDO_LABEL_ROUND_SIZE = 2000

# Sorted so the composition of every round is reproducible across runs.
unlabeled_data = sorted(os.listdir(data_dir))
# Ceiling division: a trailing partial round is still used, and a directory with fewer
# than PSEUDO_LABEL_ROUND_SIZE files produces one round instead of none.
number_batches = (len(unlabeled_data) + PSEUDO_LABEL_ROUND_SIZE - 1) // PSEUDO_LABEL_ROUND_SIZE

for i in range(number_batches):
    # Pseudo-label the next slice with the current model, then fine-tune on that slice.
    curr_data = unlabeled_data[i*PSEUDO_LABEL_ROUND_SIZE:(i+1)*PSEUDO_LABEL_ROUND_SIZE]
    unlabeled_train_ds = prepare_dataset(curr_data)

    # NOTE(review): every round writes to the same output_dir, so checkpoint folders with the
    # same step number are shared between rounds.
    training_args = TrainingArguments(
        output_dir="./swin_t_output/final_finetuning/",
        # Keep the raw "image" column: `transforms` reads it when examples are fetched.
        remove_unused_columns=False,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=5e-5,
        per_device_train_batch_size=16,
        gradient_accumulation_steps=4,
        per_device_eval_batch_size=16,
        num_train_epochs=3,
        warmup_ratio=0.1,
        logging_steps=10,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        push_to_hub=False,
    )

    # Train on the pseudo-labeled slice; evaluate (and pick the best epoch) on the labeled data.
    trainer_uds = Trainer(
        model=finetune_model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=unlabeled_train_ds,
        eval_dataset=train_ds,
        compute_metrics=compute_metrics,
    )

    trainer_uds.train()
    

## Inference on test dataset

In [None]:
import os
# Predict a class id for every file in the test directory with the pseudo-label-trained model.
test_dir = "./dataset/CSE164_2023/test_set/Test_set/"
image_labels = []
count = 0

# Inference only: put the model in eval mode so stochastic layers (e.g. dropout) are disabled.
finetune_model.eval()
# Run on whichever device the model already lives on instead of assuming CUDA is available.
model_device = next(finetune_model.parameters()).device

# Sorted so the rows of the submission file come out in a reproducible order.
test_files = sorted(os.listdir(test_dir))

# No gradients are needed for prediction; this avoids building the autograd graph.
with torch.no_grad():
    for file in test_files:
        file_path = os.path.join(test_dir, file)
        # Close the file promptly and convert to RGB, matching the training preprocessing
        # (which also calls .convert("RGB")) so non-RGB test images are handled consistently.
        with Image.open(file_path) as raw_image:
            image = raw_image.convert("RGB")
        inputs = image_processor(image, return_tensors="pt")
        inputs['pixel_values'] = inputs['pixel_values'].to(model_device)
        logits = finetune_model(**inputs).logits
        predicted_class_id = int(torch.argmax(logits, axis=-1)[0])
        image_labels.append([file, predicted_class_id])
        count += 1
        # Periodic progress line instead of one output line per image.
        if count % 100 == 0:
            print(f"{count}/{len(test_files)} images classified")

In [72]:
import csv
# Destination of the submission file.
file_path = "submission.csv"

# Header row first, then one [file name, predicted class id] row per test image.
with open(file_path, mode="w", newline="") as file:
    writer = csv.writer(file)
    writer.writerows([["Image_id", "label"], *image_labels])

## Reproduce result using trained (saved) model

In [73]:
from transformers import AutoImageProcessor, AutoModelForImageClassification
# Reload fine-tuned weights from a checkpoint directory on disk and move the model to the GPU.
# NOTE(review): "checkpoint-30" is a hard-coded step number; confirm that this folder exists
# (and is the intended checkpoint) for the run being reproduced.
saved_model = AutoModelForImageClassification.from_pretrained("./swin_t_output/final_finetuning/checkpoint-30/").cuda()

In [None]:
from transformers import AutoImageProcessor, AutoModelForImageClassification
from PIL import Image

# Re-create the image processor from the same pretrained checkpoint name used at the top of the
# notebook, so this section can run without the training cells having been executed.
image_processor = AutoImageProcessor.from_pretrained("microsoft/swin-base-patch4-window7-224-in22k")

In [None]:
import os
import torch

# Predict a class id for every file in the test directory with the model reloaded from disk.
test_dir = "./dataset/CSE164_2023/test_set/Test_set/"
image_labels = []
count = 0

# Inference only: put the model in eval mode so stochastic layers (e.g. dropout) are disabled.
saved_model.eval()
# Run on whichever device the model already lives on instead of assuming CUDA is available.
model_device = next(saved_model.parameters()).device

# Sorted so the rows of the submission file come out in a reproducible order.
test_files = sorted(os.listdir(test_dir))

# No gradients are needed for prediction; this avoids building the autograd graph.
with torch.no_grad():
    for file in test_files:
        file_path = os.path.join(test_dir, file)
        # Close the file promptly and convert to RGB so non-RGB test images (grayscale, RGBA,
        # ...) reach the image processor in a consistent mode.
        with Image.open(file_path) as raw_image:
            image = raw_image.convert("RGB")
        inputs = image_processor(image, return_tensors="pt")
        inputs['pixel_values'] = inputs['pixel_values'].to(model_device)
        logits = saved_model(**inputs).logits
        predicted_class_id = int(torch.argmax(logits, axis=-1)[0])
        image_labels.append([file, predicted_class_id])
        count += 1
        # Periodic progress line instead of one output line per image.
        if count % 100 == 0:
            print(f"{count}/{len(test_files)} images classified")

In [79]:
import csv
# Destination of the submission file.
file_path = "submission.csv"

# Header row first, then one [file name, predicted class id] row per test image.
with open(file_path, mode="w", newline="") as file:
    writer = csv.writer(file)
    writer.writerows([["Image_id", "label"], *image_labels])