In [1]:
!pip install evaluate

Collecting evaluate
  Obtaining dependency information for evaluate from https://files.pythonhosted.org/packages/70/63/7644a1eb7b0297e585a6adec98ed9e575309bb973c33b394dae66bc35c69/evaluate-0.4.1-py3-none-any.whl.metadata
  Downloading evaluate-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.1


In [2]:
import os
import glob
import numpy as np
import torch
import torch.nn as nn
from torchvision.transforms import v2
from torchvision.io import read_image
from torch.utils.data import Dataset, DataLoader



In [3]:
use_s3 = 0

EPOCH = 5
BATCH_SIZE = 32

LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-5

In [4]:
import wandb
# This is secret and shouldn't be checked into version control
WANDB_API_KEY="349b387de860518b8d586052fab2caa41953cb97"
wandb.login(key=WANDB_API_KEY)
# Name and notes optional
WANDB_NAME="my wandb name"
WANDB_NOTES="my wandb notes"
# wandb.init(mode="disabled")
run = wandb.init(
    # Set the project where this run will be logged
    project="cats-vs-dogs",
    # Track hyperparameters and run metadata
    config={
        "learning_rate": LEARNING_RATE,
        "epochs": EPOCH,
        "batch_size": BATCH_SIZE,
        "weight_decay": WEIGHT_DECAY
    },
)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mtzu-chun-huang-tw[0m ([33mteam-tc-huang[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Tracking run with wandb version 0.16.1
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20231224_134457-17ie4836[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mfaithful-flower-12[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/team-tc-huang/cats-vs-dogs[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/team-tc-huang/cats-vs-dogs/runs/17ie4836[0m


In [5]:
# %%writefile ./scripts/train.py
# Check if CUDA (GPU support) is available
if torch.cuda.is_available():
    # Get the number of available GPUs
    num_gpus = torch.cuda.device_count()

    # Get the name of the GPU(s)
    gpu_names = [torch.cuda.get_device_name(i) for i in range(num_gpus)]

    print(f"GPU(s) available: {num_gpus}")
    for i, gpu_name in enumerate(gpu_names):
        print(f"GPU {i}: {gpu_name}")
else:
    print("No GPU available. PyTorch will run on CPU.")

GPU(s) available: 1
GPU 0: Tesla P100-PCIE-16GB


In [6]:
from transformers import ConvNextImageProcessor, ResNetForImageClassification

image_processor = ConvNextImageProcessor.from_pretrained("microsoft/resnet-50")
# label2id = {"Cat": 0, "Dog": 1}
# id2label = {0: "Cat", 1: "Dog"}

model = ResNetForImageClassification.from_pretrained(
    "microsoft/resnet-50",
    ignore_mismatched_sizes=True,
    num_labels=1
     # num_labels=len(label2id),
     # id2label=id2label,
     # label2id=label2id
)
model.classifier[1] = nn.Linear(in_features=2048, out_features=1, bias=True)
model.classifier.add_module("2", nn.Sigmoid())

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# print(f"device: {device}")
# model = model.to(device)



preprocessor_config.json:   0%|          | 0.00/266 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/69.6k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/103M [00:00<?, ?B/s]

Some weights of ResNetForImageClassification were not initialized from the model checkpoint at microsoft/resnet-50 and are newly initialized because the shapes did not match:
- classifier.1.weight: found shape torch.Size([1000, 2048]) in the checkpoint and torch.Size([1, 2048]) in the model instantiated
- classifier.1.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([1]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
from PIL import Image

training_dataset_dir = "/kaggle/input/cats-vs-dogs/dataset/training_dataset"
validation_dataset_dir = "/kaggle/input/cats-vs-dogs/dataset/validation_dataset"

class CatDogDataset(Dataset):
    def __init__(self, dataset_dir, transform=None):
        
        cat_image_dir = os.path.join(dataset_dir, 'Cat', '*.jpg')
        cat_image_paths = glob.glob(cat_image_dir)
        print(f"cat_image_paths: {cat_image_dir}, len: {len(cat_image_paths)}")
        cat_image_labels = torch.zeros(size=(len(cat_image_paths),))
        
        dog_image_dir = os.path.join(dataset_dir, 'Dog', '*.jpg')
        dog_image_paths = glob.glob(dog_image_dir)
        print(f"dog_image_paths: {dog_image_dir}, len: {len(dog_image_paths)}")
        dog_image_labels = torch.ones(size=(len(dog_image_paths),))
        
        self.image_paths = np.concatenate((cat_image_paths, dog_image_paths))
        self.image_labels = np.concatenate(( cat_image_labels, dog_image_labels))

        self.transform = transform
        
         
    def __len__(self):
        return len(self.image_labels)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert('RGB')
        if self.transform:
            image = np.array(image)
            iamge = self.transform(image)
            image = Image.fromarray(image)
            
        label = self.image_labels[idx]
        label = torch.from_numpy(np.array([[label]]))
        return {'image': image,
                 'label': label}
    
    def label_dict(self):
        return {0:'Cat', 1:'Dog'}
    
    def get_cat_dog_image(self):
        cat_image = read_image(self.image_paths[0])
        dog_image = read_image(self.image_paths[-1])
        return cat_image, dog_image

train_transforms = v2.Compose(
    [
#         v2.ToDtype(torch.float32),
#         v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#         v2.Resize(size=(224, 224)),
    ]
)


train_dataset = CatDogDataset(training_dataset_dir, transform=train_transforms)
validation_dataset = CatDogDataset(validation_dataset_dir, transform=None)

cat_image_paths: /kaggle/input/cats-vs-dogs/dataset/training_dataset/Cat/*.jpg, len: 5412
dog_image_paths: /kaggle/input/cats-vs-dogs/dataset/training_dataset/Dog/*.jpg, len: 10788
cat_image_paths: /kaggle/input/cats-vs-dogs/dataset/validation_dataset/Cat/*.jpg, len: 588
dog_image_paths: /kaggle/input/cats-vs-dogs/dataset/validation_dataset/Dog/*.jpg, len: 1212


In [8]:
# train_dataset[0]

In [9]:
# from torch.utils.data import Subset

# # Assuming train_dataset is a PyTorch Dataset
# indices = range(0, 1000)  # Define the range of indices you want
# train_dataset = Subset(train_dataset, indices)
# validation_dataset = Subset(validation_dataset, indices)

In [10]:
# from torch.utils.data import Subset

# # Assuming train_dataset is a PyTorch Dataset
# indices = range(0, 1000)  # Define the range of indices you want
# train_dataset = Subset(train_dataset, indices)
# validation_dataset = Subset(validation_dataset, indices)

In [11]:
from datetime import datetime
from transformers import TrainingArguments, Trainer
import evaluate

print(f"epoch: {EPOCH}")
print(f"batch_size: {BATCH_SIZE}")

TRAIN_DATASET = train_dataset
VALIDATION_DATASET = validation_dataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"device: {device}")
time_now = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")

my_batch = None

training_args = TrainingArguments(
    output_dir=f"detr-resnet-50_{time_now}",
    per_device_train_batch_size=int(BATCH_SIZE),
    per_device_eval_batch_size=int(BATCH_SIZE),
    num_train_epochs=int(EPOCH),
    save_strategy = "epoch",
    evaluation_strategy="epoch",
#     fp16=(device.type == "cuda"),
    fp16=False,
    # save_steps=200,
    logging_steps=8,
    learning_rate=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
    save_total_limit=2,
    seed=42,
    load_best_model_at_end = True,
    # metric_for_best_model="loss",
    remove_unused_columns=False,
    torch_compile=False,
#     dataloader_pin_memory=True,
#     dataloader_num_workers=0,
    push_to_hub=False,
)


def collate_fn(batch):
    image_list = [item['image'] for item in batch]
    label_list = [item['label'] for item in batch]
    pixel_values = image_processor(image_list, return_tensors="pt").pixel_values
    batch_ = {}
    batch_["pixel_values"] = pixel_values#.to(device)
    batch_["labels"] = torch.stack(label_list)#.to(device)
    return batch_


accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
#     predictions = np.argmax(predictions, axis=1)
    threshold = 0.5
    predictions = np.round(predictions)
    print(f"predictions: {predictions}")
    print(f"labels: {labels}")
    return accuracy.compute(predictions=predictions, references=labels)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=TRAIN_DATASET,
    eval_dataset=VALIDATION_DATASET,
    compute_metrics = compute_metrics
    # tokenizer=image_processor,
)

trainer.train()
trainer.save_model(f"detr-resnet-50_{time_now}_best1")

epoch: 5
batch_size: 32
device: cuda


Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.014,0.013955,0.983333
2,0.0176,0.019458,0.975
3,0.0037,0.006372,0.991111
4,0.0003,0.009036,0.987222
5,0.0003,0.008679,0.988333




predictions: [[0.]
 [0.]
 [0.]
 ...
 [1.]
 [1.]
 [1.]]
labels: [[[0.]]

 [[0.]]

 [[0.]]

 ...

 [[1.]]

 [[1.]]

 [[1.]]]




predictions: [[0.]
 [0.]
 [0.]
 ...
 [1.]
 [1.]
 [1.]]
labels: [[[0.]]

 [[0.]]

 [[0.]]

 ...

 [[1.]]

 [[1.]]

 [[1.]]]




predictions: [[0.]
 [0.]
 [0.]
 ...
 [1.]
 [1.]
 [1.]]
labels: [[[0.]]

 [[0.]]

 [[0.]]

 ...

 [[1.]]

 [[1.]]

 [[1.]]]




predictions: [[0.]
 [0.]
 [0.]
 ...
 [1.]
 [1.]
 [1.]]
labels: [[[0.]]

 [[0.]]

 [[0.]]

 ...

 [[1.]]

 [[1.]]

 [[1.]]]




predictions: [[0.]
 [0.]
 [0.]
 ...
 [1.]
 [1.]
 [1.]]
labels: [[[0.]]

 [[0.]]

 [[0.]]

 ...

 [[1.]]

 [[1.]]

 [[1.]]]
