In [1]:
# Import PyDrive and associated libraries.
# This only needs to be done once per notebook.
import logging
logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR)
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [2]:
!gsutil -m cp gs://tibot-ml-labeling/datasets/cats_vs_dogs_dataset.zip  ./

Copying gs://tibot-ml-labeling/datasets/cats_vs_dogs_dataset.zip...
/ [0/1 files][    0.0 B/593.6 MiB]   0% Done                                    ==> NOTE: You are downloading one or more large file(s), which would
run significantly faster if you enabled sliced object downloads. This
feature is enabled by default but requires that compiled crcmod be
installed (see "gsutil help crcmod").

\ [1/1 files][593.6 MiB/593.6 MiB] 100% Done  16.4 MiB/s ETA 00:00:00           
Operation completed over 1 objects/593.6 MiB.                                    


In [3]:
!unzip -q /content/cats_vs_dogs_dataset.zip

In [5]:
import os
import torch
import timm
import poutyne
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder
from sklearn.metrics import classification_report, confusion_matrix
import wandb
from poutyne.framework import Model, Callback
from google.cloud import storage

In [6]:
# Define the directory path
model_dir = '/content/models'

# Check if the directory exists, and create it if it doesn't
if not os.path.exists(model_dir):
    os.makedirs(model_dir)

In [35]:
#  Define dataset paths
train_dir = '/content/dataset/train'
test_dir = '/content/dataset/test'

# Define data transformations
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Create custom datasets
train_dataset = ImageFolder(train_dir, transform=data_transforms['train'])
test_dataset = ImageFolder(test_dir, transform=data_transforms['test'])

# Create data loaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)


In [36]:
# Define your model
model_name = 'tf_efficientnetv2_b2'
num_classes = 2  # Number of classes (cats and dogs)

# Load the model architecture
model = timm.create_model(model_name, pretrained=True, num_classes=num_classes)

In [37]:
# Log in with your WANDB API key
wandb.login(key="1adf1a1fccd0e0da79c739f91daa90e0da69abb7")



True

In [38]:
class UploadModelCallback(poutyne.Callback):
    def __init__(self, bucket_name, remote_dir, num_epochs=1):
        super().__init__()
        self.num_epochs = num_epochs
        self.bucket_name = bucket_name
        self.remote_dir = remote_dir
        self.local_dir = "/content/models"

        self.storage_client = storage.Client()

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) >= 1:  # Upload the model checkpoint after each epoch
            # Save the model checkpoint locally
            checkpoint_path = os.path.join(self.local_dir, f"EfficientNetV2B2_checkpoint-{epoch + 1:02d}.pth")
            self.model.save_weights(checkpoint_path)

            # Upload the model checkpoint to Google Cloud Storage
            remote_filepath = os.path.join(self.remote_dir, os.path.basename(checkpoint_path))
            bucket = self.storage_client.bucket(self.bucket_name)
            blob = bucket.blob(remote_filepath)
            blob.upload_from_filename(checkpoint_path)

            print(f"Model checkpoint saved in {checkpoint_path} and uploaded to {remote_filepath}")

In [39]:
# Initialize wandb
wandb.init(project="cats-vs-dogs-pytorch")

In [40]:
# Define WandbCallback
class WandbCallback(poutyne.Callback):
    def on_epoch_end(self, epoch, logs):
        # Log metrics to Wandb
        wandb.log(logs, step=epoch)

In [41]:
# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [42]:
# Create the Poutyne Model
poutyne_model = Model(model, optimizer, criterion, batch_metrics=['accuracy'])

# Train the model with upload callback
num_epochs = 3
bucket_name = "tibot-ml-labeling"
remote_dir = "models/cats_vs_dogs_task/sarim/EfficientNetV2B2_pretrained_/model"

# Define the upload callback
upload_callback = UploadModelCallback(bucket_name, remote_dir, num_epochs)

# Modify the callbacks list to include the WandbCallback
callbacks = [upload_callback, WandbCallback()]

# Train the model with the updated callbacks list
poutyne_model.fit_generator(train_loader, test_loader, epochs=num_epochs, callbacks=callbacks)

Epoch: 1/3 Train steps: 563 Val steps: 148 23m20.70s loss: 0.185954 acc: 94.346859 val_loss: 0.049492 val_acc: 98.562975
Model checkpoint saved in /content/models/EfficientNetV2B2_checkpoint-02.pth and uploaded to models/cats_vs_dogs_task/sarim/EfficientNetV2B2_pretrained_/model/EfficientNetV2B2_checkpoint-02.pth
Epoch: 2/3 Train steps: 563 Val steps: 148 23m28.72s loss: 0.069403 acc: 97.509728 val_loss: 0.023412 val_acc: 99.260355
Model checkpoint saved in /content/models/EfficientNetV2B2_checkpoint-03.pth and uploaded to models/cats_vs_dogs_task/sarim/EfficientNetV2B2_pretrained_/model/EfficientNetV2B2_checkpoint-03.pth
Epoch: 3/3 Train steps: 563 Val steps: 148 23m22.64s loss: 0.057319 acc: 97.848805 val_loss: 0.018438 val_acc: 99.408284
Model checkpoint saved in /content/models/EfficientNetV2B2_checkpoint-04.pth and uploaded to models/cats_vs_dogs_task/sarim/EfficientNetV2B2_pretrained_/model/EfficientNetV2B2_checkpoint-04.pth


[{'epoch': 1,
  'time': 1400.6971935669999,
  'loss': 0.1859535815237735,
  'acc': 94.34685936631462,
  'val_loss': 0.04949173073810331,
  'val_acc': 98.56297548605241},
 {'epoch': 2,
  'time': 1408.720555935,
  'loss': 0.0694034790954376,
  'acc': 97.50972762476277,
  'val_loss': 0.02341248227664098,
  'val_acc': 99.2603550295858},
 {'epoch': 3,
  'time': 1402.6385119870001,
  'loss': 0.05731876048685495,
  'acc': 97.84880488991008,
  'val_loss': 0.018438215816659543,
  'val_acc': 99.40828402366864}]

In [43]:
# Calculate and print test accuracy
_, test_accuracy = poutyne_model.evaluate_generator(test_loader)
print(f"Test Accuracy: {test_accuracy * 1:.2f}%")

Test steps: 148 1m40.31s test_loss: 0.018438 test_acc: 99.408284                               
Test Accuracy: 99.41%


In [44]:
import numpy as np
import torch
from sklearn.metrics import classification_report, confusion_matrix

# Initialize lists to store predictions and true labels
true_labels = []
predicted_labels = []

# Set the model to evaluation mode
model.eval()

# Move the model to the same device as the input data
model = model.to(device)

# Initialize lists to store predictions and true labels
true_labels = []
predicted_labels = []

# Iterate over the test dataset and collect predictions and true labels
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(inputs)

        # Get predicted class (the class with the highest probability)
        predicted = outputs.argmax(axis=1)

        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(predicted.cpu().numpy())

# Calculate and print the classification report
class_names = train_dataset.classes
classification_rep = classification_report(true_labels, predicted_labels, target_names=class_names)
print("Classification Report:")
print(classification_rep)

# Calculate and print the confusion matrix
conf_matrix = confusion_matrix(true_labels, predicted_labels)
print("\nConfusion Matrix:")
print(conf_matrix)

Classification Report:
              precision    recall  f1-score   support

        cats       1.00      1.00      1.00      3505
        dogs       0.99      0.99      0.99      1227

    accuracy                           0.99      4732
   macro avg       0.99      0.99      0.99      4732
weighted avg       0.99      0.99      0.99      4732


Confusion Matrix:
[[3488   17]
 [  11 1216]]
