In [22]:
import numpy as np
import torch
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader, Subset, Dataset
from torch.optim import Adam
import matplotlib.pyplot as plt
import deeplake
from sklearn.model_selection import train_test_split

In [23]:
# Checking for device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.cuda.is_available())

True


In [24]:
# Transforms
transformer = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

In [25]:
ds = deeplake.load('hub://activeloop/kaggle-cats-dogs')

/

Opening dataset in read-only mode as you don't have write permissions.


|

This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/activeloop/kaggle-cats-dogs



|

hub://activeloop/kaggle-cats-dogs loaded successfully.



 

In [5]:
ds

Dataset(path='hub://activeloop/kaggle-cats-dogs', read_only=True, tensors=['images', 'labels'])

In [26]:
# Custom Dataset Wrapper
class DeeplakeDatasetWrapper(Dataset):
    def __init__(self, ds, transform=None):
        self.ds = ds
        self.transform = transform

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, idx):
        # Get image and label
        image = self.ds[idx]['images'].numpy()
        label = self.ds[idx]['labels'].numpy()

        # Apply transformations
        if self.transform:
            image = self.transform(image)

        return image, label

# Wrap the dataset with the custom wrapper
wrapped_ds = DeeplakeDatasetWrapper(ds, transform=transformer)

# Get indices for train/test split
indices = list(range(len(wrapped_ds)))
train_indices, test_indices = train_test_split(indices, test_size=0.2, random_state=42)

# Create subsets for training and testing
train_ds = Subset(wrapped_ds, train_indices)
test_ds = Subset(wrapped_ds, test_indices)

# Create PyTorch DataLoaders for train and test sets
train_loader = DataLoader(train_ds, batch_size=256, shuffle=True, num_workers=0)
test_loader = DataLoader(test_ds, batch_size=256, shuffle=False, num_workers=0)


In [27]:
train_ds

<torch.utils.data.dataset.Subset at 0x19befd507a0>

In [28]:
class ConvNet(nn.Module):
    def __init__(self, num_classes=2):
        super(ConvNet, self).__init__()
        # Convolution and Pooling Layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=12)
        self.relu1 = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.conv3 = nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(num_features=32)
        self.relu3 = nn.ReLU()
        self.fc = nn.Linear(in_features=32*75*75, out_features=num_classes)

    def forward(self, input):
        output = self.conv1(input)
        output = self.bn1(output)
        output = self.relu1(output)
        output = self.pool(output)
        output = self.conv2(output)
        output = self.relu2(output)
        output = self.conv3(output)
        output = self.bn3(output)
        output = self.relu3(output)
        output = output.view(-1, 32*75*75)
        output = self.fc(output)
        return output

In [29]:
model = ConvNet(num_classes=2).to(device)

In [30]:
 # Optimizer and Loss Function
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
loss_function = nn.CrossEntropyLoss()

In [31]:
num_epochs = 20

In [32]:
# Initialize lists to store training and test metrics
train_losses = []
test_accuracies = []
train_accuracies = []


In [33]:
# Model training and saving the best model
best_accuracy = 0
for epoch in range(num_epochs):
    model.train()
    train_accuracy = 0.0
    train_loss = 0.0
    
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.cpu().item() * images.size(0)
        _, prediction = torch.max(outputs.data, 1)
        train_accuracy += (prediction == labels.data).sum().item()

    train_accuracy /= len(train_ds)
    train_loss /= len(train_ds)
    
    # Store train loss and accuracy
    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)
    
    # Evaluation on test dataset
    model.eval()
    test_accuracy = 0.0
    print("Testing started")
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, prediction = torch.max(outputs.data, 1)
            test_accuracy += (prediction == labels.data).sum().item()

    test_accuracy /= len(test_ds)
    test_accuracies.append(test_accuracy)

    print(f'Epoch: {epoch} Train Loss: {train_loss:.4f} Train Accuracy: {train_accuracy:.4f} Test Accuracy: {test_accuracy:.4f}')

    # Save the best model
    if test_accuracy > best_accuracy:
        print("Best model saved")
        torch.save(model.state_dict(), 'best_checkpoint.model')
        best_accuracy = test_accuracy


Exception ignored in: <function Dataset.__del__ at 0x0000019BC04D3E20>
Traceback (most recent call last):
  File "C:\Users\HP\AppData\Local\Programs\Python\Python312\Lib\site-packages\deeplake\core\dataset\dataset.py", line 1377, in __getattr__
    return self.__getitem__(key)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\HP\AppData\Local\Programs\Python\Python312\Lib\site-packages\deeplake\core\dataset\dataset.py", line 545, in __getitem__
    is_iteration = is_iteration or self.is_iteration
                                   ^^^^^^^^^^^^^^^^^
  File "C:\Users\HP\AppData\Local\Programs\Python\Python312\Lib\site-packages\deeplake\core\dataset\dataset.py", line 1377, in __getattr__
    return self.__getitem__(key)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\HP\AppData\Local\Programs\Python\Python312\Lib\site-packages\deeplake\core\dataset\dataset.py", line 545, in __getitem__
    is_iteration = is_iteration or self.is_iteration
                                   ^^^^^^^^^^^^^^

TypeError: Unexpected type <class 'numpy.ndarray'>