In [2]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from PIL import Image
from sklearn.model_selection import train_test_split
import subprocess
import threading
import time
import copy
import torch.cuda.amp as amp
from torch.optim import lr_scheduler

# Define custom dataset
class CatBreedsDataset(Dataset):
    """Map-style dataset pairing image files on disk with their labels.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, index-aligned with ``image_paths``.
        transform: Optional callable applied to the RGB PIL image before it
            is returned. When ``None`` the PIL image is returned as-is.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return it as ``(image, label)``.

        The image is opened from ``image_paths[idx]``, converted to RGB and
        passed through ``transform`` when one was supplied.
        """
        # Open inside a context manager so the file handle is released
        # deterministically; convert() returns a fully loaded copy, so the
        # result stays usable after the file is closed.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert('RGB')
        label = self.labels[idx]
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# GPU usage monitoring function
def get_gpu_usage():
    """Return the current GPU utilization in percent, as reported by nvidia-smi.

    ``nvidia-smi`` prints one utilization value per line, one line per GPU.
    With a single GPU that value is returned unchanged; with several GPUs the
    highest value is returned so a busy device is never reported as idle.

    Returns:
        int: GPU utilization percentage.

    Raises:
        FileNotFoundError: If the ``nvidia-smi`` executable cannot be found.
        ValueError: If the output contains no parseable utilization value.
    """
    result = subprocess.run(
        ['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'],
        stdout=subprocess.PIPE,
    )
    output = result.stdout.decode('utf-8')
    # One reading per non-empty line; int() tolerates surrounding whitespace.
    readings = [int(line) for line in output.splitlines() if line.strip()]
    if not readings:
        raise ValueError(f"nvidia-smi returned no GPU utilization values: {output!r}")
    return max(readings)

# Dynamic DataLoader class
class DynamicDataLoader:
    """Wrap a shuffling ``DataLoader`` whose worker count follows GPU usage.

    A background thread polls ``get_gpu_usage()``. When utilization is well
    below ``target_gpu_usage`` the worker count is raised (up to
    ``max_workers``); when it is well above, the count is lowered (down to
    ``min_workers``). A new ``DataLoader`` is built whenever the count
    changes. Callers must call ``get_loader()`` again (e.g. once per epoch)
    to pick up the rebuilt loader; a loader that is already being iterated
    is not modified.

    Args:
        dataset: Dataset handed to ``DataLoader``.
        batch_size: Samples per batch.
        num_workers: Initial number of loader worker processes.
        pin_memory: Forwarded to ``DataLoader``.
        prefetch_factor: Batches prefetched per worker; only applied when
            ``num_workers`` is greater than zero.
    """

    def __init__(self, dataset, batch_size=32, num_workers=4, pin_memory=True, prefetch_factor=2):
        self.dataset = dataset
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.pin_memory = pin_memory
        self.prefetch_factor = prefetch_factor
        # Tuning knobs for the adjustment loop.
        self.target_gpu_usage = 95   # desired GPU utilization, in percent
        self.usage_tolerance = 10    # dead band around the target, in percent
        self.min_workers = 1
        self.max_workers = 16
        self.poll_interval = 20      # seconds between utilization checks
        self.adjusting = False
        self.adjust_thread = None
        # Lets stop_adjusting() interrupt the wait between polls immediately.
        self._stop_event = threading.Event()
        self.loader = self.create_loader()

    def create_loader(self):
        """Build a ``DataLoader`` from the current settings."""
        worker_options = {}
        if self.num_workers > 0:
            # DataLoader rejects these two options when num_workers == 0.
            worker_options['prefetch_factor'] = self.prefetch_factor
            worker_options['persistent_workers'] = True
        return DataLoader(self.dataset, batch_size=self.batch_size, shuffle=True,
                          num_workers=self.num_workers, pin_memory=self.pin_memory,
                          **worker_options)

    def adjust_num_workers(self):
        """Poll GPU usage and retune ``num_workers`` until adjusting is stopped.

        Runs on the background thread started by ``start_adjusting``.
        """
        while self.adjusting:
            gpu_usage = get_gpu_usage()
            print(f"Current GPU usage: {gpu_usage}%")
            previous_workers = self.num_workers
            if (gpu_usage < self.target_gpu_usage - self.usage_tolerance) and (self.num_workers < self.max_workers):
                self.num_workers += 1
                print(f"Increasing num_workers to {self.num_workers}")
            elif (gpu_usage > self.target_gpu_usage + self.usage_tolerance) and (self.num_workers > self.min_workers):
                self.num_workers -= 1
                print(f"Decreasing num_workers to {self.num_workers}")
            if self.num_workers != previous_workers:
                # Rebuild only when something changed; an identical loader
                # would just discard its worker processes for nothing.
                self.loader = self.create_loader()
            # Sleep until the next poll, waking early if a stop is requested.
            self._stop_event.wait(self.poll_interval)

    def start_adjusting(self):
        """Start the background adjustment thread (no-op if already running)."""
        if self.adjust_thread is not None and self.adjust_thread.is_alive():
            return
        self.adjusting = True
        self._stop_event.clear()
        # Daemon thread: a forgotten monitor must not keep the process alive.
        self.adjust_thread = threading.Thread(target=self.adjust_num_workers, daemon=True)
        self.adjust_thread.start()

    def stop_adjusting(self):
        """Stop the adjustment thread and wait for it to finish.

        Safe to call when the thread was never started or already stopped.
        """
        self.adjusting = False
        self._stop_event.set()
        if self.adjust_thread is not None:
            self.adjust_thread.join()

    def get_loader(self):
        """Return the most recently built ``DataLoader``."""
        return self.loader

# Check if GPU is available
if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available. Please check your GPU installation.")

# Set the path to your dataset
data_dir = './CatBreeds/'
# One sub-directory per class. os.listdir() returns entries in arbitrary
# order, so sort to keep the class-index mapping stable between runs, and
# skip stray files (e.g. .DS_Store) that would break the listing below.
categories = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)

# Prepare data: parallel lists of image paths and integer class indices
image_paths = []
labels = []

for idx, category in enumerate(categories):
    category_path = os.path.join(data_dir, category)
    # Sorted so the seeded split below selects the same files on every run.
    for img_name in sorted(os.listdir(category_path)):
        image_paths.append(os.path.join(category_path, img_name))
        labels.append(idx)

# Split data into train and validation sets (80/20, stratified by class)
train_paths, val_paths, train_labels, val_labels = train_test_split(image_paths, labels, test_size=0.2, stratify=labels, random_state=42)

# Define transformations
size = 320  # side length (pixels) of the square images fed to the model

# Training pipeline: resize, random augmentation, then tensor conversion and
# per-channel normalization.
train_transforms = transforms.Compose([
    transforms.Resize(size=(size, size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(degrees=30),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
    transforms.RandomResizedCrop(size=size, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Validation pipeline: no augmentation, same resize and normalization.
val_transforms = transforms.Compose([
    transforms.Resize(size=(size, size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Create datasets
train_dataset = CatBreedsDataset(
    image_paths=train_paths,
    labels=train_labels,
    transform=train_transforms,
)
val_dataset = CatBreedsDataset(
    image_paths=val_paths,
    labels=val_labels,
    transform=val_transforms,
)

# Create dynamic data loader and launch its background monitor
dynamic_loader = DynamicDataLoader(dataset=train_dataset)
dynamic_loader.start_adjusting()

# Define the model: EfficientNet-B4 with pretrained ImageNet weights
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
pretrained_weights = models.EfficientNet_B4_Weights.IMAGENET1K_V1
base_model = models.efficientnet_b4(weights=pretrained_weights).to(device)

# Get the number of features: push one dummy image through the feature
# extractor and count the elements of the resulting feature map.
dummy_input = torch.randn(1, 3, size, size).to(device)
base_model.eval()
with torch.no_grad():
    dummy_output = base_model.features(dummy_input)
# Batch size is 1, so the element count of sample 0 is channels * height * width.
num_features = dummy_output[0].numel()

class CustomModel(nn.Module):
    """Classifier head on top of a backbone's ``features`` extractor.

    The backbone's feature map is passed through dropout, flattened per
    sample and fed to a single linear layer.

    Args:
        base_model: Module exposing a ``features`` sub-module.
        num_classes: Number of output logits.
        dropout: Dropout probability applied to the feature map.
        in_features: Flattened size of one sample's feature map. When
            omitted, the module-level ``num_features`` value is used, which
            keeps existing three-argument calls working.
    """

    def __init__(self, base_model, num_classes, dropout, in_features=None):
        super().__init__()
        if in_features is None:
            # Backward-compatible fallback to the value computed at module level.
            in_features = num_features
        self.base_model = base_model
        self.dropout = nn.Dropout(p=dropout)
        self.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for images ``x``."""
        x = self.base_model.features(x)
        x = self.dropout(x)
        # flatten() also handles non-contiguous feature maps, where view() raises.
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

# Hyperparameters
batch_size = 32
lr = 0.001
weight_decay = 0.001
dropout = 0.5
num_epochs = 10

# Set up data loaders
# Validation metrics are aggregated over the whole set, so shuffling the
# validation data adds work without changing the result.
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True, prefetch_factor=4, persistent_workers=True)
dataloaders = {'train': dynamic_loader.get_loader(), 'val': val_loader}

# Model, criterion, optimizer, and scheduler
model = CustomModel(base_model, len(categories), dropout).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
scaler = amp.GradScaler()  # loss scaling for mixed-precision training
# The schedule spans steps_per_epoch * epochs steps in total, i.e. it is
# sized for one scheduler.step() per training batch.
scheduler = lr_scheduler.OneCycleLR(optimizer, max_lr=lr, steps_per_epoch=len(dataloaders['train']), epochs=num_epochs)

# Training the model
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
# NOTE(review): patience (12) exceeds num_epochs (10); the counter grows by at
# most one per epoch, so early stopping cannot trigger with these values.
patience = 12
trigger_times = 0

# Train/validate for num_epochs, keeping the weights with the best validation
# accuracy. try/finally guarantees the GPU-monitor thread is stopped even if
# training raises.
early_stopped = False
try:
    for epoch in range(num_epochs):
        # Pick up the most recent training loader so worker-count changes
        # made by the monitor thread actually take effect.
        dataloaders['train'] = dynamic_loader.get_loader()

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            # "targets" rather than "labels" so the module-level label list
            # is not overwritten by the loop variable.
            for inputs, targets in dataloaders[phase]:
                inputs = inputs.to(device)
                targets = targets.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    with amp.autocast():
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, targets)

                    if is_train:
                        scaler.scale(loss).backward()
                        scaler.step(optimizer)
                        scaler.update()
                        # OneCycleLR was built with steps_per_epoch * epochs
                        # total steps, so it must advance once per batch.
                        scheduler.step()

                # Weight the batch-mean loss by batch size for a dataset-level mean.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == targets.data)

            dataset_size = len(dataloaders[phase].dataset)
            epoch_loss = running_loss / dataset_size
            epoch_acc = running_corrects.double() / dataset_size

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                trigger_times = 0
            elif phase == 'val':
                trigger_times += 1
                if trigger_times >= patience:
                    print('Early stopping!')
                    early_stopped = True

        if early_stopped:
            # Leave the loop instead of calling exit(), which would also tear
            # down an interactive session.
            break

    # Restore the best weights and save them; the file name records whether
    # training ended through early stopping.
    model.load_state_dict(best_model_wts)
    if early_stopped:
        torch.save(model.state_dict(), 'cat_breed_classifier_best.pth')
    else:
        torch.save(model.state_dict(), 'cat_breed_classifier.pth')
finally:
    dynamic_loader.stop_adjusting()


Current GPU usage: 0%
Increasing num_workers to 5
Current GPU usage: 0%
Increasing num_workers to 7
Current GPU usage: 8%
Increasing num_workers to 6
Current GPU usage: 9%
Increasing num_workers to 8
Current GPU usage: 10%
Increasing num_workers to 7
Current GPU usage: 4%
Increasing num_workers to 9
Current GPU usage: 7%
Increasing num_workers to 8
Current GPU usage: 5%
Increasing num_workers to 10
Current GPU usage: 6%
Increasing num_workers to 9
Current GPU usage: 8%
Increasing num_workers to 11
Current GPU usage: 10%
Increasing num_workers to 10
Current GPU usage: 21%
Increasing num_workers to 12
Current GPU usage: 9%
Increasing num_workers to 11
Current GPU usage: 3%
Increasing num_workers to 13
Current GPU usage: 0%
Increasing num_workers to 12
Current GPU usage: 8%
Increasing num_workers to 14
Current GPU usage: 8%
Increasing num_workers to 13
