## Imports

In [1]:
import torch
from torch import nn
from torch import Tensor
import torch.optim as optim
from torchvision import models, transforms, datasets

# Sanity check: ask PyTorch whether a CUDA device is usable; the value is
# displayed as this cell's output.
torch.cuda.is_available()

True

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import glob
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import cv2

In [None]:
# Pick the compute device once: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Report the choice so it is visible in the cell output.
print(f"Using device: {device}")

In [None]:

# `os.path.join` is used below, but `os` is not imported in the cells above,
# so import it here to keep this cell runnable on its own.
import os

# Load VGG16 with ImageNet weights.
# NOTE(review): `pretrained=True` is deprecated in torchvision >= 0.13 in
# favour of the `weights=` argument; it is kept for older installations.
vgg16 = models.vgg16(pretrained=True)

# Freeze the base model so only the new classifier head is trained.
for param in vgg16.parameters():
    param.requires_grad = False

# Replace the classifier with a small binary head. The layers are created
# after the freeze above, so their parameters remain trainable.
vgg16.classifier = nn.Sequential(
    nn.Linear(512 * 7 * 7, 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, 1),
    nn.Sigmoid(),  # probability output, paired with nn.BCELoss
)

# The dataset sub-directories; every split uses the same preprocessing.
splits = ('train', 'validation', 'test')

# Transformations. torchvision's ImageNet-pretrained weights expect inputs
# normalised with the ImageNet channel statistics, so apply them after
# ToTensor() instead of feeding raw [0, 1] pixel values.
data_transforms = {
    split: transforms.Compose([
        transforms.Resize((150, 150)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    for split in splits
}

# Define datasets: one ImageFolder per split, with class labels taken from
# the sub-directory names.
data_dir = 'C:/ml/data/kaggle/processed/cats_and_dogs_small'
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in splits
}

# Only the training split needs shuffling; validation and test are read in a
# fixed order so evaluation runs are repeatable.
dataloaders = {
    split: torch.utils.data.DataLoader(
        image_datasets[split],
        batch_size=20,
        shuffle=(split == 'train'),
        num_workers=4,
    )
    for split in splits
}



In [None]:
def _forward_features(model, inputs):
    """Return the feature maps ``model`` produces for ``inputs``.

    When ``model`` exposes a ``features`` sub-module (the torchvision VGG
    layout), only that convolutional base -- followed by ``avgpool`` when it
    exists -- is run, so the classifier head is skipped. Any other model is
    simply called on the inputs.
    """
    backbone = getattr(model, "features", None)
    if backbone is None:
        return model(inputs)
    maps = backbone(inputs)
    pool = getattr(model, "avgpool", None)
    return maps if pool is None else pool(maps)


def extract_features(model, dataloader, sample_count):
    """Collect flattened features and labels for up to ``sample_count`` samples.

    Args:
        model: Network used as the feature extractor (see
            ``_forward_features`` for which part of it is run).
        dataloader: Iterable yielding ``(inputs, labels)`` batches. Batches
            may have any size, including a smaller final batch.
        sample_count: Number of rows in the returned tensors.

    Returns:
        A ``(features, labels)`` pair of CPU tensors with ``sample_count``
        rows each. ``features`` has one flattened feature vector per row.
        If the dataloader yields fewer than ``sample_count`` samples the
        remaining rows stay zero.
    """
    # Run on whatever device the model already lives on, so the call works
    # whether or not the model has been moved to the GPU yet.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    features = None  # allocated once the feature width is known
    labels = torch.zeros(sample_count)
    filled = 0

    was_training = model.training
    model.eval()  # keep dropout and similar layers deterministic while extracting
    try:
        with torch.no_grad():  # inference only: no autograd graph needed
            for inputs, labels_batch in dataloader:
                remaining = sample_count - filled
                if remaining <= 0:
                    break
                outputs = _forward_features(model, inputs.to(run_device))
                flat = outputs.reshape(outputs.size(0), -1).cpu()
                # Never write past the end of the buffers on the last batch.
                take = min(flat.size(0), remaining)
                if features is None:
                    features = torch.zeros(sample_count, flat.size(1))
                features[filled:filled + take] = flat[:take]
                labels[filled:filled + take] = labels_batch[:take]
                filled += take
    finally:
        # Restore the caller's train/eval mode.
        model.train(was_training)

    if features is None:
        # No batch was processed; fall back to the VGG16 feature width.
        features = torch.zeros(sample_count, 512 * 7 * 7)
    return features, labels

# Move the network to the target device BEFORE extracting features: the
# extraction sends each input batch to `device`, so the weights must already
# be there. `nn.Module.to` moves the parameters in place and returns the same
# module, so `model` and `vgg16` refer to the same object.
model = vgg16.to(device)

# Extract features
train_features, train_labels = extract_features(vgg16, dataloaders['train'], 2000)
validation_features, validation_labels = extract_features(vgg16, dataloaders['validation'], 1000)
test_features, test_labels = extract_features(vgg16, dataloaders['test'], 1000)

# Binary cross-entropy on the sigmoid output; only the classifier head's
# parameters are handed to the optimizer.
criterion = nn.BCELoss()
optimizer = optim.RMSprop(model.classifier.parameters(), lr=2e-5)



In [None]:

def train_model(model, criterion, optimizer, num_epochs=30, dataloader=None):
    """Train ``model`` and print the mean per-sample loss after every epoch.

    Args:
        model: Network whose output, flattened to one value per sample, is
            passed to ``criterion``.
        criterion: Loss called as ``criterion(outputs.view(-1), labels.float())``.
        optimizer: Optimizer that updates the trainable parameters.
        num_epochs: Number of passes over the data.
        dataloader: Iterable of ``(inputs, labels)`` batches. Defaults to the
            notebook-level ``dataloaders['train']`` when omitted.

    Returns:
        None. Progress is reported through ``print``.
    """
    if dataloader is None:
        dataloader = dataloaders['train']

    # Send each batch to the device the model lives on.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        samples_seen = 0
        for inputs, labels in dataloader:
            inputs = inputs.to(run_device)
            labels = labels.to(run_device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs.view(-1), labels.float())
            loss.backward()
            optimizer.step()
            # `loss` is the batch mean, so weight it by the batch size to get
            # a correct per-sample average even when the last batch is smaller.
            batch_size = inputs.size(0)
            running_loss += loss.item() * batch_size
            samples_seen += batch_size
        # Average over the samples actually processed rather than a fixed
        # dataset size; an empty dataloader reports 0.0.
        epoch_loss = running_loss / samples_seen if samples_seen else 0.0
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')

# Start training with the default number of epochs (num_epochs=30).
train_model(model, criterion, optimizer)