In [1]:
import kagglehub

# Fetch the latest published version of the dataset; the call returns the
# local directory the files were extracted to.
DATASET_HANDLE = "bhavikjikadara/dog-and-cat-classification-dataset"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Downloading from https://www.kaggle.com/api/v1/datasets/download/bhavikjikadara/dog-and-cat-classification-dataset?dataset_version_number=1...


100%|██████████| 775M/775M [04:06<00:00, 3.30MB/s] 

Extracting files...





Path to dataset files: C:\Users\ruhalis\.cache\kagglehub\datasets\bhavikjikadara\dog-and-cat-classification-dataset\versions\1


In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Preprocessing applied to every image, in order: resize to 256x256, collapse
# to a single grayscale channel, convert to a tensor, then normalize with
# mean 0.5 and std 0.5.
preprocessing_steps = [
    transforms.Resize((256, 256)),
    transforms.Grayscale(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
]
transform = transforms.Compose(preprocessing_steps)

In [11]:
# Load the images; ImageFolder takes the class labels from the sub-folder names.
# NOTE(review): 'PetImages' is resolved against the current working directory
# and does not use the `path` returned by kagglehub — confirm the folder has
# been copied or linked next to the notebook.
dataset = datasets.ImageFolder(root='PetImages', transform=transform)

# 80/20 train/validation split.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Seed the split so it is identical on every run. Without a fixed generator,
# re-executing this cell reshuffles the split, which makes validation accuracy
# incomparable between runs and can move images the model has already trained
# on into the validation set.
SPLIT_SEED = 42
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Create DataLoader objects for batching; only the training data is shuffled.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [12]:
class SimpleClassifier(nn.Module):
    """Fully connected classifier operating on flattened images.

    Four ReLU-activated hidden layers (512 -> 256 -> 128 -> 64 units) followed
    by a linear output layer that produces one raw logit per class.

    Args:
        input_size: Number of features per sample after flattening
            (channels * height * width).
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 512)  # first hidden layer
        self.fc2 = nn.Linear(512, 256)         # second hidden layer
        self.fc3 = nn.Linear(256, 128)         # third hidden layer
        self.fc4 = nn.Linear(128, 64)          # fourth hidden layer
        self.fc5 = nn.Linear(64, num_classes)  # output layer
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw logits of shape (batch_size, num_classes).

        Args:
            x: Batch of inputs whose trailing dimensions multiply out to
                ``input_size``, e.g. (batch_size, 1, 256, 256) for the
                256x256 grayscale pipeline used in this notebook.
        """
        # Flatten everything after the batch dimension:
        # (batch_size, C, H, W) -> (batch_size, C*H*W).
        # reshape() is used instead of view() so non-contiguous inputs
        # (e.g. transposed or sliced tensors) are accepted as well.
        x = x.reshape(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.relu(self.fc4(x))
        x = self.fc5(x)  # raw logits output; CrossEntropyLoss will apply softmax
        return x

# Derive the flattened input size from an actual preprocessed sample instead
# of hard-coding 1 * 256 * 256, so the first linear layer always matches
# whatever the transform pipeline produces.
sample_image, _ = dataset[0]
input_size = sample_image.numel()
# Number of classes determined from the dataset (should be 2 for your two classes)
num_classes = len(dataset.classes)

# Instantiate the model
model = SimpleClassifier(input_size, num_classes)

In [13]:
# Cross-entropy loss on the model's raw logits, optimized with Adam.
LEARNING_RATE = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Run on the GPU when CUDA is available, otherwise on the CPU. The trailing
# expression also displays the model architecture as the cell output.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model.to(device)

SimpleClassifier(
  (fc1): Linear(in_features=65536, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=128, bias=True)
  (fc4): Linear(in_features=128, out_features=64, bias=True)
  (fc5): Linear(in_features=64, out_features=2, bias=True)
  (relu): ReLU()
)

In [14]:
def _train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimization pass over `loader` and return the mean loss per sample.

    Returns NaN when `loader` yields no batches.
    """
    model.train()
    loss_sum = 0.0
    sample_count = 0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Assumes `criterion` returns the batch mean (the default reduction of
        # nn.CrossEntropyLoss). Weighting by batch size gives a true per-sample
        # average even when the final batch is smaller than the others.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        sample_count += batch_size

    return loss_sum / sample_count if sample_count else float("nan")


def _evaluate(model, loader, device):
    """Return the classification accuracy on `loader` in percent.

    Returns NaN when `loader` yields no samples.
    """
    model.eval()  # set the model to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            # Predicted class = index of the largest logit
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return 100 * correct / total if total else float("nan")


num_epochs = 10
for epoch in range(num_epochs):
    avg_loss = _train_one_epoch(model, train_loader, criterion, optimizer, device)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

    # Evaluate on the validation set
    val_accuracy = _evaluate(model, val_loader, device)
    print(f'Validation Accuracy: {val_accuracy:.2f}%')



Epoch [1/10], Loss: 0.6871
Validation Accuracy: 60.30%
Epoch [2/10], Loss: 0.6533
Validation Accuracy: 60.88%
Epoch [3/10], Loss: 0.6393
Validation Accuracy: 61.36%
Epoch [4/10], Loss: 0.6229
Validation Accuracy: 62.80%
Epoch [5/10], Loss: 0.5972
Validation Accuracy: 63.08%
Epoch [6/10], Loss: 0.5793
Validation Accuracy: 62.56%
Epoch [7/10], Loss: 0.5497
Validation Accuracy: 63.70%
Epoch [8/10], Loss: 0.5175
Validation Accuracy: 64.00%
Epoch [9/10], Loss: 0.4766
Validation Accuracy: 60.32%
Epoch [10/10], Loss: 0.4403
Validation Accuracy: 61.94%


In [15]:
from PIL import Image

# Specify the path to your test image
image_path = 'dog.jpg'

# Open the image with PIL inside a context manager so the file handle is
# closed once the pixels have been read. Converting to RGB first mirrors what
# ImageFolder's default loader does for the training images, so non-RGB files
# (palette, RGBA, ...) go through the same preprocessing.
with Image.open(image_path) as pil_image:
    # Apply the same transformation pipeline used during training
    # (resize to 256x256, grayscale, tensor conversion, normalization)
    image = transform(pil_image.convert("RGB"))

# Add a batch dimension since the model expects a batch of images
image = image.unsqueeze(0).to(device)

# Set the model to evaluation mode and perform inference
model.eval()
with torch.no_grad():
    outputs = model(image)
    # Get the predicted class (index of the highest logit)
    predicted_class = outputs.argmax(dim=1)

# Print the predicted class name
print("Predicted class:", dataset.classes[predicted_class.item()])

Predicted class: Dog
