In [1]:
import torch
from torch import nn
from torchvision import transforms
from pathlib import Path
from PIL import Image
import random

In [2]:
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
device

'mps'

## Load data

In [3]:
IMAGE_DIR = "../data"

In [4]:
image_path = Path(IMAGE_DIR)

In [5]:
path_classes = [ d for d in image_path.iterdir() if d.is_dir() and not d.name.startswith('.')]

In [6]:
samples = []
for path in path_classes:
    files = list(path.glob('*.png'))
    for file in files:
        samples.append((file , 1.0 if path.name == "cat" else 0.0))

In [7]:
transform = transforms.ToTensor()

In [8]:
random.shuffle(samples)

In [9]:
X = torch.zeros(len(samples),1,128,128)
y = torch.zeros(len(samples))

In [10]:
for idx in range(len(samples)):
    X[idx] = transform(Image.open(samples[idx][0]).resize((128,128)))
    y[idx] = samples[idx][1]

Above code loads images from IMAGE_DIR, shuffles the samples and then creates corresponding tensors. X for data and y for lables. In the lables 1.0 means cat and 0.0 means dog

In [11]:
X.dtype, y.dtype

(torch.float32, torch.float32)

## Build the model

In [12]:
class MyCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(128*128,32*32),
            nn.ReLU(),
            nn.Linear(32*32, 16*16),
            nn.ReLU(),
            nn.Linear(16*16,1)
        )
    def forward(self, x, **kwargs):
        logits = self.linear_relu_stack(x)
        return logits

In [13]:
model = MyCNN()

In [14]:
model.parameters

<bound method Module.parameters of MyCNN(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=16384, out_features=1024, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1024, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=1, bias=True)
  )
)>

### Setup training loop

In [15]:
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [16]:
percentage = 0.8
split_index = int(len(X)*percentage)
X_train = X[:split_index]
y_train = y[:split_index]
X_test = X[split_index:]
y_test = y[split_index:]
X_train.shape

torch.Size([128, 1, 128, 128])

In [17]:
EPOCHS = 10

for epoch in range(EPOCHS):
    model.train()
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)
    optimizer.zero_grad()
    loss.backwards()
    optimizer.step()
    model.eval()
    print(f"Epoch {epoch} | Loss: {loss}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (16384x128 and 16384x1024)