In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm

REBUILD_DATA = False
DataList = "PetImages/Traing_Data_List.npy"

class DogsVSCats():
    IMG_SIZE = 50
    CATS = "PetImages/Cat"
    DOGS = "PetImages/Dog"
    LABELS = {CATS: 0, DOGS: 1}
    trainingData = []
    catCount = 0
    dogCount = 0
    
    def make_training_data(self):
        for label in self.LABELS:    # label -> path
            print(label)
            for f in tqdm(os.listdir(label)):    # f -> img name, ex: 1.jpg 2.jpg ...
                try:
                    path = os.path.join(label, f)
                    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                    img = cv2.resize(img, (self.IMG_SIZE, self.IMG_SIZE))
                    # one hot, np.eye(2)[0] -> [1, 0], np.eye(5)[2] -> [0, 0, 1, 0, 0]
                    self.trainingData.append([np.array(img), np.eye(2)[self.LABELS[label]]])

                    if label == self.CATS:
                        self.catCount += 1
                    elif label == self.DOGS:
                        self.dogCount += 1
                except Exception as e:
                    pass
                    #print("IMG ERROR: ", e)
        
        np.random.shuffle(self.trainingData)
        np.save(DataList, self.trainingData)
        print("Cats: ", self.catCount)
        print("Dogs: ", self.dogCount)
            

if REBUILD_DATA:
    dogsvscats = DogsVSCats()
    dogsvscats.make_training_data()
                
training_data = np.load(DataList, allow_pickle=True)
print(f"Data load down! {len(training_data)} imgs")

Data load down! 24946 imgs


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms

# CPU 
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        # img size = 50*50
        # input is 1 and output 32 features with 5*5 kernel size
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.conv3 = nn.Conv2d(64, 128, 5)
        self.pool = nn.MaxPool2d(2, 2)
        # img size (W) 50
        # kernel size (K) 5
        # stride (S) 1 (default)
        # padding (P) 0 (default)
        # formula => res = (W - K + 2*P) / s + 1(odd)
        # -> max pool (2,2) -> res /2
        # conv1 (50 - 5 + 2*0) / 1 + 1(odd) = 46  -> max pool /2 = 23
        # conv2 (23 - 5 + 2*0) / 1 (even) = 18  -> max pool /2 = 9
        # conv3 (9 - 5 + 2*0) / 1 (ecen) = 4  -> max pool /2 = 2
        # to fully connected is 128 * 2 * 2
        self.fc1 = nn.Linear(128*2*2, 512)
        self.fc2 = nn.Linear(512, 2)
        
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # x = x.view(-1, 128*2*2)   same as torch.flatten(x, start_dim=1)
        x = torch.flatten(x, start_dim=1)  
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.softmax(x, dim=1)

net = Net()


In [3]:
import torch.optim as optim

optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()

X = torch.Tensor([i[0] for i in training_data]).view(-1, 50, 50)
X = X/255.0  # to 0. ~ 1
y = torch.Tensor([i[1] for i in training_data])

VAL_PCT = 0.1
val_size = int(len(X)*VAL_PCT)
print(val_size)

2494


In [4]:
train_X = X[:-val_size]
train_y = y[:-val_size]

test_X = X[-val_size:]
test_y = y[-val_size:]
print(len(train_X))
print(len(test_X))

22452
2494


In [5]:
BATCH_SIZE = 100
EPOCHS = 1

for epoch in range(EPOCHS):
    for i in tqdm(range(0, len(train_X), BATCH_SIZE)):
        batch_X = train_X[i:i+BATCH_SIZE].view(-1,1, 50, 50)
        batch_y = train_y[i:i+BATCH_SIZE]
        
        net.zero_grad()
        outputs = net(batch_X)
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()
        
print(loss)


100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [01:03<00:00,  3.53it/s]

tensor(0.2147, grad_fn=<MseLossBackward>)





In [7]:
correct = 0
total = 0
with torch.no_grad():
    for i in tqdm(range(len(test_X))):
        real_class = torch.argmax(test_y[i])
        net_out = net(test_X[i].view(-1, 1, 50, 50))[0]
        predicted_class = torch.argmax(net_out)
        if predicted_class == real_class:
            correct += 1
        total += 1
        
print(f"Acc -> {round(correct/total, 3)}%")

100%|█████████████████████████████████████████████████████████████████████████████| 2494/2494 [00:06<00:00, 392.50it/s]

Acc -> 0.656%



