In [None]:
import os
import shutil
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

# Hyperparameters and paths for the CNN training run.
IMG_SIZE = 128  # side length in pixels; images are resized to IMG_SIZE x IMG_SIZE before entering the network
BATCH_SIZE = 32  # number of images per batch served by the DataLoaders
EPOCHS = 25 # an epoch is one full pass over the training set; more epochs mean more weight updates (unrelated to the number of layers)
LR = 1e-3 # learning rate (optimizer step size), written 1e-x: a smaller x means bigger steps, so faster but less stable training, and vice versa

# Source folder; each sub-directory is treated as one identity (class).
src = r"C:\Users\Hp\Desktop\uni\ML\project\extracted_faces"
# Output root under which the train/ and test/ folders are created.
out = r"C:\Users\Hp\Desktop\uni\ML\project\data_split"

# torchvision's ImageFolder reads images from class-named sub-directories (not
# from in-memory arrays), so the split is materialised on disk as
# <out>/train/<identity>/... and <out>/test/<identity>/...
train_dir = os.path.join(out, "train")
test_dir = os.path.join(out, "test")

# Both split folders are derived data, so rebuild them from scratch: files
# left over from an earlier run (or from a different split strategy) would
# otherwise end up in both train and test and inflate the test accuracy.
for split_dir in (train_dir, test_dir):
    if os.path.isdir(split_dir):
        shutil.rmtree(split_dir)
    os.makedirs(split_dir)


def _list_images(folder):
    """Return the sorted paths, relative to *folder*, of all image files under it."""
    found = []
    for root, _subdirs, names in os.walk(folder):
        for name in names:
            # Only count files ImageFolder itself would load, so stray files
            # (e.g. Thumbs.db) never make up a split on their own.
            if name.lower().endswith(datasets.folder.IMG_EXTENSIONS):
                found.append(os.path.relpath(os.path.join(root, name), folder))
    return sorted(found)


def _copy_images(identity, rel_paths, split_dir):
    """Copy the given images of *identity* from ``src`` into ``split_dir/identity``."""
    for rel_path in rel_paths:
        target = os.path.join(split_dir, identity, rel_path)
        os.makedirs(os.path.dirname(target), exist_ok=True)
        shutil.copy2(os.path.join(src, identity, rel_path), target)


# sorted(): os.listdir returns entries in arbitrary order, and the seeded
# split below is only reproducible for a fixed input order.
identities = sorted(d for d in os.listdir(src) if os.path.isdir(os.path.join(src, d)))

# Split the IMAGES of every identity 80/20 instead of splitting the identity
# folders themselves. ImageFolder numbers the classes independently for each
# root directory, so train and test must contain exactly the same identities;
# otherwise the test labels refer to people the classifier never saw and the
# reported accuracy is meaningless.
for identity in identities:
    images = _list_images(os.path.join(src, identity))
    if len(images) < 2:
        # A single image cannot appear in both splits; dropping the identity
        # keeps the class lists of train and test identical.
        print(f"Skipping {identity!r}: needs at least 2 images, found {len(images)}")
        continue
    train_files, test_files = train_test_split(images, test_size=0.2, random_state=42)
    _copy_images(identity, train_files, train_dir)
    _copy_images(identity, test_files, test_dir)

# Preprocessing pipelines. Both resize to IMG_SIZE x IMG_SIZE, convert to a
# tensor and normalise each of the 3 channels with mean 0.5 / std 0.5; only the
# training pipeline adds random augmentation (horizontal flip, rotation of up
# to 10 degrees).
channel_mean = [0.5] * 3
channel_std = [0.5] * 3

# NOTE: the misspelled name `train_trasnform` is kept unchanged in case other
# cells refer to it.
train_trasnform = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ]
)
test_transform = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ]
)

# One dataset per split folder; every sub-directory becomes a class.
train_ds = datasets.ImageFolder(root=train_dir, transform=train_trasnform)
test_ds = datasets.ImageFolder(root=test_dir, transform=test_transform)

# Shuffle only the training data; the test order stays fixed.
train_loader = DataLoader(dataset=train_ds, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_ds, batch_size=BATCH_SIZE, shuffle=False)

# The number of training classes determines the size of the model's output layer.
nb_classes = len(train_ds.classes)
print("Classes:", nb_classes)

class CNN(nn.Module):
    """Small convolutional classifier for square RGB images.

    Three ``Conv2d -> ReLU -> MaxPool2d(2)`` stages (3 -> 32 -> 64 -> 128
    channels) are followed by two fully connected layers. Each pooling stage
    halves the spatial size, so an ``img_size`` x ``img_size`` input leaves the
    last stage as a ``128 x (img_size // 8) x (img_size // 8)`` feature map.

    Args:
        num_classes: Number of output logits (one per class).
        img_size: Side length in pixels of the square input images. The
            default of 128 reproduces the previously hard-coded
            ``128 * 16 * 16`` input size of ``fc1``.

    Raises:
        ValueError: If ``img_size`` is below 8, which would leave an empty
            feature map after the three pooling stages.
    """

    def __init__(self, num_classes: int, img_size: int = 128):
        # nn.Module keeps track of layers assigned as attributes, which is
        # what makes them show up in model.parameters() and state_dict().
        super().__init__()
        if img_size < 8:
            raise ValueError(f"img_size must be at least 8, got {img_size}")

        # padding=1 with a 3x3 kernel keeps the spatial size unchanged.
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2)

        # Three floor-halvings in a row are equivalent to one floor division by 8.
        feature_size = img_size // 8
        self.fc1 = nn.Linear(128 * feature_size * feature_size, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw class logits for a batch of images.

        Args:
            x: Batch of shape ``(batch, 3, img_size, img_size)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``. No softmax is applied.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))
        # Collapse channels and spatial dims into one feature vector per image.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

# Everything runs on the CPU; the device is named once and reused below.
device = torch.device("cpu")

model = CNN(nb_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

# ---- Training: EPOCHS passes over the training loader ----
for epoch in range(EPOCHS):
    model.train()
    batch_losses = []
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Clear the gradients of the previous step, then backpropagate the
        # loss of this batch and update the weights.
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    # Reported value is the mean of the per-batch losses of this epoch.
    total_loss = sum(batch_losses)
    print(f"Epoch {epoch+1:02d} | train_loss={total_loss/len(train_loader):.4f}")

# ---- Evaluation: fraction of test images whose top-scoring class is correct ----
model.eval()
correct = 0
total = 0

# Gradients are not needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        logits = model(images)
        preds = logits.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

print("TEST ACCURACY:", correct / total)

# Only the learned weights (state dict) are stored, not the model object.
torch.save(model.state_dict(), "cnn_tts_cpu.pth")
