In [None]:
from torchvision import transforms

# Neke dodatne transformacije podataka samo za ResNet
Definiraju se dvije grupe transformacija: 
*train_tf* za trening skup podataka i 
*val_tf* za validacioni skup podataka. 
Prvi uključuje augmentaciju podataka poput nasumičnog izrezivanja, rotacije, promjene boja, sivih tonova i horizontalnog preokretanja. Drugi koristi samo promjenu veličine i centralno izrezivanje. Obje transformacije normalizuju pikselne vrijednosti slika.

In [None]:
# Per-channel normalisation constants used by both pipelines below.
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Training pipeline: random crop/rotation/colour/grayscale/flip augmentation,
# followed by tensor conversion and normalisation.
_train_steps = [
    transforms.RandomResizedCrop(224, scale=(0.5, 1.0), ratio=(0.75, 1.33)),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.05),
    transforms.RandomGrayscale(p=0.1),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
]
train_tf = transforms.Compose(_train_steps)

# Validation pipeline: deterministic resize + centre crop, then the same
# tensor conversion and normalisation (no augmentation).
_val_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
]
val_tf = transforms.Compose(_val_steps)

In [None]:
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

Slijedi učitavanje trening i validacionog skupa podataka.
Zatim se kreiraju DataLoader objekti (train_loader i val_loader) za efikasno učitavanje podataka u serijama (batch-evima) tokom treninga i validacije.

In [None]:
# Build one dataset per split, each with its own transform pipeline.
train_ds = ImageFolder(root=r"F:\projekt13\data_changed\train", transform=train_tf)
val_ds = ImageFolder(root=r"F:\projekt13\data_changed\val", transform=val_tf)

# Settings common to both loaders; only the training loader shuffles.
_loader_opts = dict(batch_size=32, num_workers=4)
train_loader = DataLoader(train_ds, shuffle=True, **_loader_opts)
val_loader = DataLoader(val_ds, shuffle=False, **_loader_opts)

Uvoze se torch i torch.nn moduli za rad sa PyTorch tenzorima i neuronskim mrežama.

Uvoze se i resnet18 model i ResNet18_Weights iz torchvision.models za korištenje prethodno obučenog (pretrained) ResNet-18 modela.


In [None]:
import torch
import torch.nn as nn
from torchvision.models import resnet18, ResNet18_Weights

Ovdje prvo provjeravamo imamo li jaku grafičku karticu (GPU) na računaru. Ako imamo, koristit ćemo nju jer je puno brža za ove stvari. Ako ne, onda ćemo raditi na običnom procesoru (CPU).

Zatim uzimamo ResNet-18. Pošto naš model treba da prepoznaje specifične stvari (onoliko klasa koliko ih imamo u našem skupu podataka), moramo mu promijeniti zadnji dio (ovo ide po layerima). Ovdje je namješteno da uči samo zadnji dio i pretposljednji sloj (layer4), da ne zaboravi ono što je već naučio, ali da se prilagodi našim slikama.

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Start from the default pretrained ResNet-18 weights.
model = resnet18(weights=ResNet18_Weights.DEFAULT)

# Replace the classification head with one sized to the dataset's classes.
num_classes = len(train_ds.classes)
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Leave only layer4 and the new head trainable; every other parameter is frozen.
_trainable_prefixes = ("layer4.", "fc.")
for param_name, tensor in model.named_parameters():
    tensor.requires_grad = param_name.startswith(_trainable_prefixes)

model = model.to(device)

Prvo, opet kažemo modelu da na početku uči samo onaj zadnji sloj koji smo promijenili. 
Onda mu dajemo "uputstva" kako da se poboljšava – to je optimizer. Korišten je Adam.
Imamo i "planera učenja" (scheduler). To znači da će se brzina učenja smanjivati svakih pet epoha (ciklusa učenja) kako bi model finije prilagodio svoje znanje.
I na kraju, trebamo način da izmjerimo koliko model griješi. Za to koristimo CrossEntropyLoss. 

In [7]:
# 5.1 Warm-up phase: only the classification head (fc) is trainable.
for param_name, tensor in model.named_parameters():
    tensor.requires_grad = param_name.startswith("fc.")

# 5.2 Adam over the parameters that are currently trainable (the head).
head_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(head_params, lr=1e-3, weight_decay=1e-4)

# 5.3 Scale the learning rate by 0.1 after every 5 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# Classification loss applied to the raw logits.
criterion = nn.CrossEntropyLoss()


### Inicijalizacija varijabli za trening

In [None]:
# Best validation accuracy seen so far, and the number of epochs to run.
best_val_acc, num_epochs = 0.0, 20

### Trening i validacija

- Model se postavlja u režim treninga (model.train()).
- Izračunava se gubitak i tačnost na trening skupu.
- Optimizator se resetuje, izvodi se propagacija unazad i ažuriraju se težine modela.
- Planer brzine učenja se ažurira.
- Model se postavlja u režim evaluacije (model.eval()).
- Izračunava se tačnost na validacionom skupu bez izračunavanja gradijenata.
- Ispisuje se tačnost treninga i validacije za trenutnu epohu.
- Na kraju četvrte epohe (`epoch == 3`, jer brojanje počinje od nule), parametri layer4 i fc se otključavaju, a optimizator i planer brzine učenja se rekonfigurišu sa nižom brzinom učenja.
- Ako je validaciona tačnost bolja od prethodne najbolje, model se spašava.

In [None]:
# Per-epoch metrics, kept so the run can be inspected or plotted afterwards.
# (The running loss was previously accumulated but never read.)
history = []

for epoch in range(num_epochs):
    # — Training —
    # NOTE(review): model.train() switches every module to training mode, so
    # BatchNorm layers in the frozen part of the network still update their
    # running statistics even though their weights get no gradient — confirm
    # this is intended.
    model.train()
    train_loss = train_correct = total = 0
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        logits = model(imgs)
        loss   = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        # The criterion returns a batch mean; weight it by the batch size so
        # the epoch figure below is a true per-sample average.
        train_loss    += loss.item() * imgs.size(0)
        preds = logits.argmax(1)
        train_correct += (preds == labels).sum().item()
        total         += labels.size(0)

    # One scheduler step per epoch.
    scheduler.step()

    # — Validation —
    model.eval()
    val_correct = val_total = 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            logits = model(imgs)
            preds  = logits.argmax(1)
            val_correct += (preds == labels).sum().item()
            val_total   += labels.size(0)

    train_acc = train_correct/total
    val_acc   = val_correct/val_total
    history.append({
        "epoch": epoch + 1,
        "train_loss": train_loss / total,
        "train_acc": train_acc,
        "val_acc": val_acc,
    })
    print(f"Epoch {epoch+1}: Train Acc {train_acc:.3f}, Val Acc {val_acc:.3f}")

    # After the 4th epoch (epoch index 3) — a fixed schedule, not plateau
    # detection — also unfreeze layer4 and rebuild the optimizer/scheduler
    # with a lower learning rate. Takes effect from the next epoch.
    if epoch == 3:
        for name,param in model.named_parameters():
            if name.startswith("layer4.") or name.startswith("fc."):
                param.requires_grad = True
        optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, model.parameters()),
            lr=1e-4, weight_decay=1e-4
        )
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    # Save best: checkpoint only on a strict improvement in validation accuracy.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "F:/projekt13/best_resnet18_ft.pth")
        print("✔ New best saved")

Epoch 1: Train Acc 0.185, Val Acc 0.261
✔ New best saved
Epoch 2: Train Acc 0.308, Val Acc 0.348
✔ New best saved
Epoch 3: Train Acc 0.415, Val Acc 0.507
✔ New best saved
Epoch 4: Train Acc 0.518, Val Acc 0.623
✔ New best saved
Epoch 5: Train Acc 0.585, Val Acc 0.797
✔ New best saved
Epoch 6: Train Acc 0.769, Val Acc 0.899
✔ New best saved
Epoch 7: Train Acc 0.805, Val Acc 0.957
✔ New best saved
Epoch 8: Train Acc 0.872, Val Acc 0.971
✔ New best saved
Epoch 9: Train Acc 0.913, Val Acc 0.971
Epoch 10: Train Acc 0.903, Val Acc 0.971
Epoch 11: Train Acc 0.923, Val Acc 0.971
Epoch 12: Train Acc 0.903, Val Acc 0.971
Epoch 13: Train Acc 0.887, Val Acc 0.971
Epoch 14: Train Acc 0.933, Val Acc 0.971
Epoch 15: Train Acc 0.908, Val Acc 0.971
Epoch 16: Train Acc 0.938, Val Acc 0.971
Epoch 17: Train Acc 0.918, Val Acc 0.971
Epoch 18: Train Acc 0.938, Val Acc 0.971
Epoch 19: Train Acc 0.908, Val Acc 0.971
Epoch 20: Train Acc 0.892, Val Acc 0.971


### Test modela na nekoj konkretnoj slici

In [15]:
# Rebuild & load
# Use the `resnet18` constructor imported from torchvision.models earlier in
# the notebook; no `models` name is imported anywhere, so `models.resnet18`
# fails with NameError on a fresh kernel.
model = resnet18(weights=None)
# The head must match the checkpoint's class count before loading the weights.
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.load_state_dict(torch.load("F:/projekt13/best_resnet18_ft.pth", map_location=device))
model = model.to(device)
model.eval()  # evaluation mode for inference

# Preprocess test images
from PIL import Image
from torchvision import transforms

# Deterministic inference pipeline: resize, centre-crop to 224, convert to a
# tensor, then normalise per channel.
_preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
preprocess = transforms.Compose(_preprocess_steps)

def predict(path):
    """Classify a single image file with the loaded model.

    Args:
        path: Location of the image file; passed straight to ``Image.open``.

    Returns:
        A ``(label, confidence)`` tuple: the entry of ``train_ds.classes`` at
        the highest-probability index, and that softmax probability as a float.
    """
    # Open through a context manager so the underlying file handle is always
    # released; convert() returns an independent in-memory RGB copy.
    with Image.open(path) as raw:
        img = raw.convert("RGB")

    # Add a leading batch dimension of 1 and move the input to the model's device.
    inp = preprocess(img).unsqueeze(0).to(device)
    with torch.no_grad():
        logits = model(inp)
        prob   = torch.softmax(logits, 1)
        idx    = prob.argmax(1).item()
    return train_ds.classes[idx], prob[0, idx].item()

# Run the classifier on one sample image and print the predicted class with
# its softmax probability expressed as a percentage.
img_path = r"F:\projekt13\MV5BNTFjZDU5NmYtYzZlMy00YThmLTg5ZjUtYjkyZWI2OTk2Mjc1XkEyXkFqcGc@._V1_FMjpg_UX1000_.jpg"
label, conf = predict(img_path)
print(f"Prediction: {label}  ({conf*100:.1f}% confident)")


Prediction: jerry_seinfeld  (74.7% confident)
