* changed by nov05 on 2024-11-28  
* Udacity AWS MLE Nanodegree (ND189)  
  Course 4, 3.7 Exercise: Training a Convolutional Neural Network  
* `conda activate drlnd_py310` with cuda enabled   

In [1]:
%cd ..
%pwd

d:\github\udacity-CD0387-deep-learning-topics-within-computer-vision-nlp-project-starter


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


'd:\\github\\udacity-CD0387-deep-learning-topics-within-computer-vision-nlp-project-starter'

## Solution: Training a CNN

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torchvision.models import resnet18, ResNet18_Weights
from torch.optim.lr_scheduler import CosineAnnealingLR
import time
from tqdm import tqdm

In [8]:
def test(model, test_loader, criterion, device):
    print("Testing model on whole testing dataset...")
    model.eval()
    running_loss=0
    running_corrects=0
    
    for inputs, labels in test_loader:
        inputs=inputs.to(device)
        labels=labels.to(device)
        outputs=model(inputs)
        loss=criterion(outputs, labels)
        _, preds = torch.max(outputs, 1)
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds==labels.data).item()

    total_loss = running_loss / len(test_loader.dataset)
    total_acc = running_corrects/ len(test_loader.dataset)
    print(f"🟢 Testing Accuracy: {100*total_acc}, Testing Loss: {total_loss}")
    

def train(model, train_loader, validation_loader, criterion, optimizer, device,
          epochs):

    best_loss=1e6
    image_dataset={'train':train_loader, 'eval':validation_loader}
    loss_counter=0
    
    for epoch in tqdm(range(epochs)):
        for phase in ['train', 'eval']:
            print(f"Epoch {epoch}, Phase {phase}")
            if phase=='train':
                model.train()
            else:
                model.eval()
            running_loss = 0.0
            running_corrects = 0
            running_samples=0

            for step, (inputs, labels) in enumerate(image_dataset[phase]):
                inputs=inputs.to(device)
                labels=labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                if phase=='train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                _, preds = torch.max(outputs, 1)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds==labels.data).item()
                running_samples += len(inputs)
                if (running_samples%2000)==0:
                    accuracy = running_corrects/running_samples
                    print("Step {}, Images [{}/{} ({:.0f}%)] Loss: {:.2f} Accuracy: {}/{} ({:.2f}%) Time: {}".format(
                            step,
                            running_samples,
                            len(image_dataset[phase].dataset),
                            100.0 * (running_samples / len(image_dataset[phase].dataset)),
                            loss.item(),
                            running_corrects,
                            running_samples,
                            100.0*accuracy,
                            time.asctime() # for measuring time for testing, remove for students and in the formatting
                        )
                    )
                ## NOTE: Comment lines below to train and test on whole dataset
                # if running_samples>(0.2*len(image_dataset[phase].dataset)):
                #     break

            epoch_loss = running_loss / running_samples
            # epoch_acc = running_corrects / running_samples
            
            if phase=='eval':
                if epoch_loss<best_loss:
                    best_loss=epoch_loss
                else:
                    loss_counter+=1
        ## early stopping
        if loss_counter==2:
            break
    return model

def create_model():
    # model = resnet18(pretrained=True)
    model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
    for param in model.parameters():
        param.requires_grad = False   
    num_features=model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(num_features, 10)
    )
    return model

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"👉 Running on Device {device}")

training_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
testing_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])


epochs=100
batch_size=128
model=create_model()
model=model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.fc.parameters(), lr=0.001, weight_decay=5e-4)
# optimizer = torch.optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

trainset = CIFAR10(root='./data', train=True,
    download=True, transform=training_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
    shuffle=True)
testset = CIFAR10(root='./data', train=False,
    download=True, transform=testing_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
    shuffle=False)

train(model, trainloader, testloader, criterion, optimizer, device, epochs)
## Epoch 0, Phase train
## Images [2000/50000 (4%)] Loss: 1.16 Accuracy: 846/2000 (42.30%) Time: Thu Nov 28 16:48:06 2024

👉 Running on Device cuda:0
Files already downloaded and verified
Files already downloaded and verified


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch 0, Phase train
Epoch 0, Phase eval


  1%|          | 1/100 [03:51<6:22:32, 231.84s/it]

Epoch 1, Phase train
Epoch 1, Phase eval


  2%|▏         | 2/100 [08:17<6:51:05, 251.69s/it]

Epoch 2, Phase train
Epoch 2, Phase eval


  3%|▎         | 3/100 [12:54<7:05:53, 263.44s/it]

Epoch 3, Phase train
Epoch 3, Phase eval


  4%|▍         | 4/100 [17:53<7:23:50, 277.40s/it]

Epoch 4, Phase train
Epoch 4, Phase eval


  5%|▌         | 5/100 [22:40<7:24:50, 280.96s/it]

Epoch 5, Phase train
Epoch 5, Phase eval


  6%|▌         | 6/100 [27:05<7:11:27, 275.40s/it]

Epoch 6, Phase train
Epoch 6, Phase eval


  7%|▋         | 7/100 [31:14<6:53:24, 266.71s/it]

Epoch 7, Phase train
Epoch 7, Phase eval


  7%|▋         | 7/100 [35:15<7:48:22, 302.18s/it]






ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
test(model, testloader, criterion, device)
## Testing Accuracy: 80.86, Testing Loss: 0.5680697995185852
## 24.5s

Testing model on whole testing dataset...
🟢 Testing Accuracy: 80.86, Testing Loss: 0.5680697995185852
