# `UTKFace: labels 0-39 correspond to ages 21-60`

In [1]:
!nvidia-smi

Sun Aug 15 15:32:56 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import os
import time
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
# import argparse

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split

from torchvision import transforms
from PIL import Image
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from collections import Counter 

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
TRAIN_CSV_PATH = '/content/drive/MyDrive/CSE465_project/Age-Classification/datasets/utk_train.csv'
TEST_CSV_PATH = '/content/drive/MyDrive/CSE465_project/Age-Classification/datasets/utk_test.csv'
IMAGE_PATH = '/content/drive/MyDrive/CSE465_project/Age-Classification/images/UTKFace/'
RANDOM_SEED = 42
MODEL_NAME = "UTK-CE_ResNet_100%"
PATH = "/content/drive/MyDrive/CSE465_project/Models_and_Logs/" + MODEL_NAME
NUM_CLASSES = 40
BATCH_SIZE = 256
GRAYSCALE = False
learning_rate = 0.0005 #5e-4
num_epochs = 20

In [5]:
# GPU or CPU
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')

In [6]:
# Log File
if not os.path.exists(PATH):
  os.mkdir(PATH)
LOGFILE = os.path.join(PATH, 'training.log')

In [7]:
header = []

header.append(f'PyTorch Version: {torch.__version__}')
header.append(f'CUDA device available: {torch.cuda.is_available()}')
header.append(f'Using CUDA device: {DEVICE}')
header.append(f'Random Seed: {RANDOM_SEED}')
header.append(f'Output Path: {PATH}')

In [8]:
with open(LOGFILE, 'w') as f:
    for entry in header:
        print(entry)
        f.write(f'{entry}\n')
        f.flush()

PyTorch Version: 1.9.0+cu102
CUDA device available: True
Using CUDA device: cuda
Random Seed: 42
Output Path: /content/drive/MyDrive/CSE465_project/Models_and_Logs/UTK-CE_ResNet_100%


Dataset

In [9]:
class UTKFaceDataset(Dataset):
    """Custom Dataset for loading UTKFace face images"""
    def __init__(self, csv_path, img_dir, transform=None, items=None):

        df = pd.read_csv(csv_path, index_col=0)
        if items:
          df=df[:items]
        self.img_dir = img_dir
        self.csv_path = csv_path
        self.img_names = df['file'].values
        self.y = df['age'].values
        self.transform = transform

    def __getitem__(self, index):
        img = Image.open(os.path.join(self.img_dir, self.img_names[index]))

        if self.transform is not None:
            img = self.transform(img)

        label = self.y[index]

        return img, label

    def __len__(self):
        return self.y.shape[0]

normalize

`torchvision.transforms.Normalize([meanOfC1, meanOfC2, meanOfC3],[stdOfC1, stdOfC2, stdOfC3])`

In [10]:
custom_transform_train = transforms.Compose([transforms.Resize((128, 128)),
                                       transforms.RandomCrop((120, 120)),
                                       transforms.ToTensor(),
                                      #  transforms.Normalize([0.00058,  0.00047,  0.00042],[0.01957,  0.01619,  0.01502])
                                       ])
custom_transform_valid = custom_transform_train

In [11]:
custom_transform_test = transforms.Compose([transforms.Resize((128, 128)),
                                       transforms.CenterCrop((120, 120)),
                                       transforms.ToTensor()])

In [12]:
train_dataset = UTKFaceDataset(csv_path=TRAIN_CSV_PATH,
                               img_dir=IMAGE_PATH,
                               transform=custom_transform_train,
                               items=None
                              )

In [13]:
len(train_dataset)

13147

# Calculatin mean and std to Normalize Dataset Using Standardization

In [14]:
# loader = DataLoader(train_dataset, batch_size=len(train_dataset), num_workers=1)
# # data = next(iter(tqdm(loader)))
# # data.shape
# for batch in tqdm(loader):
#   print(f'{batch[0][0][0].mean(): .5f}, {batch[0][0][1].mean():.5f}, {batch[0][0][2].mean():.5f}')
#   print(f'{batch[0][0][0].std():.1f}, {batch[0][0][1].std():.1f}, {batch[0][0][2].std():.1f}')
#   # break

In [15]:
# loader = DataLoader(train_dataset, batch_size = 1000, num_workers=2) # 1000,3,120,120
# total_num_of_pixels = len(train_dataset)*3*120*120
# total_num_of_pixels_in_each_channel = len(train_dataset)*120*120

# print(total_num_of_pixels)
# # print(total_num_of_pixels_in_each_channel*3)
# print(total_num_of_pixels_in_each_channel)

In [16]:
# total_sum_channel1 = total_sum_channel2 = total_sum_channel3 = 0.0
# for batch in tqdm(loader):
#   # total_sum += batch[0].sum()
#   # print(batch[0][0][0].shape)
#   # print(batch[0][0][1].shape)
#   # print(batch[0][0][2].shape)

#   # print(batch[0][0][0])
#   # print(batch[0][0][1])
#   # print(batch[0][0][2])
#   # total_sum +=torch.numel(batch[0])
#   # break
#   total_sum_channel1 +=batch[0][0][0].sum()
#   total_sum_channel2 +=batch[0][0][1].sum()
#   total_sum_channel3 +=batch[0][0][2].sum()

# mean_channel1 = total_sum_channel1/total_num_of_pixels_in_each_channel
# mean_channel2 = total_sum_channel2/total_num_of_pixels_in_each_channel
# mean_channel3 = total_sum_channel3/total_num_of_pixels_in_each_channel
# print(total_sum_channel1, total_sum_channel2, total_sum_channel3)
# print(mean_channel1, mean_channel2, mean_channel3)

In [17]:
# sum_of_squared_error_channel1 = sum_of_squared_error_channel2 = sum_of_squared_error_channel3 = 0
# for batch in tqdm(loader):
#   sum_of_squared_error_channel1 += ((batch[0][0][0] - mean_channel1).pow(2)).sum()
#   sum_of_squared_error_channel2 += ((batch[0][0][1] - mean_channel2).pow(2)).sum()
#   sum_of_squared_error_channel3 += ((batch[0][0][2] - mean_channel3).pow(2)).sum()

# std_channel1= torch.sqrt(sum_of_squared_error_channel1/total_num_of_pixels_in_each_channel)
# std_channel2= torch.sqrt(sum_of_squared_error_channel2/total_num_of_pixels_in_each_channel)
# std_channel3= torch.sqrt(sum_of_squared_error_channel3/total_num_of_pixels_in_each_channel)

# print(sum_of_squared_error_channel1, sum_of_squared_error_channel2, sum_of_squared_error_channel3)
# print(std_channel1, std_channel2, std_channel3)

finally mean and std for each channels

In [18]:
# print(f'{mean_channel1.item(): .5f}, {mean_channel2.item(): .5f}, {mean_channel3.item(): .5f}')
# print(f'{std_channel1.item(): .1f}, {std_channel2.item(): .1f}, {std_channel3.item(): .1f}')
# # 0.00058,  0.00047,  0.00042
# # 0.01957,  0.01619,  0.01502
# # After applying the above means and stds we get below means and stds what is our desired
# # 0.02948,  0.02864,  0.02780
# # 1.0,  1.0,  1.0a

# Dataset spliting into train and valid set

In [19]:
valid_size = round(len(train_dataset)*20*0.01) # 20% from train_dataset
train_size = len(train_dataset)-valid_size
train_size, valid_size

(10518, 2629)

In [20]:
train_dataset, valid_dataset = random_split(train_dataset, [train_size, valid_size])

In [21]:
test_dataset = UTKFaceDataset(csv_path=TEST_CSV_PATH,
                              img_dir=IMAGE_PATH,
                              transform=custom_transform_test,
                              items=None
                              )

In [22]:
def dataset_analysis(dataset):
  sorted_dataset = np.sort(dataset)
  counter = Counter(sorted_dataset)
  print("Label\t\tAge\t\tCount")
  for key, value in counter.items():
    print(key,'\t\t',(key+21),'\t\t', value)

In [23]:
type(train_dataset)

torch.utils.data.dataset.Subset

In [24]:
# dataset_analysis(train_dataset.y)

In [25]:
dataset_analysis(test_dataset.y)

Label		Age		Count
0 		 21 		 66
1 		 22 		 80
2 		 23 		 88
3 		 24 		 182
4 		 25 		 141
5 		 26 		 427
6 		 27 		 105
7 		 28 		 186
8 		 29 		 123
9 		 30 		 140
10 		 31 		 63
11 		 32 		 138
12 		 33 		 30
13 		 34 		 75
14 		 35 		 188
15 		 36 		 101
16 		 37 		 60
17 		 38 		 54
18 		 39 		 54
19 		 40 		 102
20 		 41 		 30
21 		 42 		 64
22 		 43 		 35
23 		 44 		 18
24 		 45 		 106
25 		 46 		 28
26 		 47 		 31
27 		 48 		 36
28 		 49 		 28
29 		 50 		 76
30 		 51 		 33
31 		 52 		 46
32 		 53 		 46
33 		 54 		 62
34 		 55 		 57
35 		 56 		 29
36 		 57 		 21
37 		 58 		 55
38 		 59 		 19
39 		 60 		 64


In [26]:
# torch.from_numpy(train_dataset.y).bincount()

In [27]:
torch.from_numpy(test_dataset.y).bincount()

tensor([ 66,  80,  88, 182, 141, 427, 105, 186, 123, 140,  63, 138,  30,  75,
        188, 101,  60,  54,  54, 102,  30,  64,  35,  18, 106,  28,  31,  36,
         28,  76,  33,  46,  46,  62,  57,  29,  21,  55,  19,  64])

In [28]:
len(train_dataset),len(valid_dataset), len(test_dataset)

(10518, 2629, 3287)

In [29]:
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True,
                          num_workers=2)

In [30]:
valid_loader = DataLoader(dataset=valid_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=False,
                          num_workers=2)

In [31]:
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False,
                         num_workers=2)

In [32]:
len(train_loader),len(valid_loader), len(test_loader)

(42, 11, 13)

Model

In [33]:
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)

In [34]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

`nn.RELU(inplace=True)` [details](https://discuss.pytorch.org/t/whats-the-difference-between-nn-relu-and-nn-relu-inplace-true/948)

Understanding the underscore( _ ) of Python [link](https://stackoverflow.com/questions/8689964/why-do-some-functions-have-underscores-before-and-after-the-function-name)

`self.modules()` [link](https://discuss.pytorch.org/t/pytorch-self-module/49677)

What do `*` and `**` before a variable name mean in a function signature? [link](https://stackoverflow.com/questions/11315010/what-do-and-before-a-variable-name-mean-in-a-function-signature)

In [35]:
class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes, grayscale):
        self.inplanes = 64
        if grayscale:
            in_dim = 1
        else:
            in_dim = 3
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(in_dim, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1, padding=2)
        self.fc = nn.Linear(2048 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, (2. / n)**.5)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)

        x = x.view(x.size(0), -1) # reshaping, -1 means calculate the suitable number for second dimension 
        logits = self.fc(x)
        probas = F.softmax(logits, dim=1)
        return logits, probas

In [36]:
def resnet34(num_classes, grayscale):
    """Constructs a ResNet-34 model."""
    model = ResNet(block=BasicBlock, 
                   layers=[3, 4, 6, 3],
                   num_classes=num_classes,
                   grayscale=grayscale)
    return model

Initialise Model & Optimizer

In [37]:
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed(RANDOM_SEED)
model = resnet34(NUM_CLASSES, GRAYSCALE)

model.to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 

# Training

PyTorch Normally add the previously calculated gradient with the current calculated gradient and then updates the weight. But here we don't want it. That's why we made the previously calculted gradient zero while calculating a new one with `optimizer.zero_grad()`

In [38]:
# BATCH = len(train_dataset)//BATCH_SIZE # the floor division // rounds the result down to the nearest whole number
min_valid_cost = np.inf
start_time = time.time()

for epoch in tqdm(range(num_epochs)):
  total_num_correct_preds = 0
  num_examples = 0
  model.train()
  for batch_idx, (features, targets) in enumerate(tqdm(train_loader)):

    targets, features =  targets.to(DEVICE), features.to(DEVICE)

    # FORWARD AND BACK PROP
    logits, probas = model(features)
    """print(logits)
        print(logits.shape)
        print("-------------logits end----------------")
        print(probas)
        print(probas.shape)
        print("_____________probs ends________________")
        print(index_highest_probas)
        print(index_highest_probas.shape)
        print("++++++++++++index_highest_probas ends+++++++++")
        print(targets)
        print(targets.shape)
        print(targets.shape[0])
        print("===============actual targets ends==============")"""

    cost = F.cross_entropy(logits, targets) # ? logits are last layer output, where the targets are from 0-39, whether we should pass something else to CE
    optimizer.zero_grad()

    cost.backward()

    # UPDATE MODEL PARAMETERS
    optimizer.step()

    _, index_highest_probas = torch.max(probas, dim = 1)
    # print(index_highest_probas.shape)
    # print(targets.shape)
    num_correct_preds = torch.sum(index_highest_probas == targets)
    # print(num_correct_preds)
    total_num_correct_preds += num_correct_preds
    num_examples += targets.size(0)
    
    """# print(total_num_correct_preds, 'Total')
       # training_acc = float(format(num_correct_preds/targets.shape[0], ".3f"))
       # training_acc = training_acc*100.0
       # print(training_acc)
       # print(targets.shape[0])

       # LOGGING
       # if batch_idx==BATCH:
       #     print(num_correct_preds)
       #     str = f'Epoch: {epoch+1}/{num_epochs} | Batch {batch_idx}/{BATCH} | Cost: {format(cost, ".3f")} Training_acc:{training_acc}%'
       #     print(str)
       #     with open(LOGFILE, 'a') as f: f.write(f'{str}\n')"""
  
  cost_valid = 0.0
  model.eval()
  for i, (f, t) in enumerate(valid_loader):
    t, f =  t.to(DEVICE), f.to(DEVICE)

    logits, probas = model(f)
    cost_valid = F.cross_entropy(logits, t)


  # LOGGING
  print(f'total_num_correct_preds: {total_num_correct_preds}')
  training_acc = format((total_num_correct_preds/num_examples) * 100.0, '.3f')
  time_elapsed = format((time.time() - start_time)/60, ".3f")
  str = f'Epoch: {epoch+1}/{num_epochs} | Train Cost: {format(cost, ".3f")} | Training_acc: {training_acc}% | Valid Cost: {format(cost_valid, ".3f")} Time elapsed: {time_elapsed}min'
  print(str)
  with open(LOGFILE, 'a') as f: f.write(f'{str}\n')

  if min_valid_cost > cost_valid:
    print(f'Validation Cost Decreased({format(min_valid_cost, ".3f")}--->{format(cost_valid, ".3f")}) \t_-_Saving Model_-_')
    min_valid_cost = cost_valid
    # torch.save(model.state_dict(), os.path.join(PATH, 'model.pt'))
    torch.save(model, os.path.join(PATH, 'model.pt'))

with open(LOGFILE, 'a') as f: f.write('############-------Trainig Completed-------############\n')

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


total_num_correct_preds: 1336
Epoch: 1/20 | Train Cost: 3.189 | Training_acc: 12.702% | Valid Cost: 3.705 Time elapsed: 0.371min
Validation Cost Decreased(inf--->3.705) 	_-_Saving Model_-_


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 1472
Epoch: 2/20 | Train Cost: 3.247 | Training_acc: 13.995% | Valid Cost: 3.816 Time elapsed: 0.746min


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 1546
Epoch: 3/20 | Train Cost: 3.060 | Training_acc: 14.699% | Valid Cost: 3.416 Time elapsed: 1.101min
Validation Cost Decreased(3.705--->3.416) 	_-_Saving Model_-_


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 1593
Epoch: 4/20 | Train Cost: 3.383 | Training_acc: 15.145% | Valid Cost: 3.577 Time elapsed: 1.473min


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 1619
Epoch: 5/20 | Train Cost: 2.884 | Training_acc: 15.393% | Valid Cost: 3.370 Time elapsed: 1.842min
Validation Cost Decreased(3.416--->3.370) 	_-_Saving Model_-_


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 1711
Epoch: 6/20 | Train Cost: 2.933 | Training_acc: 16.267% | Valid Cost: 3.316 Time elapsed: 2.201min
Validation Cost Decreased(3.370--->3.316) 	_-_Saving Model_-_


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 1761
Epoch: 7/20 | Train Cost: 2.955 | Training_acc: 16.743% | Valid Cost: 3.468 Time elapsed: 2.571min


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 1748
Epoch: 8/20 | Train Cost: 2.702 | Training_acc: 16.619% | Valid Cost: 3.368 Time elapsed: 2.936min


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 1840
Epoch: 9/20 | Train Cost: 2.849 | Training_acc: 17.494% | Valid Cost: 3.728 Time elapsed: 3.289min


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 1846
Epoch: 10/20 | Train Cost: 2.649 | Training_acc: 17.551% | Valid Cost: 3.584 Time elapsed: 3.648min


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 1979
Epoch: 11/20 | Train Cost: 3.091 | Training_acc: 18.815% | Valid Cost: 3.442 Time elapsed: 4.027min


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 1918
Epoch: 12/20 | Train Cost: 2.591 | Training_acc: 18.235% | Valid Cost: 3.754 Time elapsed: 4.382min


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 2102
Epoch: 13/20 | Train Cost: 2.185 | Training_acc: 19.985% | Valid Cost: 3.203 Time elapsed: 4.748min
Validation Cost Decreased(3.316--->3.203) 	_-_Saving Model_-_


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 2128
Epoch: 14/20 | Train Cost: 2.937 | Training_acc: 20.232% | Valid Cost: 3.429 Time elapsed: 5.131min


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 2271
Epoch: 15/20 | Train Cost: 2.578 | Training_acc: 21.592% | Valid Cost: 3.610 Time elapsed: 5.485min


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 2421
Epoch: 16/20 | Train Cost: 3.103 | Training_acc: 23.018% | Valid Cost: 3.231 Time elapsed: 5.846min


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 2508
Epoch: 17/20 | Train Cost: 2.686 | Training_acc: 23.845% | Valid Cost: 3.626 Time elapsed: 6.216min


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 2620
Epoch: 18/20 | Train Cost: 2.235 | Training_acc: 24.910% | Valid Cost: 3.722 Time elapsed: 6.591min


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 2870
Epoch: 19/20 | Train Cost: 2.733 | Training_acc: 27.287% | Valid Cost: 3.715 Time elapsed: 6.948min


  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 3180
Epoch: 20/20 | Train Cost: 2.753 | Training_acc: 30.234% | Valid Cost: 3.986 Time elapsed: 7.297min


Evaluation

In [39]:
def compute_mae_and_mse(model, data_loader, device):
  mae, mse, num_examples, total_num_correct_preds = 0., 0., 0, 0
  for i, (features, targets) in enumerate(tqdm(data_loader)):
          
    features = features.to(device)
    targets = targets.to(device)

    logits, probas = model(features)
    _, predicted_labels = torch.max(probas, dim=1)
    
    num_correct_preds = torch.sum(predicted_labels == targets)
    total_num_correct_preds += num_correct_preds

    num_examples += targets.size(0)
    mae += torch.sum(torch.abs(predicted_labels - targets))
    mse += torch.sum((predicted_labels - targets)**2)
  


  print(f'total_num_correct_preds: {total_num_correct_preds}')
  acc = format((total_num_correct_preds/num_examples) * 100.0, '.3f')
  mae = format(mae.float()/num_examples, '.3f')
  mse = format(torch.sqrt(mse.float()/num_examples), '.3f')      

  return mae, mse, acc

In [40]:
# model = resnet34(NUM_CLASSES, GRAYSCALE)
model = torch.load(os.path.join(PATH, 'model.pt'))
model.to(DEVICE)
model.eval()
with torch.set_grad_enabled(False):  # save memory during inference

  train_mae, train_mse, train_acc = compute_mae_and_mse(model, train_loader, device=DEVICE)
  test_mae, test_mse, test_acc = compute_mae_and_mse(model, test_loader, device=DEVICE)

  str = f'MAE/RMSE/ACC: | Train: {train_mae}/{train_mse}/{train_acc}% | Test: {test_mae}/{test_mse}/{test_acc}%'
  print(str)
  with open(LOGFILE, 'a') as f: f.write(f'{str}\n')

str = f'Total Elapsed Time: {format(((time.time() - start_time)/60), ".3f")} min'
print(str)
with open(LOGFILE, 'a') as f: f.write(f'{str}\n')

  0%|          | 0/42 [00:00<?, ?it/s]

total_num_correct_preds: 2128


  0%|          | 0/13 [00:00<?, ?it/s]

total_num_correct_preds: 473
MAE/RMSE/ACC: | Train: 4.634/6.961/20.232% | Test: 6.033/8.683/14.390%
Total Elapsed Time: 7.690 min


# Saving the Model

PyTorch: What's the difference between state_dict and parameters()? [link](https://stackoverflow.com/questions/54746829/pytorch-whats-the-difference-between-state-dict-and-parameters)

In [41]:
# model = model.to(torch.device('cpu'))
# torch.save(model.state_dict(), os.path.join(PATH, 'model.pt'))