# *Attention*

In order to be able to run this notebook without problems, one must do the following first:
- Add a shortcut of [this](https://drive.google.com/drive/folders/1l-74Qd42waVghTfSsJAPJIMXeg0f77XK?usp=sharing) drive folder to your own drive.
- Mount on your drive.
- Set the *drive_folder* variable to point to the path of the shared folder on your drive.

In [2]:
drive_folder = './drive/MyDrive/Shortcuts/AML_HW1/'
!ls './drive/MyDrive/Shortcuts/AML_HW1/'

AML_HW1.pdf  AML_HW1.zip  Practical


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!cp './drive/MyDrive/Shortcuts/AML_HW1/AML_HW1.zip' './'
!unzip './AML_HW1.zip'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: AML_HW1/Practical/Q5/Omniglot/Malay_(Jawi_-_Arabic)/character28/0583_07.png  
  inflating: AML_HW1/Practical/Q5/Omniglot/Malay_(Jawi_-_Arabic)/character28/0583_08.png  
  inflating: AML_HW1/Practical/Q5/Omniglot/Malay_(Jawi_-_Arabic)/character28/0583_09.png  
  inflating: AML_HW1/Practical/Q5/Omniglot/Malay_(Jawi_-_Arabic)/character28/0583_10.png  
  inflating: AML_HW1/Practical/Q5/Omniglot/Malay_(Jawi_-_Arabic)/character28/0583_11.png  
  inflating: AML_HW1/Practical/Q5/Omniglot/Malay_(Jawi_-_Arabic)/character28/0583_12.png  
  inflating: AML_HW1/Practical/Q5/Omniglot/Malay_(Jawi_-_Arabic)/character28/0583_13.png  
  inflating: AML_HW1/Practical/Q5/Omniglot/Malay_(Jawi_-_Arabic)/character28/0583_14.png  
  inflating: AML_HW1/Practical/Q5/Omniglot/Malay_(Jawi_-_Arabic)/character28/0583_15.png  
  inflating: AML_HW1/Practical/Q5/Omniglot/Malay_(Jawi_-_Arabic)/character28/0583_16.png  
  inflating: AML_HW1/Prac

# **Imports**

In [4]:
import os
import glob
import time
import torch
import torchvision
import numpy as np
import torch.nn as nn
from PIL import Image
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset, DataLoader

In [5]:
data_path = './AML_HW1/Practical/Q5/Omniglot/'
BATCH_SIZE = 128
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
DEVICE

device(type='cuda', index=0)

# **General Utilities**

In [17]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [18]:
def conv_block(in_channels, out_channels):
    '''
    returns a block conv-bn-relu-pool
    '''
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, 3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )
    
class ProtoNetBack(nn.Module):
    def __init__(self, input_channels = 1):
        super(ProtoNetBack, self).__init__()
        self.layers = nn.Sequential(
            nn.Conv2d(input_channels, 64, 1),
            conv_block(64, 128),
            conv_block(128, 128),
            conv_block(128, 128),
            conv_block(128, 128),
        )

    def get_embedding_size(self, input_size = (1,28,28)):
        device = next(self.parameters()).device
        x = torch.rand([2,*input_size]).to(device)
        with torch.no_grad():
            output = self.forward(x)
            emb_size = output.shape[-1]
        
        del x,output
        torch.cuda.empty_cache()

        return emb_size

    def forward(self, x):
        return self.layers (x).reshape ([x.shape[0] , -1])

In [19]:
class Trainer():

    def __init__(self, name, net, optimizer, criterion, epochs, trainloader,
                 valloader, log_every_iter=100, eval_every_epoch=1, save_every_iter=500,
                 save_path='./', device=None):
        
        self.epochs = epochs
        self.trainloader = trainloader
        self.valloader = valloader
        self.net = net
        self.optimizer = optimizer
        self.criterion = criterion
        self.log_every_iter = log_every_iter
        self.eval_every_epoch = eval_every_epoch
        self.device = device
        self.save_path = save_path
        self.save_every_iter = save_every_iter
        self.name = name
    
    def train(self):
        print('Number of iterations in each epoch: {}'.format(len(self.trainloader)))
        print('Number of validation iterations: {}'.format(len(self.valloader)))
        print('Training started ...')
        
        history = {
            'train_loss': [],
            'train_acc': [],
            'val_acc': []
        }
        epoch_train_loss = 0.0
        epoch_train_acc = 0.0
        epoch_count = 0.0
        
        for epoch in range(self.epochs):
            
            running_loss = 0.0
            running_lp_fraction = 0.0
            running_accuracy = 0.0
            tick = time.time()
            t = time.time()

            for i, data in enumerate(self.trainloader, 0):
                
                overhead_duration = time.time() - t
                t = time.time()

                inputs, labels = data
                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                outputs = self.net(inputs)
                loss = self.criterion(outputs, labels)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                preds = torch.argmax(outputs, axis=1)
                acc = ((preds == labels) * 1.0).mean()

                running_loss += loss.item()
                running_accuracy += acc.item()
                epoch_train_loss += loss.item() * inputs.shape[0]
                epoch_train_acc += acc.item() * inputs.shape[0]
                epoch_count += inputs.shape[0]

                processing_duration = time.time() - t
                t = time.time()
                running_lp_fraction += overhead_duration / processing_duration

                if i % self.log_every_iter == self.log_every_iter - 1:
                    tock = time.time()
                    print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / self.log_every_iter:.3f}, \
                            accuracy = {running_accuracy / self.log_every_iter:.4f}')
                    tick = time.time()
                    running_loss = 0.0
                    running_lp_fraction = 0.0
                    running_accuracy = 0.0
                
                # if i % self.save_every_iter == self.save_every_iter - 1:
                #     self.save_model(epoch)
                #     print('Training saved')

            if epoch % self.eval_every_epoch == self.eval_every_epoch - 1:
                epoch_val_acc = self.evaluate(epoch)
                self.net.train()
                history['val_acc'].append(epoch_val_acc)
            
            history['train_loss'].append(epoch_train_loss / epoch_count)
            history['train_acc'].append(epoch_train_acc / epoch_count)
            epoch_train_loss = 0.0
            epoch_train_acc = 0.0
            epoch_count = 0.0

        # self.save_model(self.epochs, False)
        print('Finished Training')
        print('=================================\n')
        return history

    def evaluate(self, epoch):
        print('Evaluating on validation set ... (Epoch {})'.format(epoch + 1))
        sum_acc, sum_loss = 0.0, 0.0
        total_data = 0
        self.net.eval()
        for i, data in enumerate(self.valloader, 0):
            inputs, labels = data
            inputs = inputs.to(self.device)
            labels = labels.to(self.device)
            outputs = self.net(inputs)
            total_data += outputs.shape[0]
            preds = torch.argmax(outputs, axis=1)
            acc = ((preds == labels) * 1.0).mean()
            sum_acc += acc.item() * preds.shape[0]

        epoch_acc = sum_acc / total_data
        print('{}: {}'.format('accuracy', epoch_acc)) 
        print('-------------------------------')
        return epoch_acc

    def save_model(self, epoch, checkpoint=True):
        if checkpoint:
            torch.save({
                'model_state_dict': self.net.state_dict(),
                'optimizer_state_dict': self.optimizer.state_dict(),
            }, os.path.join(self.save_path, '%04d.pth' % (epoch + 1)))
        else:
            torch.save({
                'model_state_dict': self.net.state_dict()
            }, os.path.join(self.save_path, '{}.pth'.format(self.name)))

# **Part A**

## Omniglot Single Alphabet Dataset

In [20]:
class SingleAlphabetDataset(Dataset):

    def __init__(self, path, train=True, transform=None):
        self.classes = sorted(glob.glob('{}/*'.format(path)))
        self.num_images = len(glob.glob('{}/*/*'.format(path), recursive=True))
        self.x = []
        self.y = []
        for i, c in enumerate(self.classes):
            char_num = int(c[-2:])
            assert i+1 == char_num
            image_paths = sorted(glob.glob('{}/*'.format(c)))
            if train:
                image_paths = image_paths[:14]
            else:
                image_paths = image_paths[14:]
            for image_path in image_paths:
                img = self.pil_loader(image_path)
                self.x.append(img)
                self.y.append(i)
        self.transform = transform

    def pil_loader(self, path):
        with open(path, 'rb') as f:
            img = Image.open(f)
            return img.convert('L')

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.transform(self.x[idx]), self.y[idx]

transform = transforms.Compose(
    [
     transforms.Resize((32, 32)),
     transforms.ToTensor(),
    #  transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

## Model Definition

In [21]:
class OmNet(nn.Module):
    def __init__(self, input_channels=1, num_classes=10):
        super(OmNet, self).__init__()
        self.pn_back = ProtoNetBack(input_channels=input_channels)
        self.fc1 = nn.Linear(512, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.pn_back(x)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        out = self.fc3(x)
        return out

## Start Training

In [22]:
all_alphabets = sorted(glob.glob('{}/*'.format(data_path)))
num_params = []

for i, alphabet in enumerate(all_alphabets):

    alphabet_name = alphabet.split('/')[-1]
    print('Preparing data ... ({}. {})'.format(i+1, alphabet_name))
    trainset = SingleAlphabetDataset(alphabet, train=True, transform=transform)
    valset = SingleAlphabetDataset(alphabet, train=False, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
    valloader = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False)

    net = OmNet(input_channels=1, num_classes=len(trainset.classes)).to(DEVICE)
    num_params.append(count_parameters(net))
    print('Number of parameters: {}'.format(num_params[-1]))
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
    criterion = torch.nn.CrossEntropyLoss()
    
    name = '5a-{}'.format(alphabet_name)
    tr = Trainer(name, net, optimizer, criterion, 20, trainloader, valloader,
                log_every_iter=10, eval_every_epoch=10, device=DEVICE)
    tr.train()

print('Total params: {}'.format(sum(num_params)))

Preparing data ... (1. Alphabet_of_the_Magi)
Number of parameters: 916884
Number of iterations in each epoch: 3
Number of validation iterations: 1
Training started ...
Evaluating on validation set ... (Epoch 10)
accuracy: 0.25
-------------------------------
Evaluating on validation set ... (Epoch 20)
accuracy: 0.9083333611488342
-------------------------------
Finished Training

Preparing data ... (2. Anglo-Saxon_Futhorc)
Number of parameters: 919197
Number of iterations in each epoch: 4
Number of validation iterations: 2
Training started ...
Evaluating on validation set ... (Epoch 10)
accuracy: 0.6436781622897619
-------------------------------
Evaluating on validation set ... (Epoch 20)
accuracy: 0.9195402326255009
-------------------------------
Finished Training

Preparing data ... (3. Arcadian)
Number of parameters: 918426
Number of iterations in each epoch: 3
Number of validation iterations: 2
Training started ...
Evaluating on validation set ... (Epoch 10)
accuracy: 0.153846154

# **Part B**

## Omniglot Multitask Dataset

In [23]:
class BatchSampler(object):

    def __init__(self, root, batch_size, iters, train=True):
        super(BatchSampler, self).__init__()
        self.classes = sorted(glob.glob('{}/*'.format(root)))
        num_images = np.array([len(glob.glob('{}/*/*'.format(c), recursive=True)) for c in self.classes])
        self.class_range = np.concatenate((np.zeros(1), np.cumsum(num_images)))
        self.batch_size = batch_size
        self.train = train
        self.iters = iters

    def __iter__(self):

        for i in range(self.iters):
            rc = np.random.randint(0, len(self.classes))
            lower, upper = self.class_range[rc], self.class_range[rc+1]
            if self.train:
                lower = round(lower * 0.7)
                upper = round(upper * 0.7)
            else:
                lower = round(lower * 0.3)
                upper = round(upper * 0.3)
            total_batch_indexes = np.random.randint(lower, upper, self.batch_size)
            yield total_batch_indexes.astype(int)

    def __len__(self):
        return self.iters

In [24]:
class MultitaskOmniglot(Dataset):

    def __init__(self, path, train=True, transform=None):
        self.alphabet = sorted(glob.glob('{}/*'.format(path)))
        self.alphabet_index = []
        self.x = []
        self.y = []

        for j, a in enumerate(self.alphabet):
            classes = sorted(glob.glob('{}/*'.format(a)))
            for i, c in enumerate(classes):
                char_num = int(c[-2:])
                assert i+1 == char_num
                image_paths = sorted(glob.glob('{}/*'.format(c)))
                if train:
                    image_paths = image_paths[:14]
                else:
                    image_paths = image_paths[14:]
                for image_path in image_paths:
                    img = self.pil_loader(image_path)
                    self.x.append(img)
                    self.y.append(i)
                    self.alphabet_index.append(j)
        
        self.transform = transform

    def pil_loader(self, path):
        with open(path, 'rb') as f:
            img = Image.open(f)
            return img.convert('L')

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.transform(self.x[idx]), self.y[idx], self.alphabet_index[idx]

transform = transforms.Compose(
    [
     transforms.Resize((32, 32)),
     transforms.ToTensor(),
    #  transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
     ])

In [25]:
iters = 600
batch_size = 32

train_dataset = MultitaskOmniglot(data_path, train=True, transform=transform)
test_dataset = MultitaskOmniglot(data_path, train=False, transform=transform)
train_sampler = BatchSampler(data_path, batch_size, iters=iters, train=True)
test_sampler = BatchSampler(data_path, batch_size, iters=40, train=False)

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_sampler=train_sampler)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_sampler=test_sampler)

## Part B.1

### Model definition

In [26]:
class MultiOmNet1(nn.Module):
    def __init__(self, input_channels=1, num_classes=[]):
        super(MultiOmNet1, self).__init__()
        self.pn_back = ProtoNetBack(input_channels=input_channels)
        self.fc1 = nn.Linear(512, 512)
        self.fc2 = nn.Linear(512, 256)
        classifiers = []
        for nc in num_classes:
            classifiers.append(nn.Linear(256, nc))
        self.heads = nn.ModuleList(classifiers)

    def forward(self, x, task_id):
        tid = task_id[0].item()
        check = torch.abs(task_id - tid).sum()
        if check.item() != 0:
            print(tid)
            print(task_id)
            print(check)
        assert check.item() == 0

        x = self.pn_back(x)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        out = self.heads[tid](x)
        return out

### Training

In [33]:
net = MultiOmNet1(input_channels=1, num_classes=classes).to(DEVICE)
count_parameters(net)

1159492

In [28]:
classes = [20, 29, 26, 41, 40, 24, 46, 14, 26, 34, 33, 22, 26, 43, 24, 48,
           22, 16, 52, 47, 40, 26, 40, 41, 33, 14, 42, 23, 17, 55]

epochs = 20
log_every_iter = 30
eval_every_epoch = 1

net = MultiOmNet1(input_channels=1, num_classes=classes).to(DEVICE)
optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
criterion = torch.nn.CrossEntropyLoss()

print('Number of iterations in each epoch: {}'.format(len(train_dataloader)))
print('Number of validation iterations: {}'.format(len(test_dataloader)))
print('Training started ...')

for epoch in range(epochs):    
    running_loss = 0.0
    running_accuracy = 0.0
    
    for i, data in enumerate(train_dataloader, 0):
        inputs, labels, alphabets = data
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)
        alphabets = alphabets.to(DEVICE)

        outputs = net(inputs, alphabets)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        preds = torch.argmax(outputs, axis=1)
        acc = ((preds == labels) * 1.0).mean()
        running_loss += loss.item()
        running_accuracy += acc.item()

        if i % log_every_iter == log_every_iter - 1:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / log_every_iter:.3f}, \
                    accuracy = {running_accuracy / log_every_iter:.4f}')
            running_loss = 0.0
            running_accuracy = 0.0
    scheduler.step()

    if epoch % eval_every_epoch == eval_every_epoch - 1:
        print('Evaluating on validation set ... (Epoch {})'.format(epoch + 1))
        sum_acc = 0.0
        total_data = 0
        net.eval()
        for i, data in enumerate(test_dataloader, 0):
            inputs, labels, alphabets = data
            inputs = inputs.to(DEVICE)
            labels = labels.to(DEVICE)
            alphabets = alphabets.to(DEVICE)
            outputs = net(inputs, alphabets)
            total_data += outputs.shape[0]
            preds = torch.argmax(outputs, axis=1)
            acc = ((preds == labels) * 1.0).mean()
            sum_acc += acc.item() * preds.shape[0]
        epoch_acc = sum_acc / total_data
        print('{}: {}'.format('accuracy', epoch_acc)) 
        print('-------------------------------')

print('Finished Training')
print('=================================\n')

Number of iterations in each epoch: 600
Number of validation iterations: 40
Training started ...
[1,    30] loss: 3.356,                     accuracy = 0.0396
[1,    60] loss: 3.299,                     accuracy = 0.0469
[1,    90] loss: 3.416,                     accuracy = 0.0552
[1,   120] loss: 3.391,                     accuracy = 0.0542
[1,   150] loss: 3.388,                     accuracy = 0.0521
[1,   180] loss: 3.318,                     accuracy = 0.1219
[1,   210] loss: 3.306,                     accuracy = 0.0885
[1,   240] loss: 3.462,                     accuracy = 0.0875
[1,   270] loss: 3.311,                     accuracy = 0.0854
[1,   300] loss: 3.339,                     accuracy = 0.0833
[1,   330] loss: 3.259,                     accuracy = 0.1052
[1,   360] loss: 3.181,                     accuracy = 0.1323
[1,   390] loss: 3.155,                     accuracy = 0.1000
[1,   420] loss: 3.347,                     accuracy = 0.0969
[1,   450] loss: 3.080,            

### Testing

In [29]:
all_alphabets = sorted(glob.glob('{}/*'.format(data_path)))

for i, alphabet in enumerate(all_alphabets):

    alphabet_name = alphabet.split('/')[-1]
    # print('Preparing data ... ({}. {})'.format(i+1, alphabet_name))
    valset = SingleAlphabetDataset(alphabet, train=False, transform=transform)
    valloader = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False)

    net.eval()
    print('Evaluating on validation set ... ({})'.format(alphabet_name))
    sum_acc = 0.0
    total_data = 0
    for i, data in enumerate(test_dataloader, 0):
        inputs, labels, alphabets = data
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)
        alphabets = alphabets.to(DEVICE)
        outputs = net(inputs, alphabets)
        total_data += outputs.shape[0]
        preds = torch.argmax(outputs, axis=1)
        acc = ((preds == labels) * 1.0).mean()
        sum_acc += acc.item() * preds.shape[0]
    epoch_acc = sum_acc / total_data
    print('{}: {}'.format('accuracy', epoch_acc)) 
    print('-------------------------------')


Evaluating on validation set ... (Alphabet_of_the_Magi)
accuracy: 0.8234375
-------------------------------
Evaluating on validation set ... (Anglo-Saxon_Futhorc)
accuracy: 0.81015625
-------------------------------
Evaluating on validation set ... (Arcadian)
accuracy: 0.8078125
-------------------------------
Evaluating on validation set ... (Armenian)
accuracy: 0.79765625
-------------------------------
Evaluating on validation set ... (Asomtavruli_(Georgian))
accuracy: 0.7921875
-------------------------------
Evaluating on validation set ... (Balinese)
accuracy: 0.8109375
-------------------------------
Evaluating on validation set ... (Bengali)
accuracy: 0.80625
-------------------------------
Evaluating on validation set ... (Blackfoot_(Canadian_Aboriginal_Syllabics))
accuracy: 0.82734375
-------------------------------
Evaluating on validation set ... (Braille)
accuracy: 0.77578125
-------------------------------
Evaluating on validation set ... (Burmese_(Myanmar))
accuracy: 0.7

## Part B.2

### Model Definition

In [34]:
class MultiOmNet2(nn.Module):
    def __init__(self, input_channels=1, num_classes=[]):
        super(MultiOmNet2, self).__init__()
        self.pn_back = ProtoNetBack(input_channels=input_channels)
        self.fc1 = nn.Linear(512, 512)
        classifiers = []
        for nc in num_classes:
            classifiers.append(nn.Sequential(nn.Linear(512, 256),
                                             nn.ReLU(),
                                             nn.Linear(256, nc)))
        self.heads = nn.ModuleList(classifiers)

    def forward(self, x, task_id):
        tid = task_id[0].item()
        check = torch.abs(task_id - tid).sum()
        if check.item() != 0:
            print(tid)
            print(task_id)
            print(check)
        assert check.item() == 0

        x = self.pn_back(x)
        x = self.fc1(x)
        x = F.relu(x)
        out = self.heads[tid](x)
        return out

### Training

In [38]:
net = MultiOmNet2(input_channels=1, num_classes=classes).to(DEVICE)
count_parameters(net)

4968004

In [39]:
classes = [20, 29, 26, 41, 40, 24, 46, 14, 26, 34, 33, 22, 26, 43, 24, 48,
           22, 16, 52, 47, 40, 26, 40, 41, 33, 14, 42, 23, 17, 55]

epochs = 20
log_every_iter = 30
eval_every_epoch = 1

net = MultiOmNet2(input_channels=1, num_classes=classes).to(DEVICE)
optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
criterion = torch.nn.CrossEntropyLoss()

print('Number of iterations in each epoch: {}'.format(len(train_dataloader)))
print('Number of validation iterations: {}'.format(len(test_dataloader)))
print('Training started ...')

for epoch in range(epochs):    
    running_loss = 0.0
    running_accuracy = 0.0
    
    for i, data in enumerate(train_dataloader, 0):
        inputs, labels, alphabets = data
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)
        alphabets = alphabets.to(DEVICE)

        outputs = net(inputs, alphabets)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        preds = torch.argmax(outputs, axis=1)
        acc = ((preds == labels) * 1.0).mean()
        running_loss += loss.item()
        running_accuracy += acc.item()

        if i % log_every_iter == log_every_iter - 1:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / log_every_iter:.3f}, \
                    accuracy = {running_accuracy / log_every_iter:.4f}')
            running_loss = 0.0
            running_accuracy = 0.0
    scheduler.step()

    if epoch % eval_every_epoch == eval_every_epoch - 1:
        print('Evaluating on validation set ... (Epoch {})'.format(epoch + 1))
        sum_acc = 0.0
        total_data = 0
        net.eval()
        for i, data in enumerate(test_dataloader, 0):
            inputs, labels, alphabets = data
            inputs = inputs.to(DEVICE)
            labels = labels.to(DEVICE)
            alphabets = alphabets.to(DEVICE)
            outputs = net(inputs, alphabets)
            total_data += outputs.shape[0]
            preds = torch.argmax(outputs, axis=1)
            acc = ((preds == labels) * 1.0).mean()
            sum_acc += acc.item() * preds.shape[0]
        epoch_acc = sum_acc / total_data
        print('{}: {}'.format('accuracy', epoch_acc)) 
        print('-------------------------------')

print('Finished Training')
print('=================================\n')

Number of iterations in each epoch: 600
Number of validation iterations: 40
Training started ...
[1,    30] loss: 3.347,                     accuracy = 0.0510
[1,    60] loss: 3.436,                     accuracy = 0.0635
[1,    90] loss: 3.266,                     accuracy = 0.0625
[1,   120] loss: 3.324,                     accuracy = 0.0813
[1,   150] loss: 3.417,                     accuracy = 0.0625
[1,   180] loss: 3.339,                     accuracy = 0.0990
[1,   210] loss: 3.345,                     accuracy = 0.1052
[1,   240] loss: 3.250,                     accuracy = 0.1208
[1,   270] loss: 3.221,                     accuracy = 0.1500
[1,   300] loss: 3.152,                     accuracy = 0.1625
[1,   330] loss: 3.183,                     accuracy = 0.1385
[1,   360] loss: 3.040,                     accuracy = 0.1792
[1,   390] loss: 3.128,                     accuracy = 0.1792
[1,   420] loss: 3.022,                     accuracy = 0.1948
[1,   450] loss: 2.977,            

### Testing

In [40]:
all_alphabets = sorted(glob.glob('{}/*'.format(data_path)))

for i, alphabet in enumerate(all_alphabets):

    alphabet_name = alphabet.split('/')[-1]
    # print('Preparing data ... ({}. {})'.format(i+1, alphabet_name))
    valset = SingleAlphabetDataset(alphabet, train=False, transform=transform)
    valloader = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False)

    net.eval()
    print('Evaluating on validation set ... ({})'.format(alphabet_name))
    sum_acc = 0.0
    total_data = 0
    for i, data in enumerate(test_dataloader, 0):
        inputs, labels, alphabets = data
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)
        alphabets = alphabets.to(DEVICE)
        outputs = net(inputs, alphabets)
        total_data += outputs.shape[0]
        preds = torch.argmax(outputs, axis=1)
        acc = ((preds == labels) * 1.0).mean()
        sum_acc += acc.item() * preds.shape[0]
    epoch_acc = sum_acc / total_data
    print('{}: {}'.format('accuracy', epoch_acc)) 
    print('-------------------------------')


Evaluating on validation set ... (Alphabet_of_the_Magi)
accuracy: 0.82734375
-------------------------------
Evaluating on validation set ... (Anglo-Saxon_Futhorc)
accuracy: 0.87734375
-------------------------------
Evaluating on validation set ... (Arcadian)
accuracy: 0.84765625
-------------------------------
Evaluating on validation set ... (Armenian)
accuracy: 0.8390625
-------------------------------
Evaluating on validation set ... (Asomtavruli_(Georgian))
accuracy: 0.86953125
-------------------------------
Evaluating on validation set ... (Balinese)
accuracy: 0.86484375
-------------------------------
Evaluating on validation set ... (Bengali)
accuracy: 0.81953125
-------------------------------
Evaluating on validation set ... (Blackfoot_(Canadian_Aboriginal_Syllabics))
accuracy: 0.8640625
-------------------------------
Evaluating on validation set ... (Braille)
accuracy: 0.84765625
-------------------------------
Evaluating on validation set ... (Burmese_(Myanmar))
accuracy