In [2]:
# ---------------------------------------------------------------------------- #
# An implementation of https://arxiv.org/pdf/1512.03385.pdf                    #
# See section 4.2 for the model architecture on CIFAR-10                       #
# Some part of the code was referenced from below                              #
# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py   #
# ---------------------------------------------------------------------------- #

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
num_epochs = 80
learning_rate = 0.001

# Image preprocessing modules
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()])

# CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='./data/',
                                             train=True, 
                                             transform=transform,
                                             download=True)

test_dataset = torchvision.datasets.CIFAR10(root='./data/',
                                            train=False, 
                                            transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=100, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=100, 
                                          shuffle=False)

# 3x3 convolution
def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3, 
                     stride=stride, padding=1, bias=False)

# Residual block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        
    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

# ResNet
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[0], 2)
        self.layer3 = self.make_layer(block, 64, layers[1], 2)
        self.layer4 = self.make_layer(block, 128, layers[1])
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(128, num_classes)
        
    def make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for i in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        print(nn.Sequential(*layers))
        return nn.Sequential(*layers)
    
    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avg_pool(out)
        feature = out.view(out.size(0), -1)
        out = self.fc(feature)
        return out, feature
    
model = ResNet(ResidualBlock, [2, 2, 2, 2]).to(device)



Files already downloaded and verified
Sequential(
  (0): ResidualBlock(
    (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace)
    (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1): ResidualBlock(
    (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace)
    (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)
Sequential(
  (0): ResidualBlock(
    (conv1): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bia

In [124]:

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# For updating learning rate
def update_lr(optimizer, lr):    
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

# Train the model
total_step = len(train_loader)
curr_lr = learning_rate

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # print(images)
        # print(labels)
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs,feature = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Decay learning rate
    if (epoch+1) % 20 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)

# Save the model checkpoint
torch.save(model.state_dict(), 'resnet_128.ckpt')
torch.save(model, 'model_128.ckpt')

Epoch [1/80], Step [100/500] Loss: 1.7620
Epoch [1/80], Step [200/500] Loss: 1.4864
Epoch [1/80], Step [300/500] Loss: 1.3161
Epoch [1/80], Step [400/500] Loss: 1.4461
Epoch [1/80], Step [500/500] Loss: 1.1318
Epoch [2/80], Step [100/500] Loss: 0.8508
Epoch [2/80], Step [200/500] Loss: 0.9327
Epoch [2/80], Step [300/500] Loss: 1.0433
Epoch [2/80], Step [400/500] Loss: 0.8035
Epoch [2/80], Step [500/500] Loss: 0.9743
Epoch [3/80], Step [100/500] Loss: 0.7867
Epoch [3/80], Step [200/500] Loss: 1.0103
Epoch [3/80], Step [300/500] Loss: 0.7966
Epoch [3/80], Step [400/500] Loss: 0.8904
Epoch [3/80], Step [500/500] Loss: 0.5328
Epoch [4/80], Step [100/500] Loss: 0.7963
Epoch [4/80], Step [200/500] Loss: 0.8402
Epoch [4/80], Step [300/500] Loss: 0.7290
Epoch [4/80], Step [400/500] Loss: 0.4578
Epoch [4/80], Step [500/500] Loss: 0.6119
Epoch [5/80], Step [100/500] Loss: 0.5628
Epoch [5/80], Step [200/500] Loss: 0.5955
Epoch [5/80], Step [300/500] Loss: 0.7630
Epoch [5/80], Step [400/500] Loss:

In [11]:
model.load_state_dict(torch.load('resnet.ckpt'))

In [125]:
# Test the model  
pred=[]
total_feature=[]
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    i=0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs,feature = model(images)
        
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        # print(feature.cpu().detach().numpy().shape)
        pred=np.concatenate((pred,predicted.cpu().numpy()))
        total_feature=np.append(total_feature,feature.cpu().detach().numpy())
    print(total)
    print(total_feature)
    print(pred)
    print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))


# Save the model checkpoint
torch.save(model.state_dict(), 'resnet1.ckpt')
torch.save(model, 'model1.ckpt')

10000
[0.13603687 0.12749322 0.78440607 ... 1.46879411 1.68511033 1.52412641]
[3. 8. 8. ... 5. 1. 7.]
Accuracy of the model on the test images: 89 %


In [83]:
a=np.reshape(total_feature,(10000,-1)).astype('float64')

In [63]:
#test_d = torchvision.datasets.CIFAR10(root='./data/',train=False)
test_l = torch.utils.data.DataLoader(dataset=test_dataset,batch_size=10000)
for images, labels in test_l:
    a=np.reshape(images.numpy()*100,(10000,-1)).astype('int')
    print(a[0][:100])
    print(labels.cpu().numpy().shape)

[61 62 64 65 62 61 63 62 61 62 63 62 63 65 66 66 65 63 62 62 61 58 58 58
 58 56 54 55 56 53 49 45 59 59 62 65 63 62 64 63 63 61 60 62 63 66 67 67
 66 62 60 59 56 54 54 55 58 57 56 55 56 53 49 46 59 59 61 65 62 63 64 64
 63 63 61 61 63 65 65 66 66 62 56 47 43 38 39 44 47 52 56 54 55 54 50 47
 60 60 62 68]
(10000,)


In [118]:
b=a[:1000]
p=pred[:1000]
print(b.shape)
print(p)


(1000, 64)
[ 3.  8.  8. 10.  6.  6.  1.  6.  3.  1. 10.  9.  7.  7.  9.  8.  5.  7.
  8.  6.  3. 10.  4.  9.  5.  2.  3. 10.  9.  6.  6.  5.  4.  5.  9.  2.
  4.  1.  9.  5.  4.  6.  3.  6. 10.  9.  3.  9.  7.  2.  9.  8. 10.  3.
  8.  8.  7.  5.  7.  5.  7.  5.  6.  5.  6.  2.  1.  2.  3.  7.  2.  6.
  8.  8. 10.  2.  9.  3.  5.  8.  8.  9.  1.  7.  0.  5.  2.  3.  8.  9.
 10.  3.  8.  6.  4.  3.  6. 10. 10.  7.  4.  5.  6.  3.  1.  1.  3.  6.
  8.  7.  4. 10.  2.  2.  1.  3. 10.  4.  6.  7.  8.  3.  1.  2.  8. 10.
  8.  3.  3.  2.  4.  1.  8.  9.  1.  2.  9.  7.  2.  8.  6.  5.  6.  3.
  8.  7.  6.  5.  5.  2.  8.  9.  6. 10. 10.  5.  2.  9.  5.  4.  2.  1.
  6.  6.  8.  4.  8.  4.  5. 10.  9.  6.  9.  8.  9.  9.  3.  7.  3. 10.
 10.  5.  2.  2.  3.  8.  6.  3.  2. 10.  5.  8. 10.  1.  7.  2.  8.  8.
  7.  8.  5.  1.  8.  0.  1.  3. 10.  5.  7.  9.  7.  4.  5.  9.  8. 10.
  7.  9.  8.  2.  7.  5.  9.  3.  3.  9.  9.  4.  5.  6.  5.  1.  5.  8.
  8. 10.  4. 10.  5.  5.  1.  1.  8.  9.

In [117]:
l=labels.cpu().numpy()[:1000]
for i in range(1000):
    if l[i]==0:
        p[i]=10
print(p)

[ 3.  8.  8. 10.  6.  6.  1.  6.  3.  1. 10.  9.  7.  7.  9.  8.  5.  7.
  8.  6.  3. 10.  4.  9.  5.  2.  3. 10.  9.  6.  6.  5.  4.  5.  9.  2.
  4.  1.  9.  5.  4.  6.  3.  6. 10.  9.  3.  9.  7.  2.  9.  8. 10.  3.
  8.  8.  7.  5.  7.  5.  7.  5.  6.  5.  6.  2.  1.  2.  3.  7.  2.  6.
  8.  8. 10.  2.  9.  3.  5.  8.  8.  9.  1.  7.  0.  5.  2.  3.  8.  9.
 10.  3.  8.  6.  4.  3.  6. 10. 10.  7.  4.  5.  6.  3.  1.  1.  3.  6.
  8.  7.  4. 10.  2.  2.  1.  3. 10.  4.  6.  7.  8.  3.  1.  2.  8. 10.
  8.  3.  3.  2.  4.  1.  8.  9.  1.  2.  9.  7.  2.  8.  6.  5.  6.  3.
  8.  7.  6.  5.  5.  2.  8.  9.  6. 10. 10.  5.  2.  9.  5.  4.  2.  1.
  6.  6.  8.  4.  8.  4.  5. 10.  9.  6.  9.  8.  9.  9.  3.  7.  3. 10.
 10.  5.  2.  2.  3.  8.  6.  3.  2. 10.  5.  8. 10.  1.  7.  2.  8.  8.
  7.  8.  5.  1.  8.  0.  1.  3. 10.  5.  7.  9.  7.  4.  5.  9.  8. 10.
  7.  9.  8.  2.  7.  5.  9.  3.  3.  9.  9.  4.  5.  6.  5.  1.  5.  8.
  8. 10.  4. 10.  5.  5.  1.  1.  8.  9. 10.  3.  1

In [53]:
from sklearn import manifold, datasets
digits = datasets.load_digits(n_class=6)
pos = digits.data
y = digits.target
print(type(pos[0][1]))

<type 'numpy.float64'>


In [120]:
from sklearn import manifold, datasets
from sklearn.metrics.pairwise import pairwise_distances
from scipy.spatial.distance import squareform
from matplotlib.patches import Ellipse

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

from wrapper import Wrapper
# from tsne import TSNE
from vtsne import VTSNE


def preprocess(perplexity=30, metric='euclidean'):
    """ Compute pairiwse probabilities for MNIST pixels.
    """
    pos = b
    y = l
    n_points = pos.shape[0]
    distances2 = pairwise_distances(pos, metric=metric, squared=True)
    # This return a n x (n-1) prob array
    pij = manifold.t_sne._joint_probabilities(distances2, perplexity, False)
    # Convert to n x n prob array
    pij = squareform(pij)
    return n_points, pij, y


draw_ellipse = True
n_points, pij2d, y = preprocess()
i, j = np.indices(pij2d.shape)
i = i.ravel()
j = j.ravel()
pij = pij2d.ravel().astype('float32')
# Remove self-indices
idx = i != j
i, j, pij = i[idx], j[idx], pij[idx]

n_topics = 2
n_dim = 2
print(n_points, n_dim, n_topics)

model = VTSNE(n_points, n_topics, n_dim)
wrap = Wrapper(model, batchsize=4096, epochs=1)
for itr in range(500):
    wrap.fit(pij, i, j)

    # Visualize the results
    embed = model.logits.weight.cpu().data.numpy()
    f = plt.figure()
    if not draw_ellipse:
        plt.scatter(embed[:, 0], embed[:, 1], c=y * 1.0 / y.max())
        plt.axis('off')
        plt.savefig('pic4/scatter_{:03d}.png'.format(itr), bbox_inches='tight')
        plt.close(f)
    else:
        # Visualize with ellipses
        var = np.sqrt(model.logits_lv.weight.clone().exp_().cpu().data.numpy())
        ax = plt.gca()
        for xy, (w, h), c in zip(embed, var, y):
            e = Ellipse(xy=xy, width=w, height=h, ec=None, lw=0.0)
            e.set_facecolor(plt.cm.Paired(c * 1.0 / y.max()))
            e.set_alpha(0.5)
            ax.add_artist(e)
        ax.set_xlim(-9, 9)
        ax.set_ylim(-9, 9)
        plt.axis('off')
        plt.savefig('pic4/scatter_{:03d}.png'.format(itr), bbox_inches='tight')
        plt.close(f)




(1000, 2, 2)
Train Epoch: 0 	Loss: 4.050958e-06
Train Epoch: 0 	Loss: 3.991271e-06
Train Epoch: 0 	Loss: 3.971874e-06
Train Epoch: 0 	Loss: 3.931830e-06
Train Epoch: 0 	Loss: 3.908011e-06
Train Epoch: 0 	Loss: 3.854957e-06
Train Epoch: 0 	Loss: 3.834711e-06
Train Epoch: 0 	Loss: 3.780564e-06
Train Epoch: 0 	Loss: 3.732472e-06
Train Epoch: 0 	Loss: 3.685790e-06
Train Epoch: 0 	Loss: 3.629099e-06
Train Epoch: 0 	Loss: 3.571091e-06
Train Epoch: 0 	Loss: 3.502069e-06
Train Epoch: 0 	Loss: 3.441946e-06
Train Epoch: 0 	Loss: 3.376016e-06
Train Epoch: 0 	Loss: 3.305479e-06
Train Epoch: 0 	Loss: 3.246400e-06
Train Epoch: 0 	Loss: 3.178814e-06
Train Epoch: 0 	Loss: 3.124155e-06
Train Epoch: 0 	Loss: 3.064881e-06
Train Epoch: 0 	Loss: 3.008895e-06
Train Epoch: 0 	Loss: 2.964761e-06
Train Epoch: 0 	Loss: 2.914757e-06
Train Epoch: 0 	Loss: 2.877794e-06
Train Epoch: 0 	Loss: 2.838146e-06
Train Epoch: 0 	Loss: 2.801914e-06
Train Epoch: 0 	Loss: 2.762549e-06
Train Epoch: 0 	Loss: 2.737990e-06
Train E

KeyboardInterrupt: 

In [7]:
model = ResNet(ResidualBlock, [2, 2, 2, 2]).to(device)
model.fc = nn.Linear(128, 100).to(device) 
model.load_state_dict(torch.load('resnet_100.ckpt'))

Sequential(
  (0): ResidualBlock(
    (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace)
    (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1): ResidualBlock(
    (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace)
    (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)
Sequential(
  (0): ResidualBlock(
    (conv1): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(32, ep

In [6]:
# Finetune on CIFAR10

# If you want to finetune only the top layer of the model, set as below.
for param in model.parameters():
    param.requires_grad = False

# Replace the top layer for finetuning.
# model.fc = nn.Linear(128, 100).to(device)  # 100 is an example.

# Image preprocessing modules
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()])

# CIFAR-10 dataset
train_dataset_100 = torchvision.datasets.CIFAR100(root='./data/',
                                             train=True, 
                                             transform=transform,
                                             download=True)

test_dataset_100 = torchvision.datasets.CIFAR100(root='./data/',
                                            train=False, 
                                            transform=transforms.ToTensor())

# Data loader
train_loader_100 = torch.utils.data.DataLoader(dataset=train_dataset_100,
                                           batch_size=100, 
                                           shuffle=True)

test_loader_100 = torch.utils.data.DataLoader(dataset=test_dataset_100,
                                          batch_size=100, 
                                          shuffle=False)


Files already downloaded and verified


In [11]:

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate/10)

# For updating learning rate
def update_lr(optimizer, lr):    
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

# Train the model
total_step = len(train_loader_100)
curr_lr = learning_rate

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader_100):
        # print(images)
        # print(labels)
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs, features= model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Decay learning rate
    if (epoch+1) % 20 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)

# Save the model checkpoint
torch.save(model.state_dict(), 'resnet_100.ckpt')
torch.save(model, 'model_100.ckpt')

Epoch [1/80], Step [100/500] Loss: 0.8784
Epoch [1/80], Step [200/500] Loss: 0.7954
Epoch [1/80], Step [300/500] Loss: 0.7619
Epoch [1/80], Step [400/500] Loss: 0.8145
Epoch [1/80], Step [500/500] Loss: 0.8682
Epoch [2/80], Step [100/500] Loss: 0.7558
Epoch [2/80], Step [200/500] Loss: 0.7218
Epoch [2/80], Step [300/500] Loss: 0.5851
Epoch [2/80], Step [400/500] Loss: 0.7708
Epoch [2/80], Step [500/500] Loss: 0.9946
Epoch [3/80], Step [100/500] Loss: 1.1143
Epoch [3/80], Step [200/500] Loss: 0.7900
Epoch [3/80], Step [300/500] Loss: 0.7051
Epoch [3/80], Step [400/500] Loss: 0.6518
Epoch [3/80], Step [500/500] Loss: 1.0576
Epoch [4/80], Step [100/500] Loss: 0.8085
Epoch [4/80], Step [200/500] Loss: 0.8401
Epoch [4/80], Step [300/500] Loss: 0.7448
Epoch [4/80], Step [400/500] Loss: 1.0680
Epoch [4/80], Step [500/500] Loss: 0.8961
Epoch [5/80], Step [100/500] Loss: 0.6860
Epoch [5/80], Step [200/500] Loss: 0.5728
Epoch [5/80], Step [300/500] Loss: 0.8705
Epoch [5/80], Step [400/500] Loss:

KeyboardInterrupt: 

In [10]:

# Test the model  
pred=[]
total_feature=[]
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    i=0
    for images, labels in test_loader_100:
        images = images.to(device)
        labels = labels.to(device)
        outputs,feature = model(images)
        
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        # print(feature.cpu().detach().numpy().shape)
        pred=np.concatenate((pred,predicted.cpu().numpy()))
        total_feature=np.append(total_feature,feature.cpu().detach().numpy())
    print(total)
    print(total_feature)
    print(pred)
    print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))

10000
[3.84278941 3.68966603 2.30542397 ... 4.09157228 0.75072026 4.64199352]
[72. 33. 55. ... 51. 88. 70.]
Accuracy of the model on the test images: 61 %
