Imports

In [69]:
from models import ff_eucl, ff_hyp
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import geoopt
from time import time
from torchvision import datasets, transforms
from torch.utils.tensorboard import SummaryWriter
import helper
torch.cuda.is_available()

False

CUDA check

In [70]:
# Select the compute device once: CUDA when it is available, CPU otherwise.
# The training/validation helpers move every batch to this device.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Using {device}')

Using cpu


Data Transformation

In [71]:
# Preprocessing pipeline handed to both MNIST datasets. Only the tensor
# conversion is active; the Normalize step is kept here but disabled
# (its constants are presumably the MNIST mean/std -- TODO confirm).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        # transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

Training and Test data from MNIST data set

In [72]:
# Download (if necessary) and load the MNIST training and test sets, applying
# the shared `transform` to every sample.
train_set = datasets.MNIST('PATH_TO_STORE_TRAINSET', download=True, train=True, transform=transform)
test_set = datasets.MNIST('PATH_TO_STORE_TESTSET', download=True, train=False, transform=transform)

size = len(train_set)
print(size)

# Hold out 20% of the training set for validation. The training size is
# derived by subtraction so the two lengths always add up to `size`;
# truncating both halves independently can lose a sample, in which case
# random_split refuses the lengths.
val_size = int(size * 0.2)
train_size = size - val_size
train_data, val_data = torch.utils.data.random_split(train_set, [train_size, val_size])

# Only the training loader is shuffled: the validation/test metrics computed
# in this notebook are sums over all samples, so batch order does not matter.
trainloader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)
valloader = torch.utils.data.DataLoader(val_data, batch_size=64, shuffle=False)
testloader = torch.utils.data.DataLoader(test_set, batch_size=512, shuffle=False)

60000


Initializing the model

In [73]:
# Build the Euclidean feed-forward classifier: 784 inputs (the flattened
# MNIST image), 10 outputs, ReLU activation; 512 and 256 are presumably the
# hidden layer widths -- TODO confirm against ff_eucl.EuclFF.
model = ff_eucl.EuclFF(784, 512, 256, 10, nn.ReLU())
# train_epoch/val_epoch move every batch to `device`, so the model has to live
# on the same device; without this a CUDA run fails with a device mismatch.
# NOTE(review): assumes EuclFF is an nn.Module, whose .to() returns the module.
model = model.to(device)
# ball = geoopt.PoincareBall()
# images, labels = next(iter(trainloader))
# images = images.view(images.shape[0], -1)
# grid = torchvision.utils.make_grid(images)
# tb = SummaryWriter()
# tb.add_image("images", grid)
# tb.add_graph(model, images)

Hyperparameters

In [74]:

# learning_rate = 4e-1 #learning rate for ReLU activation function
# #learning_rate = 2e-1 #current learning rate for model without activation functions
# momentum = 0.9
# weight_decay = 5e-4

In [75]:
from itertools import product
# Hyperparameter grid: each key maps to the candidate values to try.
parameters = {
    "lr": [0.1, 0.01],
    "batch_size": [64, 128],
    "shuffle": [True, False],
}

# Candidate lists in insertion order (lr, batch_size, shuffle), in the shape
# itertools.product expects.
param_values = list(parameters.values())
print(param_values)

# Preview every combination of the grid.
for lr, batch_size, shuffle in product(*param_values):
    print(lr, batch_size, shuffle)

[[0.1, 0.01], [64, 128], [True, False]]
0.1 64 True
0.1 64 False
0.1 128 True
0.1 128 False
0.01 64 True
0.01 64 False
0.01 128 True
0.01 128 False


Loss Function

In [76]:
#criterion = nn.CrossEntropyLoss()
# #criterion = nn.NLLLoss()
# images, labels = next(iter(trainloader))
# images, labels = images.to(device), labels.to(device)
# images = 0.0357*images.view(images.shape[0], -1)
# print(images)

# out = model(images) #output
# print(out)
# loss = criterion(out, labels) #calculate the loss

Optimizer

In [77]:
#optimizer = geoopt.optim.RiemannianSGD(model.parameters(), lr=learning_rate, momentum=momentum)
#optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)

In [78]:
### Single prediction function
def get_num_correct(preds, labels):
    """Count the predictions whose arg-max class equals the label.

    Args:
        preds: tensor of per-class scores; the arg-max is taken along dim 1
            (assumes a (batch, classes) layout -- TODO confirm).
        labels: tensor of target class indices compared element-wise with
            the arg-max of ``preds``.

    Returns:
        The number of matching entries as a plain Python number.
    """
    predicted_classes = preds.argmax(dim=1)
    matches = predicted_classes.eq(labels)
    return matches.sum().item()

### Training function
def train_epoch(model, dataloader, optimizer, criterion):
    """Run one training pass over ``dataloader`` and update the model.

    Args:
        model: network to train; it is switched to training mode.
        dataloader: iterable yielding ``(images, labels)`` batches.
        optimizer: optimizer whose gradients are reset and stepped per batch.
        criterion: loss callable invoked as ``criterion(output, labels)``.

    Returns:
        A tuple ``(train_loss, total_correct)``: the sum of the per-batch loss
        values and the number of correctly classified samples.
    """
    model.train()
    loss_sum, n_correct = 0, 0
    for batch_images, batch_labels in dataloader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        # Flatten every image of the batch into a single vector (784 for MNIST).
        flat_images = batch_images.view(batch_images.shape[0], -1)
        #flat_images = ball.projx(batch_images.view(batch_images.shape[0], -1))

        # Forward pass on freshly cleared gradients.
        optimizer.zero_grad()
        outputs = model(flat_images)
        batch_loss = criterion(outputs, batch_labels)

        # Accumulate the epoch statistics.
        loss_sum += batch_loss.item()
        n_correct += get_num_correct(outputs, batch_labels)

        # Backpropagate and apply the weight update.
        batch_loss.backward()
        optimizer.step()

    return loss_sum, n_correct

### Validation function
def val_epoch(model, dataloader, criterion):
    """Evaluate ``model`` on every batch of ``dataloader`` without training.

    Args:
        model: network to evaluate; it is switched to evaluation mode.
        dataloader: iterable yielding ``(images, labels)`` batches.
        criterion: loss callable invoked as ``criterion(output, labels)``.

    Returns:
        A tuple ``(val_loss, val_correct)``: the sum of the per-batch loss
        values and the number of correctly classified samples.
    """
    model.eval()
    val_loss = 0
    val_correct = 0
    # No weights are updated here, so gradient tracking is disabled; this
    # avoids building an autograd graph for every validation batch.
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            # Flatten MNIST images into a 784 long vector
            images = images.view(images.shape[0], -1)
            #images = ball.projx(images.view(images.shape[0], -1))
            output = model(images)
            loss = criterion(output, labels)
            val_loss += loss.item()
            val_correct += get_num_correct(output, labels)

    return val_loss, val_correct


### Hyperparameter tuning function
def hparams_tune(epochs):
    """Train a fresh model for every combination in ``param_values``.

    Each (lr, batch_size, shuffle) combination gets its own model, data
    loaders, SGD optimizer and TensorBoard writer. Per-epoch losses and
    accuracies are logged as scalars and printed, and the metrics of the last
    epoch are stored with the run's hyperparameters.

    Args:
        epochs: number of epochs to train each combination; must be >= 1.

    Returns:
        None.

    Raises:
        ValueError: if ``epochs`` is smaller than 1 (there would be no metrics
            to report for a run).
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1")

    for run_id, (lr, batch_size, shuffle) in enumerate(product(*param_values)):
        print("run id:", run_id + 1)
        # The helpers move each batch to `device`, so the model must be there too.
        # NOTE(review): assumes EuclFF is an nn.Module, whose .to() returns the module.
        model = ff_eucl.EuclFF(784, 512, 256, 10, nn.ReLU()).to(device)
        trainloader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=shuffle)
        # The shuffle hyperparameter only matters for training; validation
        # metrics are sums over all samples and do not depend on batch order.
        valloader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=False)
        optimizer = optim.SGD(model.parameters(), lr=lr)
        criterion = torch.nn.CrossEntropyLoss()
        comment = f' batch_size = {batch_size} lr = {lr} shuffle = {shuffle}'
        tb = SummaryWriter(comment=comment)

        # One writer is opened per run, so each one is closed when its run
        # ends, even if training is interrupted.
        try:
            for epoch in range(epochs):
                # Both helpers require the loss function as their last argument.
                train_loss, total_correct = train_epoch(model, trainloader, optimizer, criterion)
                val_loss, val_correct = val_epoch(model, valloader, criterion)

                train_accuracy = total_correct / len(train_data)
                val_accuracy = val_correct / len(val_data)

                tb.add_scalar("Training Loss", train_loss, epoch)
                tb.add_scalar("Validation Loss", val_loss, epoch)
                tb.add_scalar("Training Accuracy", train_accuracy, epoch)
                tb.add_scalar("Validation Accuracy", val_accuracy, epoch)

                print("epoch:", epoch, "training loss:", train_loss, "validation loss:", val_loss,
                      "training accuracy:", train_accuracy, "validation accuracy:", val_accuracy)

            # Summarise the run with the metrics of its final epoch.
            tb.add_hparams(
                {"lr": lr, "bsize": batch_size, "shuffle": shuffle},
                {
                    "training accuracy": train_accuracy,
                    "validation accuracy": val_accuracy,
                    "training loss": train_loss,
                    "validation loss": val_loss,
                },
            )
        finally:
            tb.close()

    return None

### Model evaluation
def model_eval(model, epochs, trainloader, valloader, optimizer, criterion, tuning=True):
    """Either train ``model`` directly or launch the hyperparameter search.

    Args:
        model: network to train when ``tuning`` is false.
        epochs: number of epochs (per run when tuning).
        trainloader: loader for the training data (used when not tuning).
        valloader: loader for the validation data (used when not tuning).
        optimizer: optimizer for ``model`` (used when not tuning).
        criterion: loss callable (used when not tuning).
        tuning: when true (the default) only ``epochs`` is forwarded to
            ``hparams_tune`` and every other argument is ignored; when false
            the given model is trained and the per-epoch metrics are printed.

    Returns:
        None.
    """
    if tuning:
        hparams_tune(epochs)
        return

    # Normalise by the datasets behind the loaders that were actually passed
    # in, instead of relying on the notebook-level train_data/val_data globals.
    n_train = len(trainloader.dataset)
    n_val = len(valloader.dataset)

    for epoch in range(epochs):
        train_loss, total_correct = train_epoch(model, trainloader, optimizer, criterion)
        val_loss, val_correct = val_epoch(model, valloader, criterion)
        print("epoch:", epoch, "training loss:", train_loss, "validation loss:", val_loss,
              "training accuracy:", total_correct/n_train, "validation accuracy:", val_correct/n_val)




In [79]:
# Train the model built above for 10 epochs with plain SGD (lr = 0.1) and a
# cross-entropy loss; tuning=False selects the single training loop instead of
# the hyperparameter search.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
model_eval(
    model,
    10,
    trainloader,
    valloader,
    optimizer,
    criterion,
    tuning=False,
)

epoch: 0 training loss: 391.2765506133437 validation loss: 47.783599846065044 training accuracy: 0.8581875 validation accuracy: 0.9264166666666667
epoch: 1 training loss: 144.65838014148176 validation loss: 33.09455743059516 training accuracy: 0.9434375 validation accuracy: 0.9495
epoch: 2 training loss: 97.8371591316536 validation loss: 26.386380705982447 training accuracy: 0.9610208333333333 validation accuracy: 0.95825


KeyboardInterrupt: 

In [None]:
# for run_id, (lr,batch_size, shuffle) in enumerate(product(*param_values)):
#     print("run id:", run_id + 1)
#     model = ff_eucl.EuclFF(784, 512, 256, 10, nn.ReLU())
#     trainloader = torch.utils.data.DataLoader(train_data,batch_size = batch_size, shuffle = shuffle)
#     valloader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle= shuffle)
#     optimizer = optim.SGD(model.parameters(), lr=lr)
#     criterion = torch.nn.CrossEntropyLoss()
#     comment = f' batch_size = {batch_size} lr = {lr} shuffle = {shuffle}'
#     tb = SummaryWriter(comment=comment)
# # time0 = time()
#     epochs = 10
#     for epoch in range(epochs):
#     #     model.train()
#     #     train_loss = 0
#     #     total_correct = 0
#     #     for images, labels in trainloader:
#     #         images, labels = images.to(device), labels.to(device)
#     #         # Flatten MNIST images into a 784 long vector
#     #         images = images.view(images.shape[0], -1)
#     #         #images = ball.projx(images.view(images.shape[0], -1))
#     #         # Training pass
#     #         optimizer.zero_grad()
#     #         output = model(images)
#     #         loss = criterion(output, labels)  
#     #         train_loss += loss.item()
#     #         total_correct += get_num_correct(output, labels)
#     #         #backpropagation
#     #         loss.backward()      
#     #         #Weight optimization
#     #         optimizer.step()  
#         train_loss, total_correct = train_epoch(model, trainloader, optimizer)
#         val_loss, val_correct = val_epoch(model, valloader)
#         # val_loss = 0
#         # val_correct = 0
#         # model.eval()
#         # for  images, labels in valloader:
#         #     images, labels = images.to(device), labels.to(device)
#         #     # Flatten MNIST images into a 784 long vector
#         #     images = images.view(images.shape[0], -1)
#         #     #images = ball.projx(images.view(images.shape[0], -1))
#         #     output = model(images)
#         #     loss = criterion(output, labels)  
#         #     val_loss += loss.item()
#         #     val_correct += get_num_correct(output, labels)
            
#         tb.add_scalar("Training Loss", train_loss, epoch)
#         tb.add_scalar("Validation Loss", val_loss, epoch)
#         tb.add_scalar("Training Accuracy", total_correct/len(train_data), epoch)
#         tb.add_scalar("Validation Accuracy", val_correct/len(val_data), epoch)

#         print("epoch:", epoch, "training loss:",train_loss, "validation loss:", val_loss,
#         "training accuracy:", total_correct/len(train_data), "validation accuracy:", val_correct/len(val_data))
    
#     tb.add_hparams(
#             {"lr": lr, "bsize": batch_size, "shuffle":shuffle},
#             {
#                 "training accuracy": total_correct/ len(train_data),
#                 "validation accuracy": val_correct/ len(val_data),
#                 "training loss": train_loss,
#                 "validation loss": val_loss,
#             },
#         )
# tb.close()
# #     else:
# #         print("Epoch {} - Training loss: {}".format(e, running_loss/len(trainloader)))
# # print("\nTraining Time (in minutes) =",(time()-time0)/60)

run id: 1
epoch: 0 training loss: 392.1312243938446 validation loss: 45.524742260575294 training accuracy: 0.8544166666666667 validation accuracy: 0.9294166666666667
epoch: 1 training loss: 148.6686474904418 validation loss: 30.336301969364285 training accuracy: 0.9421041666666666 validation accuracy: 0.9530833333333333
epoch: 2 training loss: 100.84368281438947 validation loss: 24.182748220860958 training accuracy: 0.9600416666666667 validation accuracy: 0.963
epoch: 3 training loss: 74.7791788065806 validation loss: 19.343951125629246 training accuracy: 0.9703958333333333 validation accuracy: 0.97025
epoch: 4 training loss: 57.605175531469285 validation loss: 17.36317464709282 training accuracy: 0.9774166666666667 validation accuracy: 0.9719166666666667
epoch: 5 training loss: 45.906268164515495 validation loss: 17.34404821647331 training accuracy: 0.9821458333333334 validation accuracy: 0.9715833333333334
epoch: 6 training loss: 37.498522373382 validation loss: 15.201203659642488 tr

In [None]:
# images, labels = next(iter(testloader))
# #images, labels = images.to(device), labels.to(device)

# img = images[0].view(1, 784)
# #img = ball.projx(images[0].view(1, 784))
# #img_gpu = img.to(device)
# with torch.no_grad():
#     out = model(img)

# ps = out.cpu()
# print(ps)
# probab = list(ps.numpy()[0])
# print(probab)
# print("Predicted Digit =", probab.index(max(probab)))
# helper.view_classify(img.view(1, 28, 28), ps)

Model Prediction and Model Accuracy

In [None]:
# Measure the accuracy of `model` on the test set.
# Whole batches are classified at once: the previous version ran one forward
# pass per image and converted the full label batch to NumPy for every image.
model.eval()
correct_count, all_count = 0, 0
with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        # Flatten each image of the batch into a single vector (784 for MNIST).
        images = images.view(images.shape[0], -1)
        #images = ball.projx(images.view(images.shape[0], -1))
        out = model(images)
        # The predicted digit is the index of the largest output per row.
        correct_count += out.argmax(dim=1).eq(labels).sum().item()
        all_count += labels.shape[0]

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))

Number Of Images Tested = 10000

Model Accuracy = 0.9136


Current status of experiments:
1. Using just Hyperbolic Linear modules, with appropriately self-tuned hyperparameters and a batch size of 512, the average accuracy was around 90 percent.
2. With the use of activation functions (ReLU, ReLU, then LogSoftmax at the output layer), applied in the tangent space and then mapped back to the hyperbolic space, the model accuracy increased to about 97-98 percent.
3. To account for the correct class probabilities, a linear layer was used as the output layer instead, together with the cross-entropy loss function.