In [1]:
from torchvision.models.resnet import ResNet, BasicBlock,resnet152
from torchvision.datasets import MNIST
from tqdm.notebook import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import inspect
import time
import torch
from torch import nn, optim
from torchvision.transforms import Compose, ToTensor, Normalize, Resize, Lambda
from torch.utils.data import DataLoader
from Natasha1 import Natasha1
from utils import *

# Linear model for debugging

In [7]:
import torch
from torch.autograd import Variable
import torch.optim as optim

def linear_model(x, W, b):
    return torch.matmul(x, W) + b

data = torch.randn(100, 10)
targets = torch.randn(100)

W = Variable(torch.randn(10, 1), requires_grad=True)
b = Variable(torch.randn(1), requires_grad=True)

epoch = 20

In [8]:
Adam = optim.Adam([W, b])
SGD = optim.SGD([W, b], lr=0.000001)
NT2 = Natasha1([W, b])

for t in range(epoch):
  loss_epoch = 0
  for sample, target in zip(data, targets):
      # clear out the gradients of all Variables 
      # in this optimizer (i.e. W, b)
      Adam.zero_grad()
      output = linear_model(sample, W, b)
      loss = torch.sum((output - target) ** 2)
      loss.backward()
      loss_epoch += loss.item()
      Adam.step()
  print('epoch %d: %0.4f' % (t, loss_epoch))

epoch 0: 1537.0882
epoch 1: 1467.2847
epoch 2: 1400.4708
epoch 3: 1336.2045
epoch 4: 1274.4697
epoch 5: 1215.2055
epoch 6: 1158.3383
epoch 7: 1103.7910
epoch 8: 1051.4871
epoch 9: 1001.3515
epoch 10: 953.3110
epoch 11: 907.2951
epoch 12: 863.2349
epoch 13: 821.0636
epoch 14: 780.7179
epoch 15: 742.1361
epoch 16: 705.2586
epoch 17: 670.0279
epoch 18: 636.3879
epoch 19: 604.2853


In [13]:
Adam = optim.Adam([W, b])
SGD = optim.SGD([W, b], lr=0.000001)
NT1 = Natasha1([W, b], alpha=0.1)

for t in range(epoch):
  loss_epoch = 0
  for sample, target in zip(data, targets):
      # clear out the gradients of all Variables 
      # in this optimizer (i.e. W, b)
      NT1.zero_grad()
      output = linear_model(sample, W, b)
      loss = torch.sum((output - target) ** 2)
      loss.backward()
      loss_epoch += loss.item()
      NT1.step()
  print('epoch %d: %0.4f' % (t, loss_epoch))

epoch 0: 586.7531
epoch 1: 586.7531
epoch 2: 586.7531
epoch 3: 586.7531
epoch 4: 586.7531


KeyboardInterrupt: 

# MNIST

In [1]:
from MnistLeNet import *
from utils import *
import time
from tqdm.notebook import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from Natasha1 import *

## Adam

In [10]:
# Adam as optimizer

start_ts = time.time()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = MnistLeNet().to(device)

epochs = 1
train_loader, val_loader = get_data_loaders(32, 32)
optimizer = optim.Adam(model.parameters()) #<--------------------------
# NT2 = Natasha2(model.parameters())
criterion = nn.CrossEntropyLoss() 

losses = []
batches = len(train_loader)
val_batches = len(val_loader)

# loop for every epoch (training + evaluation)
for epoch in range(epochs):
    
    total_loss = 0

    progress = tqdm(enumerate(train_loader), desc="Loss: ", total=batches)

    # ----------------- TRAINING  -------------------- 
    # set model to training
    model.train()
    
    for i, data in progress:
        if i % 10 == 1:
            X, y = data[0].to(device), data[1].to(device)
            
            # training step for single batch
            model.zero_grad()
            outputs = model(X)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()

            # getting training quality data
            total_loss += loss.item()

            # updating progress bar
            progress.set_description("Loss: {:.4f}".format(total_loss/(i+1)))
        
    # releasing unceseccary memory in GPU
    torch.cuda.empty_cache()
    
    # ----------------- VALIDATION  ----------------- 
    val_losses = 0
    precision, recall, f1, accuracy = [], [], [], []
    
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            X, y = data[0].to(device), data[1].to(device)

            outputs = model(X)
            val_losses += criterion(outputs, y)
            predicted_classes = torch.max(outputs, 1)[1] # get class from network's prediction
            
            # calculate P/R/F1/A metrics for batch
            for acc, metric in zip((precision, recall, f1, accuracy), 
                                   (precision_score, recall_score, f1_score, accuracy_score)):
                acc.append(calculate_metric(metric, y.cpu(), predicted_classes.cpu()))
          
    print(f"Epoch {epoch+1}/{epochs}, training loss: {total_loss/batches}, validation loss: {val_losses/val_batches}")
    print_scores(precision, recall, f1, accuracy, val_batches)
    losses.append(total_loss/batches) # for plotting learning curve
print(f"Training time: {time.time()-start_ts}s")

HBox(children=(FloatProgress(value=0.0, description='Loss: ', max=1875.0, style=ProgressStyle(description_widt…




  'recall', 'true', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Epoch 1/1, training loss: 0.08562307881116867, validation loss: 0.28686004877090454
	     precision: 0.9110
	        recall: 0.9071
	            F1: 0.8940
	      accuracy: 0.9135
Training time: 19.018944263458252s


## Natasha1.5

In [4]:
# Adam as optimizer

start_ts = time.time()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = MnistLeNet().to(device)
model.zero_grad()
epochs = 5
train_loader, val_loader = get_data_loaders(64, 64)
# optimizer = optim.Adam(model.parameters()) #<--------------------------
optimizer = Natasha1(model.parameters(), alpha=0.0001)
optimizer.zero_grad()
criterion = nn.CrossEntropyLoss() 

losses = []
batches = len(train_loader)
val_batches = len(val_loader)

# loop for every epoch (training + evaluation)
for epoch in range(epochs):
    
    total_loss = 0

    progress = tqdm(enumerate(train_loader), desc="Loss: ", total=batches)

    # ----------------- TRAINING  -------------------- 
    # set model to training
    model.train()
    
    for i, data in progress:
        if i % 10 == 1:
            X, y = data[0].to(device), data[1].to(device)
            
            # training step for single batch
            
            outputs = model(X)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()

            # getting training quality data
            total_loss += loss.item()

            # updating progress bar
            progress.set_description("Loss: {:.4f}".format(total_loss/(i+1)))
        
    # releasing unceseccary memory in GPU
    torch.cuda.empty_cache()
    
    # ----------------- VALIDATION  ----------------- 
    val_losses = 0
    precision, recall, f1, accuracy = [], [], [], []
    
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            X, y = data[0].to(device), data[1].to(device)

            outputs = model(X)
            val_losses += criterion(outputs, y)
            predicted_classes = torch.max(outputs, 1)[1] # get class from network's prediction
            
            # calculate P/R/F1/A metrics for batch
            for acc, metric in zip((precision, recall, f1, accuracy), 
                                   (precision_score, recall_score, f1_score, accuracy_score)):
                acc.append(calculate_metric(metric, y.cpu(), predicted_classes.cpu()))
          
    print(f"Epoch {epoch+1}/{epochs}, training loss: {total_loss/batches}, validation loss: {val_losses/val_batches}")
    print_scores(precision, recall, f1, accuracy, val_batches)
    losses.append(total_loss/batches) # for plotting learning curve
print(f"Training time: {time.time()-start_ts}s")

HBox(children=(FloatProgress(value=0.0, description='Loss: ', max=938.0, style=ProgressStyle(description_width…


Epoch 1/5, training loss: 0.23068008773616636, validation loss: 2.30197811126709
	     precision: 0.0274
	        recall: 0.1390
	            F1: 0.0450
	      accuracy: 0.1376


HBox(children=(FloatProgress(value=0.0, description='Loss: ', max=938.0, style=ProgressStyle(description_width…




KeyboardInterrupt: 