In [1]:
#set up working dirs
model_folder = './model_checkpoints'
results_folder = './results/'

In [2]:
#imports
import os
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score, auc, precision_recall_fscore_support, confusion_matrix, accuracy_score
from torchvision import transforms 
from torchvision.models import resnet50
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torch import nn, optim

In [3]:
#GET DATA
train_datafolder = '../../images/dataset/train'
valid_datafolder = '../../images/dataset/valid'
#test_datafolder = '../../images/dataset/test'

transformations = transforms.Compose([transforms.Resize((224, 224)), 
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
train_dataset = ImageFolder(train_datafolder,transform=transformations) 
valid_dataset = ImageFolder(valid_datafolder,transform=transformations) 

In [17]:
train_loss_weights = torch.Tensor([np.sum(np.array(train_dataset.targets) == 0), np.sum(np.array(train_dataset.targets) == 1)])#/len(dataset.targets)
valid_loss_weights = torch.Tensor([np.sum(np.array(valid_dataset.targets) == 0), np.sum(np.array(valid_dataset.targets) == 1)])#/len(dataset.targets)

train_loss_weights, valid_loss_weights = torch.max(train_loss_weights)/train_loss_weights, torch.max(valid_loss_weights)/valid_loss_weights
train_loss_weights, valid_loss_weights

(tensor([ 1.0000, 30.8428]), tensor([  1.0000, 111.8604]))

In [5]:
#Dataloaders
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=True)

In [6]:
#device settings
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'Device in use: {device}')

Device in use: cuda:0


In [7]:
#Model

class myResNet50(nn.Module):
  def __init__(self, should_hook=False):
    super(myResNet50, self).__init__()

    self.should_hook = should_hook

    pretrainedmodel = resnet50(pretrained=True)
    
    #freezes weights
    if not self.should_hook:
      print("Freezing params...")
      for param in pretrainedmodel.parameters():
        param.requires_grad = False

    self.base = nn.Sequential(*list(pretrainedmodel.children())[:-2])

    self.avgpool = pretrainedmodel.avgpool

    #final layer
    infeatures = pretrainedmodel.fc.in_features
    self.fc = nn.Linear(infeatures, 2)

    # placeholder for the gradients
    self.gradients = None
    
  # method for the gradient extraction
  def get_activations_gradient(self):
    return self.gradients
  
  # method for the activation exctraction
  def get_activations(self, x):
    return self.base(x)

  # hook for the gradients of the activations
  def activations_hook(self, grad):
    self.gradients = grad


  def forward(self, x):
    #x.requires_grad = True
    x = self.base(x)
    
    # register the hook
    if self.should_hook:
      h = x.register_hook(self.activations_hook)
    
    # apply the remaining pooling
    x = self.avgpool(x)
    x = x.view(x.size(0), -1)
    out = self.fc(x)
    return out


model = myResNet50(should_hook=False)
model.to(device)
#print(model)

model_name = 'uk_not_uk_classifier1'

Freezing params...


In [8]:
#model settings
tr_criterion = nn.CrossEntropyLoss(weight=train_loss_weights.to(device))
val_criterion = nn.CrossEntropyLoss(weight=valid_loss_weights.to(device))
# Observe that all parameters are being optimized
optimizer = optim.Adam(model.fc.parameters(), lr=0.002)


In [9]:
#train network

# number of epochs to train the model
n_epochs = 60

valid_loss_min = np.Inf # track change in validation loss

#result files

train_results = results_folder+model_name+'_tr.csv'
val_results = results_folder+model_name+'_val.csv'

with open(train_results, 'a') as train_result:
    header = ",".join(['loss', 'accuracy', 'tn', 'fp', 'fn', 'tp', 'precision_0', 'precision_1', 'recall_0', 'recall_1', 'f1_0', 'f1_1', 'count_0', 'count_1','auroc','\n'])
    train_result.write(header)

with open(val_results, 'a') as val_result:
    header = ",".join(['loss', 'accuracy', 'tn', 'fp', 'fn', 'tp', 'precision_0', 'precision_1', 'recall_0', 'recall_1', 'f1_0', 'f1_1', 'count_0', 'count_1','auroc','saved','\n'])
    val_result.write(header)

for epoch in range(1, n_epochs+1):

    # keep track of training and validation loss
    train_loss = 0.0
    valid_loss = 0.0

    # keep track of training and validation outputs
    tr_y_pred, tr_y_target = np.array([]), np.array([])
    val_y_pred, val_y_target = np.array([]), np.array([])

    
    ###################
    # train the model #
    ###################
    model.train()
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model.forward(data)
        # calculate the batch loss
        loss = tr_criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update training loss
        train_loss += loss.item()*data.size(0)

        #save data for metrics
        predictions = torch.exp(output)
      
        _, top_classes = predictions.topk(1,dim=1)

        tr_y_target = np.concatenate((tr_y_target,target.cpu()))
        tr_y_pred = np.concatenate((tr_y_pred,top_classes.squeeze(1).cpu()))
        
    ######################    
    # validate the model #
    ######################
    model.eval()
    for data, target in valid_loader:
        data, target = data.to(device), target.to(device)
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model.forward(data)
        # calculate the batch loss
        loss = val_criterion(output, target)
        # update average validation loss 
        valid_loss += loss.item()*data.size(0)

        #save data for metrics
        predictions = torch.exp(output)
      
        _, top_classes = predictions.topk(1,dim=1)

        val_y_target = np.concatenate((val_y_target,target.cpu()))
        val_y_pred = np.concatenate((val_y_pred,top_classes.squeeze(1).cpu()))
    
    # calculate average losses
    train_loss = train_loss/len(train_loader.sampler)
    valid_loss = valid_loss/len(valid_loader.sampler)

    # calulate metrics
    with open(train_results, 'a') as train_result:
      a, b = confusion_matrix(tr_y_target, tr_y_pred)
      tn, fp, fn, tp = *a, *b
      precision, recall, f1, support = precision_recall_fscore_support(tr_y_target, tr_y_pred, average='weighted')
      precision_0, precision_1, recall_0, recall_1, f1_0, f1_1, count_0, count_1 = *precision, *recall, *f1, *support
      accuracy = accuracy_score(tr_y_target, tr_y_pred)
      auroc = roc_auc_score(tr_y_target, tr_y_pred)
      line = ",".join([str(train_loss), str(accuracy), str(tn), str(fp), str(fn), str(tp), str(precision_0), str(precision_1), str(recall_0), str(recall_1), str(f1_0), str(f1_1), str(count_0), str(count_1), str(auroc), '\n'])
      train_result.write(line)

    with open(val_results, 'a') as val_result:
      a, b = confusion_matrix(val_y_target, val_y_pred)
      tn, fp, fn, tp = *a, *b
      precision, recall, f1, support = precision_recall_fscore_support(val_y_target, val_y_pred, average='weighted')
      precision_0, precision_1, recall_0, recall_1, f1_0, f1_1, count_0, count_1 = *precision, *recall, *f1, *support
      accuracy = accuracy_score(val_y_target, val_y_pred)
      auroc = roc_auc_score(val_y_target, val_y_pred)
      line = ",".join([str(valid_loss), str(accuracy), str(tn), str(fp), str(fn), str(tp), str(precision_0), str(precision_1), str(recall_0), str(recall_1), str(f1_0), str(f1_1), str(count_0), str(count_1), str(auroc), str(valid_loss <= valid_loss_min),'\n'])
      val_result.write(line)
      

        
    # print training/validation statistics 
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(epoch, train_loss, valid_loss))
    
    
    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving updated model ...'.format(valid_loss_min, valid_loss))
        torch.save(model.state_dict(), model_folder+'/'+model_name+'.pt')
        valid_loss_min = valid_loss

TypeError: 'numpy.float64' object is not iterable

In [22]:
train_loss * 30.8 + train_loss

0.27198620262500833

In [19]:
loss * 111

tensor(0.0798, device='cuda:0', grad_fn=<MulBackward0>)

In [None]:
#testing model
model.load_state_dict(torch.load(model_folder+'/'+model_name+'.pt'))

model.eval()
test_accuracy = 0.0

y_pred, y_target = np.array([]), np.array([])

test_results = results_folder+model_name+'_test.csv'

with torch.no_grad():
  for test_images, test_labels in test_loader:
      test_images, test_labels = test_images.to(device), test_labels.to(device)
      out = model(test_images)
      predictions = torch.exp(out)
      
      _, top_classes = predictions.topk(1,dim=1)

      y_target = np.concatenate((y_target,test_labels.cpu()))
      y_pred = np.concatenate((y_pred,top_classes.squeeze(1).cpu()))
      

with open(test_results, 'a') as test_result:
  header = ",".join(['accuracy', 'tn', 'fp', 'fn', 'tp', 'precision_0', 'precision_1', 'recall_0', 'recall_1', 'f1_0', 'f1_1', 'count_0', 'count_1','auroc','\n'])
  a, b = confusion_matrix(y_target, y_pred)
  tn, fp, fn, tp = *a, *b
  precision, recall, f1, support = precision_recall_fscore_support(y_target, y_pred, average='weighted')
  precision_0, precision_1, recall_0, recall_1, f1_0, f1_1, count_0, count_1 = *precision, *recall, *f1, *support
  accuracy = accuracy_score(y_target, y_pred)
  auroc = roc_auc_score(y_target, y_pred)
  line = ",".join([str(accuracy), str(tn), str(fp), str(fn), str(tp), str(precision_0), str(precision_1), str(recall_0), str(recall_1), str(f1_0), str(f1_1), str(count_0), str(count_1), str(auroc), '\n'])
  test_result.write(header)
  test_result.write(line)
  print(f"Results in {test_results}")

print(f'Test Accuracy: {accuracy*100:.6f}%')