# Project

In [3]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import tensorboard
from torchvision import datasets
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score, confusion_matrix, plot_confusion_matrix, f1_score, precision_recall_curve


import os
import matplotlib.pyplot as plt

from textwrap import wrap
import re
import itertools
import tfplot
import matplotlib
import numpy as np
from sklearn.metrics import confusion_matrix

import time
import copy
import numpy as np
import pandas as pd
import seaborn as sns
import datetime

In [4]:
model = torch.hub.load('pytorch/vision:v0.8.2', 'resnet50', pretrained=True)
# or any of these variants
# model = torch.hub.load('pytorch/vision:v0.9.0', 'resnet34', pretrained=True)
# model = torch.hub.load('pytorch/vision:v0.9.0', 'resnet50', pretrained=True)
# model = torch.hub.load('pytorch/vision:v0.9.0', 'resnet101', pretrained=True)
# model = torch.hub.load('pytorch/vision:v0.9.0', 'resnet152', pretrained=True)
model.fc = nn.Sequential(
    nn.Linear(2048,2),
    nn.Softmax()
)
model.eval()


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.8.2


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [5]:
def create_confused_matrix(labels, predicted):
    conf_mat = confusion_matrix(labels, predicted)
    hetkarta = sns.heatmap(conf_mat, annot=True)
    hetkarta = hetkarta.get_figure()
    writer.add_figure(model_name + ' heatMappad' + str(datetime.datetime.now()), hetkarta)


In [6]:
transforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    #torchvision.transforms.Grayscale(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

In [7]:
batch_size = 1000
batch_size_training = 8

dataset_train = datasets.ImageFolder('Dataset_undersample/train', 
    transform=transforms)
dataset_test = datasets.ImageFolder('Dataset_undersample/test',
    transform = transforms)
dataset_val = datasets.ImageFolder('Dataset_undersample/val',
    transform = transforms)

train_loader = DataLoader(dataset_train, batch_size=batch_size_training, shuffle=True) 
test_loader = DataLoader(dataset_test, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset_val, batch_size=batch_size, shuffle=True)

In [9]:
print(dataset_train)
print(type(train_loader))
#print(next(iter(test_loader))[0])
tot_labels = []
#for i, data in enumerate(test_loader, 0):
#    image, labels = data[0] , data[1]
#    
#    tot_labels.append(labels)
#print(tot_labels)

Dataset ImageFolder
    Number of datapoints: 2705
    Root location: Dataset_undersample/train
    StandardTransform
Transform: <module 'torchvision.transforms' from '/usr/local/lib/python3.6/dist-packages/torchvision/transforms/__init__.py'>
torch.utils.data.dataloader.DataLoader


In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    model.to('cuda')
    
model_name = "model_pretrained_10epochs"
writer = SummaryWriter("runs/" + model_name)

In [9]:
print(device)

cuda:0


In [10]:
# model_name = "model_pretrained_10epochs"
# writer = SummaryWriter("runs/" + model_name)

In [11]:
torch.cuda.empty_cache()
auroc_scores = []
losses = []
losses_val = []
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

for epoch in range(20):  # loop over the dataset multiple times
    running_loss = 0.0
    total = 0
    correct = 0
    total_val = 0
    correct_val = 0
    print("Training...")
    for i, data in enumerate(train_loader, 0):
        print( i, end="\r")
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)
        #inputs, labels = data
        
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs).to(device)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # print statistics
        running_loss += loss.item()
        writer.add_scalar('Loss/train', loss.item(), i)
        writer.add_scalar('Accuracy/train', correct/total, i)
       
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
            print("Accuracy: ", correct/total)
    print("Training Accuracy: ", correct/total)
            
    print("Validating...")
    for i, data in enumerate(val_loader, 0):
        images, labels = data[0].to(device), data[1].to(device)
        #images, labels = data
        outputs = model(images).to(device)
        loss = criterion(outputs, labels)
        writer.add_scalar('Loss/validation', loss.item(), i)
        writer.add_scalar('Accuracy/validation', correct/total, i)
        _, predicted = torch.max(outputs.data, 1)
        
        try:
            auroc = roc_auc_score(labels.to("cpu"), predicted.to("cpu"))
            auroc_scores.append(auroc)
        except ValueError:
            print("ValueError!!")
            pass
        
        total_val += labels.size(0)
        correct_val += (predicted == labels).sum().item()
    print("Validation Accuracy: ", correct_val/total_val)

print('Finished Training')

Training...
0

  input = module(input)


Training Accuracy:  0.9319778188539741
Validating...
Validation Accuracy:  0.9375
Training...
0

  input = module(input)


Training Accuracy:  0.9634011090573013
Validating...
Validation Accuracy:  0.6875
Training...
0

  input = module(input)


Training Accuracy:  0.9763401109057301
Validating...
Validation Accuracy:  0.875
Training...
0

  input = module(input)


Training Accuracy:  0.9811460258780037
Validating...
Validation Accuracy:  0.8125
Training...
0

  input = module(input)


Training Accuracy:  0.9855822550831793
Validating...
Validation Accuracy:  0.8125
Training...
0

  input = module(input)


Training Accuracy:  0.9855822550831793
Validating...
Validation Accuracy:  0.875
Training...
0

  input = module(input)


Training Accuracy:  0.9859519408502773
Validating...
Validation Accuracy:  0.625
Training...
0

  input = module(input)


Training Accuracy:  0.9852125693160814
Validating...
Validation Accuracy:  0.9375
Training...
0

  input = module(input)


Training Accuracy:  0.989648798521257
Validating...
Validation Accuracy:  0.9375
Training...
0

  input = module(input)


Training Accuracy:  0.9922365988909427
Validating...
Validation Accuracy:  1.0
Training...
0

  input = module(input)


Training Accuracy:  0.9933456561922366
Validating...
Validation Accuracy:  0.9375
Training...
0

  input = module(input)


Training Accuracy:  0.9907578558225508
Validating...
Validation Accuracy:  1.0
Training...
0

  input = module(input)


Training Accuracy:  0.9959334565619223
Validating...
Validation Accuracy:  0.9375
Training...
0

  input = module(input)


Training Accuracy:  0.9944547134935305
Validating...
Validation Accuracy:  0.9375
Training...
0

  input = module(input)


Training Accuracy:  0.9974121996303142
Validating...
Validation Accuracy:  0.9375
Training...
0

  input = module(input)


Training Accuracy:  0.9974121996303142
Validating...
Validation Accuracy:  0.9375
Finished Training


In [12]:
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# if torch.cuda.is_available():
device = "cpu"
model.to('cpu')

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [61]:
correct = 0
total = 0
total_auroc = 0
labels_total = []
auroc_scores = []

# print(type(test_loader))

with torch.no_grad():
    for i ,data in enumerate(test_loader, 0):
        print(i, end="\r")
#         images, labels = data[0].to(device), data[1].to(device)
        images, labels = data[0], data[1]
        #images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        try:
            auroc = roc_auc_score(labels.to("cpu"), predicted.to("cpu"))
            auroc_scores.append(auroc)
        except ValueError:
            pass
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        create_confused_matrix(labels.to("cpu"), predicted.to("cpu"))
        f1 = f1_score(labels, predicted)
        
        proba = outputs.data[:,1]
        lr_probs,lr_recall,_ = precision_recall_curve(labels,proba)
        
        for i in range(len(lr_probs)):
            writer.add_scalar('Precision recall',lr_recall[i],lr_probs[i])

        
print('Accuracy of the network on the', total ,'test images: %d %%' % (
    100 * correct / total))
print("Auoroc: ", np.mean(auroc_scores))
print("F1: ", f1)

0

  input = module(input)


Accuracy of the network on the 468 test images: 82 %
Auoroc:  0.8247863247863249
F1:  0.8503649635036495


In [14]:
print(torch.softmax(torch.max(outputs.data)))

TypeError: softmax() received an invalid combination of arguments - got (Tensor), but expected one of:
 * (Tensor input, name dim, *, torch.dtype dtype)
 * (Tensor input, int dim, torch.dtype dtype)


In [60]:
L1 =torch.tensor([[9.9591e-01, 4.0887e-03],
        [2.0599e-16, 1.0000e+00],
        [1.0000e+00, 2.0411e-06],
        [7.1373e-07, 1.0000e+00],
        [1.0000e+00, 3.3201e-08],
        [3.7300e-15, 1.0000e+00],
        [1.0000e+00, 3.2337e-07],
        [4.1430e-07, 1.0000e+00],
        [1.3036e-14, 1.0000e+00],
        [9.9690e-01, 3.0970e-03],
        [1.1530e-15, 1.0000e+00]])
proba = L1[:,1]

L2 = [0,1,0,1,0,0,0,1,1,1,0]
lr_probs,lr_recall,_ = precision_recall_curve(L2, proba)
print(lr_probs,lr_recall)
writer.add_scalars('Precision recall',lr_probs,lr_recall)
plt.plot(lr_probs,lr_recall,marker='.')

[0.625      0.57142857 0.66666667 1.        ] [1.  0.8 0.8 0. ]


AttributeError: 'numpy.ndarray' object has no attribute 'items'

In [37]:
a = [[0,1],[0,1]]
a[:,1]

TypeError: list indices must be integers or slices, not tuple

# Load and Save Model

In [None]:
model.load_state_dict(torch.load(model_name + ".pt"))
model.to(device)

In [None]:
torch.save(model.state_dict(), model_name +".pt")

# TODO:
## precision recall
## more epochs (300 epochs)