In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import os
import torch.nn.functional as F

In [2]:
import torchvision.models as models

In [3]:
transforms = transforms.transforms.Compose([transforms.Resize([224,224]),transforms.ToTensor()])

In [4]:
#data_dir='/Users/paulbruffett/code/ML/invasive/train'
data_dir='D:/ML/invasive/train'

image_datasets = datasets.ImageFolder(data_dir,transform=transforms)
train_set, val_set = torch.utils.data.random_split(image_datasets, [int(len(image_datasets)*0.8), int(len(image_datasets)*0.2)])
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
valid_loader = torch.utils.data.DataLoader(val_set, batch_size=64, shuffle=False)

In [5]:
image_datasets.classes

['invasive', 'not-invasive']

In [6]:
"""import pandas as pd
import shutil
labels = pd.read_csv("D:/ML/invasive/train_labels.csv")"""

'import pandas as pd\nimport shutil\nlabels = pd.read_csv("D:/ML/invasive/train_labels.csv")'

In [7]:
"""for i in labels.iterrows():
    file_name = str(i[1]['name'])+'.jpg'
    if i[1]['invasive'] == 1:
        shutil.move(data_dir+"/train/"+file_name,data_dir+"/train/"+"invasive/"+file_name)
    else:
        shutil.move(data_dir+"/train/"+file_name,data_dir+"/train/"+"not-invasive/"+file_name)
    print(i[1]['name'],i[1]['invasive'])"""

'for i in labels.iterrows():\n    file_name = str(i[1][\'name\'])+\'.jpg\'\n    if i[1][\'invasive\'] == 1:\n        shutil.move(data_dir+"/train/"+file_name,data_dir+"/train/"+"invasive/"+file_name)\n    else:\n        shutil.move(data_dir+"/train/"+file_name,data_dir+"/train/"+"not-invasive/"+file_name)\n    print(i[1][\'name\'],i[1][\'invasive\'])'

In [8]:
def create_body(architecture):
    if architecture == 'resnet34':
      model = models.resnet34(pretrained=True)
    elif architecture == 'resnet18':
      model = models.resnet18(pretrained=True)
    "Cut off the body of a typically pretrained `model` at `cut` (int) or cut the model as specified by `cut(model)` (function)."
    return nn.Sequential(*list(model.children())[:-1])


In [9]:
class conv_net(nn.Module):
    def __init__(self,fc_neurons,architecture):
        super().__init__()

        self.body = create_body(architecture)

        conv_out_size = self._get_conv_out([3,224,224])
        self.fc = nn.Sequential(
            nn.Linear(conv_out_size, fc_neurons),
            nn.ReLU(),
            nn.Linear(fc_neurons, 2),
            nn.LogSoftmax()
        )

    def _get_conv_out(self, shape):
        o = self.body(torch.zeros(1, *shape))
        return int(np.prod(o.size()))

    def forward(self, x):
        conv_out = self.body(x).view(x.size()[0], -1)
        return self.fc(conv_out)


    def fit(self, epochs, train_loader, lr):
        criterion = nn.NLLLoss()
        optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
        epochs = epochs
        print_every = 50
        for e in range(epochs):
            running_loss = 0
            for step, (x,y) in enumerate(train_loader):
                y = y.to(device)
                x = x.to(device)
                
                optimizer.zero_grad()
                
                # Forward and backward passes
                output = self.forward(x)
                loss = criterion(output, y)
                loss.backward()
                optimizer.step()
                
                running_loss += loss.item()
                

            print("Epoch: {}/{}... ".format(e+1, epochs),
                "Loss: {:.4f}".format(running_loss/print_every))
            
    def score(self, valid_loader):
        test_loss = 0
        correct = 0
        preds_c = []
        with torch.no_grad():
          for data, target in valid_loader:
              data = data.to(device)
              target = target.to(device)
              output = self(data)
              test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
              pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
              preds_c.append(pred)
              correct += pred.eq(target.view_as(pred)).sum().item()

        test_loss /= len(valid_loader.dataset)

        test_acc = 100. * correct / len(valid_loader.dataset)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
          test_loss, correct, len(valid_loader.dataset),test_acc))


In [10]:
#list(model.children())

In [11]:
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument('--architecture', type=str)
    parser.add_argument('--lr', type=float)
    parser.add_argument('--n_neurons', type=int)
    parser.add_argument('--epochs',type=int)
    args = parser.parse_args(args=[])

    if args.n_neurons == None: args.n_neurons = 512
    if args.epochs == None: args.epochs = 10
    if args.architecture == None: args.architecture = 'resnet34'
    if args.lr == None: args.lr = 0.01

    model = conv_net(args.n_neurons,args.architecture)

    #todo param for freezing base layers
    n_layers = len(list(model.parameters()))-6
    for layer in model.parameters():
        layer.requires_grad=False
        n_layers -=1
        if n_layers==0:
            break

    criterion = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    
    model = conv_net(args.n_neurons,args.architecture)
    
    model.to(device)
    
    model.fit(args.epochs, train_loader, args.lr)
    
    model.score(valid_loader)

  input = module(input)


Epoch: 1/10...  Loss: 0.1720
Epoch: 2/10...  Loss: 0.0397
Epoch: 3/10...  Loss: 0.0224
Epoch: 4/10...  Loss: 0.0054
Epoch: 5/10...  Loss: 0.0025
Epoch: 6/10...  Loss: 0.0083
Epoch: 7/10...  Loss: 0.0017
Epoch: 8/10...  Loss: 0.0006
Epoch: 9/10...  Loss: 0.0001
Epoch: 10/10...  Loss: 0.0001

Test set: Average loss: 0.0701, Accuracy: 452/459 (98%)

