In [2]:
# Disabled Colab setup: the statements below are wrapped in a bare string
# literal, so none of them execute. If re-enabled they would mount Google
# Drive at /content/gdrive and change the working directory to the
# ChestXRay folder on that drive.
'''from google.colab import drive
drive.mount('/content/gdrive')
import os
os.getcwd()
os.chdir('/content/gdrive/My Drive/ChestXRay')'''

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
import numpy as np
import torch

import os
import random
import logging
import time
from datetime import datetime

from utils.setup_logging import setup_logging
from utils.unzip_data import unzip_data
from utils.dataloaders import get_dataloader

from models.models import get_model
from exp.pretraining import pretrain
from exp.pretesting import pretest
from exp.finding_center import find_center
from exp.training import train
from exp.testing import test

In [10]:
'''
+ load data/
+ perform experiments (train, val and test)
+ write results to logs/

Later versions :
+ download (pull from NIH and filter one file at a time)
'''
def main(model='resnet18', rep_dim=490, dataset='curated', base_path=None, unzip=False,
         ae_train=True, clf_train=True, ae_epochs=100, clf_epochs=100,
         batch_size=4, accumulation_steps=32, ae_loadfile=None, clf_loadfile=None,
         save_model=True, ae_test=True, accumulate=False):
    '''
    Run one full experiment: autoencoder pretraining/testing, then classifier
    center finding, training and testing.

    model : architecture name, forwarded to setup_logging and get_model
    rep_dim : representation dimensionality, forwarded to setup_logging,
        get_model and find_center
    dataset : dataset name forwarded to get_dataloader (the original notes
        list 'curated' or 'full'; valid names are defined by get_dataloader)
    base_path : path/to/ChestXRay eg. /home/paperspace/ChestXRay (required)
    unzip : if True, call unzip_data(base_path) before building the loaders
    ae_train : if True, pretrain the autoencoder with pretrain()
    clf_train : if True, train the classifier with train()
    ae_epochs, clf_epochs : epoch counts forwarded to pretrain() / train()
    batch_size : batch size forwarded to get_dataloader for both loaders
    accumulation_steps, accumulate : forwarded unchanged to pretrain() and train()
    ae_loadfile : file name inside <base_path>/models/saved_models/ whose
        state dict is loaded into the autoencoder (strict=False)
    clf_loadfile : file name inside <base_path>/models/saved_models/ whose
        state dict is loaded into the classifier (strict=False)
    save_model : if True, save the state dict of each model that was trained
        in this run to <base_path>/models/saved_models/
    ae_test : if True, evaluate the autoencoder with pretest()

    Raises ValueError if base_path is None, or if ae_train is True while
    ae_loadfile or clf_loadfile is given.
    '''
    if base_path is None:
        raise ValueError('Please point base_path to ChestXRay/')

    if ae_train and (ae_loadfile or clf_loadfile):
        raise ValueError('Please either set ae_train to True or specify a loadfile but not both.')

    filename = setup_logging(base_path=base_path, model=model, rep_dim=rep_dim)
    logging.info('Architecture : {}'.format(model))
    logging.info('Representaion Dimensionality : {}'.format(rep_dim))
    logging.info('Dataset : {}'.format(dataset))

    if unzip:
        unzip_data(base_path)

    trainloader = get_dataloader(dataset=dataset, set_='train', batch_size=batch_size)
    testloader = get_dataloader(dataset=dataset, set_='test', batch_size=batch_size)

    # Directory (with trailing separator) that holds every checkpoint.
    model_dir = os.path.join(base_path, 'models/saved_models/')

    autoencoder = get_model(model=model, kind='autoencoder', rep_dim=rep_dim)
    if ae_loadfile is not None:
        autoencoder.load_state_dict(torch.load(model_dir + ae_loadfile), strict=False)
    if ae_train:
        autoencoder = pretrain(trainloader=trainloader,
                               autoencoder=autoencoder,
                               ae_epochs=ae_epochs,
                               accumulation_steps=accumulation_steps,
                               accumulate=accumulate)

        if save_model:
            # torch.save does not create missing parent directories.
            os.makedirs(model_dir, exist_ok=True)
            torch.save(autoencoder.state_dict(), model_dir + 'ae: ' + filename + '.pt')

    if ae_test:
        pretest(testloader=testloader, autoencoder=autoencoder)

    # Hand the autoencoder weights to the classifier from memory rather than
    # re-reading the checkpoint file: that file only exists when ae_train and
    # save_model are both True, so relying on it broke every other
    # configuration. Only weights that were actually trained or loaded are
    # transferred.
    if ae_train or ae_loadfile is not None:
        pretrained_state = autoencoder.state_dict()
    else:
        pretrained_state = None
    del autoencoder

    classifier = get_model(model=model, kind='classifier', rep_dim=rep_dim)
    if pretrained_state is not None:
        # strict=False: only the keys the two models share are copied over.
        classifier.load_state_dict(pretrained_state, strict=False)
        del pretrained_state
    if clf_loadfile is not None:
        classifier.load_state_dict(torch.load(model_dir + clf_loadfile), strict=False)

    c = find_center(trainloader=trainloader, classifier=classifier, rep_dim=rep_dim)

    if clf_train:
        classifier = train(trainloader=trainloader,
                           classifier=classifier,
                           clf_epochs=clf_epochs,
                           accumulation_steps=accumulation_steps,
                           c=c,
                           accumulate=accumulate)

        if save_model:
            os.makedirs(model_dir, exist_ok=True)
            torch.save(classifier.state_dict(), model_dir + 'clf: ' + filename + '.pt')

    test(testloader=testloader, classifier=classifier, c=c)

# Entry-point guard.
# NOTE(review): main() is called here without arguments, so base_path keeps
# its default of None and main() raises ValueError immediately. Pass
# base_path explicitly to actually run an experiment.
if __name__ == '__main__':
    main()

"if __name__ == '__main__':\n    main()"

In [13]:
# Disabled example invocation: the call is wrapped in a bare string literal,
# so it does not execute when this cell runs. Remove the surrounding quotes
# to launch the run it describes.
'''main(base_path='/content/gdrive/My Drive/ChestXRay', 
     model='resnet18', rep_dim=4900, ae_epochs=10, clf_epochs=10, dataset='clean', ae_train=True, ae_test=True, accumulation_steps=16)'''

INFO:root:Architecture : resnet18
INFO:root:Representaion Dimensionality : 4900
INFO:root:Dataset : clean
INFO:root:Starting pretraining...
INFO:root:Learning rate : 0.001
INFO:root:Gradient accumulation : False
INFO:root:Accumulation steps : 0
INFO:root:AE epochs : 10
INFO:root:  Epoch 1/10	 Time: 214.094	 Loss: 1603.96619639
INFO:root:  Epoch 2/10	 Time: 213.340	 Loss: 111.01252876
INFO:root:  Epoch 3/10	 Time: 213.680	 Loss: 83.81932167
INFO:root:  Epoch 4/10	 Time: 213.285	 Loss: 70.76364526
INFO:root:  Epoch 5/10	 Time: 213.180	 Loss: 62.19698472
INFO:root:  Epoch 6/10	 Time: 213.671	 Loss: 55.35433370
INFO:root:  Epoch 7/10	 Time: 213.362	 Loss: 50.35406888
INFO:root:  Epoch 8/10	 Time: 213.391	 Loss: 46.90351229
INFO:root:  Epoch 9/10	 Time: 212.771	 Loss: 44.27397307
INFO:root:  Epoch 10/10	 Time: 213.377	 Loss: 42.09306953
INFO:root:Pretraining time: 2134.216
INFO:root:Finished pretraining.
INFO:root:Testing autoencoder...
INFO:root:Test set Loss: 40.37110762
INFO:root:Test se