In [None]:
# AWS libs and setup
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.pytorch import PyTorch

# Shared execution context reused by the later cells of this notebook.
sess = sagemaker.Session()                    # SageMaker SDK session (used for the default bucket and job submission)
role = get_execution_role()                   # execution role handed to the estimator
sm = boto3.Session().client('sagemaker')      # low-level SageMaker API client
region = boto3.session.Session().region_name  # region the boto3 session resolves to

print(f"Region = {region}")

In [None]:
# Install the libsndfile1 system package on the notebook instance.
# NOTE(review): presumably required by the audio stack (torchaudio / fastaudio) for
# reading sound files locally — confirm it is still needed and consider pinning it.
!sudo apt-get install -y libsndfile1

In [None]:
%%writefile training.py
import argparse
import glob
import io
import logging
import os
import time

import requests
from sagemaker_training import environment
from fastai.vision.all import *
from fastaudio.core.all import *
from fastaudio.augment.all import *
from fastaudio.ci import skip_if_ci
import torchaudio
import pathlib
from IPython.display import Audio
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
# import ipywidgets as widgets

# Module-level logger for the training script.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# With no handler configured anywhere, INFO records are silently dropped: the
# logging module's last-resort handler only emits WARNING and above. Attach a
# stream handler (stderr) unless this logger or one of its ancestors already
# has one, so logger.info(...) calls actually reach the job logs.
if not logger.hasHandlers():
    logger.addHandler(logging.StreamHandler())

# source: https://github.com/aws/amazon-sagemaker-examples/blob/master/advanced_functionality/fastai_oxford_pets/source/pets.py

def _save_roc_curve(fpr, tpr, roc_auc, out_file):
    """Plot a ROC curve (plus the chance diagonal) and save it to ``out_file``."""
    plt.figure()
    plt.plot(fpr, tpr, color='darkorange', label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
    plt.xlim([-0.01, 1.0])
    plt.ylim([0.0, 1.01])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    plt.savefig(out_file, dpi=70, bbox_inches='tight', pad_inches=1)
    plt.close()


def _train(args):
    """Train an audio classifier on the tyhac dataset and write all artifacts to the model dir.

    Steps: read ``tyhac_combined_cleaned.csv`` from ``args.data_dir``, hold out 10% of
    the rows as a test set, build mel-spectrogram dataloaders with class-balancing
    sample weights, fine-tune a CNN, then save the exported learner, evaluation
    plots/report and the model weights.

    Args:
        args: parsed command-line namespace. Reads ``data_dir``, ``model_dir``,
            ``batch_size``, ``workers``, ``epochs``, ``learning_rate`` and
            ``model_arch``.

    Returns:
        The value returned by ``learn.save`` (presumably the path of the saved
        weights file — verify against the installed fastai version).

    Files written to ``args.model_dir``: ``dataset_test.csv``, ``model.pth``,
    ``roc_curve.jpg``, ``confusion_matrix.jpg``, ``classification_report.csv``,
    an empty ``models/`` directory and ``<model_arch>.pth``.
    """
    # Function-local import, as in the original script (sklearn is only needed here).
    from sklearn.metrics import auc, classification_report, roc_curve

    device = "cuda" if torch.cuda.is_available() else "cpu"
    logger.info("Device Type: {}".format(device))
    print("Device Type: {}".format(device))

    logger.info("Loading tyhac dataset")
    print(f"Batch size: {args.batch_size}")
    path = Path(args.data_dir)
    model_path = Path(args.model_dir)
    print(f"Data path is: {path}")

    df = pd.read_csv(path / 'tyhac_combined_cleaned.csv')

    # Remap file_path from the original bucket layout to the directory the data
    # channel is mounted on. Assign the result instead of mutating through
    # attribute access with inplace=True, and use the actual data dir rather than
    # a hard-coded container path (identical when --data-dir is left at its default).
    df['file_path'] = df['file_path'].str.replace('rawdata/combined/', f'{path}/', regex=False)

    # Hold out 10% of the rows for later testing; they never reach the learner.
    dataset = df.sample(frac=0.9, random_state=200)
    dataset_test = df.drop(dataset.index)
    dataset_test.to_csv(model_path / 'dataset_test.csv')  # saved for more testing
    print(f"Dataset train shape is: {dataset.shape}")
    print(f"Dataset test shape is: {dataset_test.shape}")

    # Audio processing configuration.
    # TODO: move these to args
    n_fft = 2048  # 16 * n_mels
    hop_length = 512
    target_rate = 48000
    n_mels = 128  # library default, passed explicitly so the setting is visible

    cfg = AudioConfig.BasicMelSpectrogram(n_fft=n_fft, hop_length=hop_length,
                                          sample_rate=target_rate, n_mels=n_mels)
    a2s = AudioToSpec.from_cfg(cfg)
    print(f"Fastaudio hyperparameters: {cfg}")

    # Stratified train/validation split of the non-test rows.
    labels = dataset['covid'].values
    count = Counter(labels)
    splitter = TrainTestSplitter(test_size=0.2, random_state=42, stratify=labels)
    print(f"Dataset label counts: {count}")

    db = DataBlock(blocks=(AudioBlock, CategoryBlock),
                   get_x=ColReader('file_path'),
                   get_y=ColReader('covid'),
                   splitter=splitter,
                   item_tfms=[ResizeSignal(6000), a2s])  # ResizeSignal takes milliseconds

    # The dataset is imbalanced: weight each training item by the inverse
    # frequency of its class so the weighted loader samples classes more evenly.
    dsets = db.datasets(dataset)
    wgts = [1 / count[dsets.vocab[label]] for _, label in dsets.train]

    dls = db.dataloaders(dataset, num_workers=args.workers, dl_type=WeightedDL,
                         wgts=wgts, bs=args.batch_size)

    # Model. n_in=1 because the spectrogram input has a single channel.
    print(f"Model architecture is {args.model_arch}")
    arch = getattr(models, args.model_arch)
    learn = cnn_learner(dls, arch, n_in=1, metrics=[accuracy, error_rate])
    # Honour the --learning-rate hyperparameter (it was parsed but never used before).
    learn.fine_tune(args.epochs, base_lr=args.learning_rate)
    print("Finished Training")

    logger.info("Saving the model.")
    learn.export(model_path / "model.pth")

    # Model performance on the validation set.
    preds, y, _ = learn.get_preds(with_loss=True)
    acc = accuracy(preds, y)
    # accuracy() yields a fraction; scale it so the "%" in the message is correct.
    print('The accuracy is {0:.2f} %.'.format(100 * float(acc)))

    # get_preds already applies the loss function's activation (softmax), so the
    # second column is the probability of class index 1 — no exp() needed.
    targets = np.asarray(y)
    probs = np.asarray(preds[:, 1])
    fpr, tpr, _ = roc_curve(targets, probs, pos_label=1)
    roc_auc = auc(fpr, tpr)
    print('ROC area is {0}'.format(roc_auc))
    _save_roc_curve(fpr, tpr, roc_auc, model_path / 'roc_curve.jpg')

    interp = ClassificationInterpretation.from_learner(learn)
    interp.plot_confusion_matrix(figsize=(12, 12), dpi=60)
    plt.savefig(model_path / 'confusion_matrix.jpg', dpi=70, bbox_inches='tight', pad_inches=1)
    plt.close()

    # Classification report: print_classification_report() only prints (it returns
    # None, which produced an empty CSV), so build the table from sklearn directly.
    interp.print_classification_report()
    class_names = [str(v) for v in interp.vocab]
    report = classification_report(
        targets,
        np.asarray(preds.argmax(dim=1)),
        labels=list(range(len(class_names))),
        target_names=class_names,
        output_dict=True,
    )
    pd.DataFrame(report).transpose().to_csv(model_path / 'classification_report.csv')

    # Create the (empty) models dir; tolerate it already existing.
    (model_path / "models").mkdir(parents=True, exist_ok=True)
    print(f"Saving model weights")
    return learn.save(model_path / f"{args.model_arch}")

if __name__ == "__main__":
    # Command-line interface of the training script. The flag names match the
    # keys of the hyperparameter dict given to the estimator in the notebook.
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--workers",
        type=int,
        default=2,
        metavar="W",
        help="number of data loading workers (default: 2)",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=2,
        metavar="E",
        help="number of total epochs to run (default: 2)",
    )
    # Help text now states the real default (it previously claimed 4).
    parser.add_argument(
        "--batch-size", type=int, default=64, metavar="BS", help="batch size (default: 64)"
    )
    parser.add_argument(
        "--learning-rate",
        type=float,
        default=0.001,
        metavar="LR",
        help="initial learning rate (default: 0.001)",
    )
    parser.add_argument(
        "--momentum", type=float, default=0.9, metavar="M", help="momentum (default: 0.9)"
    )
    parser.add_argument(
        "--model-arch",
        type=str,
        default="resnet34",
        metavar="MA",
        help="model arch (default: resnet34)",
    )

    # Defaults for the remaining options come from the SageMaker training environment.
    env = environment.Environment()
    # NOTE(review): type=list would split a value given on the command line into
    # single characters; only the default (env.hosts) is usable as-is.
    parser.add_argument("--hosts", type=list, default=env.hosts)
    parser.add_argument("--current-host", type=str, default=env.current_host)
    parser.add_argument("--model-dir", type=str, default=env.model_dir)
    # Input directory of the channel named "training"; None if that channel is absent.
    parser.add_argument("--data-dir", type=str, default=env.channel_input_dirs.get("training"))
    parser.add_argument("--num-gpus", type=int, default=env.num_gpus)

    _train(parser.parse_args())

In [None]:
# S3 layout for this workshop: everything lives under one prefix in the session's
# default bucket (swap in a custom bucket name here if preferred).
prefix = 'sagemaker-covid-tyhac-fastai'
rawbucket = sess.default_bucket()
traindataprefix = f"{prefix}/data"
train_input_path = f"s3://{rawbucket}/{traindataprefix}/"

In [None]:
# Hyperparameters for training.py; the keys match the script's command-line flags.
hyperparams = {
    'epochs': 1,
    'learning-rate': 0.001,
    'batch-size': 64,
    'model-arch': 'resnet18',  # alternative: resnet34
    'workers': 16,             # script default is 2
}

# Available container versions:
# https://github.com/aws/deep-learning-containers/blob/master/available_images.md
tyhac_estimator = PyTorch(
    # what to run
    entry_point='training.py',
    hyperparameters=hyperparams,
    script_mode=True,
    framework_version='1.0.0',  # tyhac version, needed for deploy
    image_uri='ACCOUNTID.dkr.ecr.ap-southeast-2.amazonaws.com/tyhac-sagemaker-fastai:1.0-gpu-py36',
    # where to run it
    instance_type='ml.g4dn.4xlarge',  # smaller option: ml.g4dn.xlarge
    instance_count=1,
    use_spot_instances=True,
    max_run=21600,   # seconds (6 hours)
    max_wait=21600,  # required for spot instances; must be >= max_run
    # session / bookkeeping
    role=role,
    sagemaker_session=sess,
    base_job_name='tyhac-fastai',
    output_path=f's3://{rawbucket}/{prefix}/models',
)

# Start the training job and block until it finishes.
tyhac_estimator.fit(train_input_path, wait=True)

In [None]:
model_out = tyhac_estimator.output_path + '/' + tyhac_estimator.latest_training_job.job_name + "/output/model.tar.gz"
!mkdir {tyhac_estimator.latest_training_job.job_name}
!aws s3 cp {model_out} {tyhac_estimator.latest_training_job.job_name}
filename = tyhac_estimator.latest_training_job.job_name + '/' + 'model.tar.gz'
!tar -zxvf {filename} -C {tyhac_estimator.latest_training_job.job_name} 'export.pkl' 'roc_curve.jpg' 'confusion_matrix.jpg' 'dataset_test.csv'

In [None]:
print("Deploying ...")

# wait=False: deploy() returns without blocking until the endpoint is ready.
predictor = tyhac_estimator.deploy(
    endpoint_name="tyhac-endpoint-fastai",
    initial_instance_count=1,
    instance_type='ml.t2.medium', # $0.056 p/h
    wait=False
)

# SageMaker Python SDK v2 renamed Predictor.endpoint to endpoint_name; the old
# attribute is deprecated.
print("Predictor endpoint name : %s" % predictor.endpoint_name)

In [None]:
# Delete the endpoint created by the deploy cell so it stops accruing hourly charges.
# Requires `predictor` from that cell to still be defined in the kernel.
predictor.delete_endpoint()

In [None]:
# Show the estimator's model_data value for the latest training job
# (presumably the S3 URI of model.tar.gz — confirm against the SDK docs).
print(tyhac_estimator.model_data)