# 8.1 Machine Learning in the Cloud

Module 8 - AI in the Cloud

For the book, references, and training materials, please visit the project website: [http://activefitness.ai/ai-in-sports-with-python](http://activefitness.ai/ai-in-sports-with-python).

Book: [Applied Machine Learning for Health and Fitness](https://www.apress.com/us/book/9781484257715), Chapters 11-12


In [6]:
import azureml.core
# Evaluating the attribute as the cell's last expression displays the installed SDK version.
azureml.core.VERSION

'1.1.5'

In [None]:
import azureml.core
from azureml.core import Workspace

# Workspace handle that later cells pass to datasets, compute targets and experiments.
# NOTE(review): from_config() presumably reads a local workspace config file
# (subscription / resource group / workspace name) — confirm it is present.
workspace = Workspace.from_config()

In [None]:
import os
from azureml.core import Workspace, Datastore, Dataset

# Upload the skier center-of-mass CSV from the working directory to the
# workspace's default datastore, then register it as a tabular dataset.
datastore = workspace.get_default_datastore()
source_dir = os.getcwd()
store_path = 'center_of_mass'

# Absolute local paths of the files to upload.
csv_names = ['skier_center_of_mass.csv']
local_files = [os.path.join(source_dir, csv_name) for csv_name in csv_names]

datastore.upload_files(files=local_files,
                       relative_root=source_dir,
                       target_path=store_path,
                       overwrite=True)

# Describe the uploaded folder as delimited (CSV) data and register it by name.
tabular = Dataset.Tabular.from_delimited_files(path=(datastore, store_path))
dataset = tabular.register(workspace=workspace,
                           name='center_of_mass',
                           description='skier center of mass')


## Labeling data in the cloud


In [27]:
# initial experiment configuration
# Name of the Azure ML experiment; the %%writefile cell also uses it as the folder for train.py.
experiment_name = 'activity-classification'
# Local folder handed to the Estimator as source_directory (holds train.py).
script_folder = 'activity-classification'
# Name of the compute cluster to reuse or create.
cluster_name = "compute-experiments"
# File name under which the trained model is saved.
model_file_name = 'activities.pkl'
# Registered name of the labeled image dataset fetched from the workspace.
labeled_dataset_name = 'Classifying activities-2020-03-15 00:54:26'
# Folder the training code writes the model into.
output_folder =  './outputs'
# Local folder that receives the dataset files downloaded for exploration.
local_download_folder = './download/' 

In [None]:
from azureml.core import Dataset
from azureml.contrib.dataset import FileHandlingOption

# Look up the labeled dataset registered under labeled_dataset_name.
dataset = Dataset.get_by_name(workspace, name=labeled_dataset_name)
# Convert it to a pandas DataFrame; the DOWNLOAD option requests the referenced
# files be fetched into local_download_folder, overwriting earlier downloads.
dataset_pd = dataset.to_pandas_dataframe(
    file_handling_option=FileHandlingOption.DOWNLOAD, 
    target_path=local_download_folder, 
    overwrite_download=True)
# Display the frame; the plotting cell reads its 'image_url' and 'label' columns.
dataset_pd

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# imread lives in matplotlib.image; without this import the cell fails with
# NameError: name 'mpimg' is not defined.
import matplotlib.image as mpimg

# Show a 2x2 grid of labeled images taken from dataset_pd, each titled with its label.
fig = plt.figure(figsize=(15, 15))
plt.subplots_adjust(hspace=0.001)
columns = 2
rows = 2
# Rows i+5 of the frame are shown, i.e. index labels 6..9.
row_offset = 5
for i in range(1, columns*rows + 1):
    row_label = i + row_offset
    # NOTE(review): 'image_url' presumably holds the local path of the downloaded file — confirm.
    img = mpimg.imread(dataset_pd.loc[row_label, 'image_url'])
    ax = fig.add_subplot(rows, columns, i)
    ax.title.set_text(dataset_pd.loc[row_label, 'label'])
    ax.axis('off')
    ax.imshow(img)
plt.show()

In [None]:
from torchvision.transforms import functional as F

# Convert the labeled dataset into a torchvision-style dataset and inspect the first sample.
pytorch_dataset = dataset.to_torchvision()
# NOTE(review): presumably each item is an (image, label) pair, so [0][0] is the
# first image — confirm against the azureml-contrib-dataset documentation.
img = pytorch_dataset[0][0]
print(type(img))

Preparing for training
======================



In [8]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Reuse the cluster named cluster_name when the lookup succeeds; a
# ComputeTargetException from the lookup triggers provisioning a new one.
try:
    compute_target = ComputeTarget(workspace=workspace, name=cluster_name)
except ComputeTargetException:
    print('Creating a new compute target...')
    # New cluster: STANDARD_D3_V2 VMs, at most 4 nodes.
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D3_V2', 
                                                           max_nodes=4)

    compute_target = ComputeTarget.create(workspace, cluster_name, compute_config)
    # Block until provisioning finishes, giving up after 20 minutes.
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

# Print the cluster status as a plain dictionary.
print(compute_target.get_status().serialize())

{'errors': None, 'creationTime': '2020-03-12T14:08:17.927990+00:00', 'createdBy': {'userId': 'e180613e-2ad1-41cc-8aae-8d4183f7b2fd', 'userOrgId': '72f988bf-86f1-41af-91ab-2d7cd011db47'}, 'modifiedTime': '2020-03-12T14:09:04.221524+00:00', 'state': 'Running', 'vmSize': 'STANDARD_D3_V2'}


In [9]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Environment named 'conda-env' that is passed to the Estimator for the training run.
conda_env = Environment('conda-env')
# pip packages installed into it: the Azure ML SDK, the labeled-dataset
# extension, PyTorch/torchvision, and dataprep with the pandas and fuse extras.
conda_env.python.conda_dependencies = CondaDependencies.create(pip_packages=['azureml-sdk',
                                                                             'azureml-contrib-dataset',
                                                                             'torch','torchvision',
                                                                             'azureml-dataprep[pandas,fuse]'])

In [None]:
import os
from azureml.train.estimator import Estimator
from azureml.core import Experiment
from azureml.core import Dataset
from azureml.contrib.dataset import FileHandlingOption

# Experiment that the training run is submitted to.
experiment = Experiment(workspace=workspace, name=experiment_name)
# Make sure the folder that holds train.py exists before it is used as source_directory.
os.makedirs(script_folder, exist_ok=True)
dataset = Dataset.get_by_name(workspace, name=labeled_dataset_name)

# Command-line arguments for train.py, which parses --output-folder and --model-file.
script_params = {
    '--output-folder': output_folder,
    '--model-file': model_file_name
}

# The dataset is attached under the name 'activities'; train.py reads it back
# through run.input_datasets['activities'].
estimator = Estimator(source_directory=script_folder, 
                entry_script='train.py',
                script_params=script_params,    
                inputs=[dataset.as_named_input('activities')],
                compute_target=compute_target,
                environment_definition=conda_env)

Model training in the cloud
===========================



In [None]:
# tempfile was never imported in the notebook and os only in unrelated cells;
# import both here so the cell runs on its own.
import os
import tempfile

from azureml.core import Dataset, Run
import azureml.contrib.dataset
from azureml.contrib.dataset import FileHandlingOption, LabeledDatasetTask

run = Run.get_context()
# get input dataset by name
# NOTE(review): input_datasets is presumably only populated inside a submitted
# run that received the 'activities' input — confirm before running interactively.
labeled_dataset = run.input_datasets['activities']

# Fresh temporary directory used as the mount point.
mounted_path = tempfile.mkdtemp()
# mount dataset onto the mounted_path of a Linux-based compute
mount_context = labeled_dataset.mount(mounted_path)
mount_context.start()
# Show what the mount exposes and where it lives.
print(os.listdir(mounted_path))
print(mounted_path)

In [None]:
import torch
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from torch.utils.data.sampler import SubsetRandomSampler

# Root folder of the image data passed to load(); it sits under './download/',
# the folder used as local_download_folder.
# NOTE(review): the 'workspaceblobstore/activities' sub-path presumably mirrors
# the datastore layout of the downloaded dataset — confirm.
f = './download/workspaceblobstore/activities'

def load(f, size = .2, batch_size = 64):
    """Build train/test DataLoaders over the image folder ``f``.

    The images are indexed once, the sample indices are shuffled with
    ``np.random.shuffle``, and the first ``size`` fraction becomes the test
    split while the rest becomes the training split.

    Args:
        f: Root folder readable by ``torchvision.datasets.ImageFolder``.
        size: Fraction of samples held out for testing, within [0, 1].
        batch_size: Batch size used by both loaders.

    Returns:
        A ``(trainloader, testloader)`` tuple; both loaders share one
        dataset object and sample disjoint index subsets of it.

    Raises:
        ValueError: If ``size`` is outside [0, 1].
    """
    if not 0 <= size <= 1:
        raise ValueError("size must be a fraction between 0 and 1")

    # Resize / crop to 224x224 and normalize with the per-channel statistics
    # commonly used with torchvision's pretrained ImageNet models.
    t = transforms.Compose([transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean = [0.485, 0.456, 0.406],
        std = [0.229, 0.224, 0.225])])

    # One ImageFolder serves both loaders: the two copies built before were
    # identical, so scanning the folder twice only cost time.
    data = datasets.ImageFolder(f, transform=t)
    n = len(data)
    indices = list(range(n))
    split = int(np.floor(size * n))
    np.random.shuffle(indices)
    train_idx, test_idx = indices[split:], indices[:split]

    trainloader = torch.utils.data.DataLoader(
        data, sampler=SubsetRandomSampler(train_idx), batch_size=batch_size)
    testloader = torch.utils.data.DataLoader(
        data, sampler=SubsetRandomSampler(test_idx), batch_size=batch_size)
    return trainloader, testloader

# Split the downloaded images, holding out 20% for testing.
trainloader, testloader = load(f, .2)
# Class names discovered by the dataset.
print(trainloader.dataset.classes)
# Draw one training batch and show its images tiled into a single grid.
images, labels = next(iter(trainloader))
grid = torchvision.utils.make_grid(images)
# Reorder the grid's axes to (1, 2, 0) before handing it to imshow.
# NOTE(review): presumably this moves the channel axis last — confirm.
plt.imshow(grid.permute(1,2,0))

In [None]:
import os
import torchvision
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
from azureml.core import Dataset, Run
import azureml.contrib.dataset
from azureml.contrib.dataset import FileHandlingOption, LabeledDatasetTask

# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet18(pretrained=True)

# Freeze the pretrained weights; only the replacement head created below
# (new parameters require gradients by default) is trained.
for param in model.parameters():
    param.requires_grad = False

run = Run.get_context()

# Inside a submitted run the dataset can be taken from the run context instead:
#labeled_dataset = run.input_datasets['activities']
#pytorch_dataset = labeled_dataset.to_torchvision()


features = model.fc.in_features
# Size the new head by the number of classes. The previous len(labels)
# measured the number of samples in one batch (up to the batch size), which
# produced an output layer with the wrong width.
num_classes = len(trainloader.dataset.classes)
model.fc = nn.Linear(features, num_classes)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# Multiply the learning rate by 0.1 every 7 scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
print_every = 100

def train_model(epochs=3, eval_every=1):
    """Train the module-level ``model`` and evaluate it periodically.

    Uses the module-level ``trainloader``, ``testloader``, ``device``,
    ``criterion``, ``optimizer`` and ``scheduler``. After every
    ``eval_every`` training batches the model is evaluated on the whole test
    loader; the mean training loss since the previous evaluation and the
    mean test loss are appended to the module-level ``train_losses`` and
    ``test_losses`` lists and printed together with the test accuracy.

    Args:
        epochs: Number of passes over the training loader.
        eval_every: Number of training batches between evaluations. The
            default of 1 evaluates after every batch.

    Returns:
        The trained ``model``.

    Raises:
        ValueError: If ``eval_every`` is smaller than 1.
    """
    if eval_every < 1:
        raise ValueError("eval_every must be a positive integer")

    steps = 0
    running_loss = 0.0
    batches_since_eval = 0
    for epoch in range(epochs):
        for inputs, labels in trainloader:
            steps += 1
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            logits = model(inputs)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            batches_since_eval += 1

            if steps % eval_every != 0:
                continue

            test_loss = 0.0
            correct = 0
            seen = 0
            model.eval()
            with torch.no_grad():
                # Separate names keep the training batch from being shadowed.
                for test_inputs, test_labels in testloader:
                    test_inputs = test_inputs.to(device)
                    test_labels = test_labels.to(device)
                    test_logits = model(test_inputs)
                    test_loss += criterion(test_logits, test_labels).item()
                    # The highest logit is the predicted class.
                    predicted = test_logits.argmax(dim=1)
                    correct += (predicted == test_labels).sum().item()
                    seen += test_labels.size(0)
            model.train()

            # max(..., 1) keeps an empty test split from dividing by zero.
            mean_train_loss = running_loss / batches_since_eval
            mean_test_loss = test_loss / max(len(testloader), 1)
            accuracy = correct / max(seen, 1)
            train_losses.append(mean_train_loss)
            test_losses.append(mean_test_loss)
            print(f"Epoch {epoch+1}/{epochs}.. "
                  f"Train loss: {mean_train_loss:.3f}.. "
                  f"Test loss: {mean_test_loss:.3f}.. "
                  f"Test accuracy: {accuracy:.3f}")
            # Start a fresh window so the next report is not cumulative.
            running_loss = 0.0
            batches_since_eval = 0
        # Advance the learning-rate schedule once per epoch.
        scheduler.step()
    return model

# Fresh loss histories; train_model appends to these module-level lists.
train_losses, test_losses = [], []
model = train_model(epochs=3)
# Save the whole model object to model_file_name in the current directory.
torch.save(model, model_file_name)

In [None]:
# Reset the loss histories and train again for three epochs.
train_losses, test_losses = [], []
model = train_model(epochs=3)
print('Finished training, saving model')
# Save the whole model object under output_folder, creating the folder if needed.
os.makedirs(output_folder, exist_ok=True)
torch.save(model, os.path.join(output_folder, model_file_name))

In [28]:
%%writefile $experiment_name/train.py

import argparse
import os
import time
import copy
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data.sampler import SubsetRandomSampler
import tempfile
from azureml.core import Dataset, Run
import azureml.contrib.dataset
from azureml.contrib.dataset import FileHandlingOption, LabeledDatasetTask

def load(f, size = .2, batch_size = 64):
    """Build train/test DataLoaders over the image folder ``f``.

    The images are indexed once, the sample indices are shuffled with
    ``np.random.shuffle``, and the first ``size`` fraction becomes the test
    split while the rest becomes the training split.

    Args:
        f: Root folder readable by ``torchvision.datasets.ImageFolder``.
        size: Fraction of samples held out for testing, within [0, 1].
        batch_size: Batch size used by both loaders.

    Returns:
        A ``(trainloader, testloader)`` tuple; both loaders share one
        dataset object and sample disjoint index subsets of it.

    Raises:
        ValueError: If ``size`` is outside [0, 1].
    """
    if not 0 <= size <= 1:
        raise ValueError("size must be a fraction between 0 and 1")

    # Resize / crop to 224x224 and normalize with the per-channel statistics
    # commonly used with torchvision's pretrained ImageNet models.
    t = transforms.Compose([transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean = [0.485, 0.456, 0.406],
        std = [0.229, 0.224, 0.225])])

    # One ImageFolder serves both loaders: the two copies built before were
    # identical, so scanning the mounted folder twice only cost time.
    data = datasets.ImageFolder(f, transform=t)
    n = len(data)
    indices = list(range(n))
    split = int(np.floor(size * n))
    np.random.shuffle(indices)
    train_idx, test_idx = indices[split:], indices[:split]

    trainloader = torch.utils.data.DataLoader(
        data, sampler=SubsetRandomSampler(train_idx), batch_size=batch_size)
    testloader = torch.utils.data.DataLoader(
        data, sampler=SubsetRandomSampler(test_idx), batch_size=batch_size)
    return trainloader, testloader

def get_mounting_path(labeled_dataset):
    """Mount ``labeled_dataset`` on a new temporary directory.

    Prints the contents of the mount point, the mount point itself and the
    contents of its 'workspaceblobstore' folder, then returns the path of
    the 'workspaceblobstore/activities' folder below the mount point. The
    mount is started here and is not stopped by this function.
    """
    mount_root = tempfile.mkdtemp()
    context = labeled_dataset.mount(mount_root)
    context.start()

    blobstore_dir = mount_root + '/workspaceblobstore'
    print(os.listdir(mount_root))
    print(mount_root)
    print(os.listdir(blobstore_dir))
    return blobstore_dir + '/activities'

def start(output_folder, model_file_name, epochs=3, eval_every=1):
    """Fine-tune a pretrained ResNet-18 on the run's 'activities' dataset and save it.

    The dataset is taken from the current run's inputs, mounted, split 80/20
    into train and test loaders, and used to train a new final layer on top
    of the frozen pretrained network. After every ``eval_every`` training
    batches the model is evaluated on the whole test loader and the losses
    and accuracy are printed.

    Args:
        output_folder: Folder the model file is written to (created if missing).
        model_file_name: File name of the saved model inside ``output_folder``.
        epochs: Number of passes over the training loader.
        eval_every: Number of training batches between evaluations. The
            default of 1 evaluates after every batch.

    Raises:
        ValueError: If ``eval_every`` is smaller than 1.
    """
    if eval_every < 1:
        raise ValueError("eval_every must be a positive integer")

    run = Run.get_context()
    labeled_dataset = run.input_datasets['activities']

    data_path = get_mounting_path(labeled_dataset)

    trainloader, testloader = load(data_path, .2)
    class_names = trainloader.dataset.classes
    print(class_names)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = models.resnet18(pretrained=True)

    # Freeze the pretrained weights; only the replacement head created below
    # (new parameters require gradients by default) is trained.
    for param in model.parameters():
        param.requires_grad = False

    features = model.fc.in_features
    # Size the head by the number of classes. The previous len(labels)
    # measured the number of samples in one batch, not the number of classes.
    model.fc = nn.Linear(features, len(class_names))
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    # Multiply the learning rate by 0.1 every 7 scheduler steps.
    scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    # train the model
    train_losses, test_losses = [], []
    steps = 0
    running_loss = 0.0
    batches_since_eval = 0
    for epoch in range(epochs):
        for inputs, labels in trainloader:
            steps += 1
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            logits = model(inputs)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            batches_since_eval += 1

            if steps % eval_every != 0:
                continue

            test_loss = 0.0
            correct = 0
            seen = 0
            model.eval()
            with torch.no_grad():
                # Separate names keep the training batch from being shadowed.
                for test_inputs, test_labels in testloader:
                    test_inputs = test_inputs.to(device)
                    test_labels = test_labels.to(device)
                    test_logits = model(test_inputs)
                    test_loss += criterion(test_logits, test_labels).item()
                    # The highest logit is the predicted class.
                    predicted = test_logits.argmax(dim=1)
                    correct += (predicted == test_labels).sum().item()
                    seen += test_labels.size(0)
            model.train()

            # max(..., 1) keeps an empty test split from dividing by zero.
            mean_train_loss = running_loss / batches_since_eval
            mean_test_loss = test_loss / max(len(testloader), 1)
            accuracy = correct / max(seen, 1)
            train_losses.append(mean_train_loss)
            test_losses.append(mean_test_loss)
            print(f"Epoch {epoch+1}/{epochs}.. "
                  f"Train loss: {mean_train_loss:.3f}.. "
                  f"Test loss: {mean_test_loss:.3f}.. "
                  f"Test accuracy: {accuracy:.3f}")
            # Start a fresh window so the next report is not cumulative.
            running_loss = 0.0
            batches_since_eval = 0
        # Advance the learning-rate schedule once per epoch.
        scheduler.step()

    print('Finished training')
    os.makedirs(output_folder, exist_ok=True)
    torch.save(model, os.path.join(output_folder, model_file_name))
    print('Model saved:', model_file_name)

if __name__ == '__main__':
    # Script entry point: read the two required options, make sure the
    # output folder exists, echo the settings and start training.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--output-folder",
        dest='output_folder',
        type=str,
        default=None,
        required=True,
        help="Output folder for the model",
    )
    cli.add_argument(
        "--model-file",
        dest='model_file_name',
        type=str,
        default=None,
        required=True,
        help="Output model file",
    )
    options = cli.parse_args()

    output_folder = options.output_folder
    model_file_name = options.model_file_name
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    print('Output folder:', output_folder)
    print('Model file:', model_file_name)
    start(output_folder, model_file_name)
    


Overwriting activity-classification/train.py


Running experiments in the cloud
================================



In [None]:
# Submit the estimator to the experiment and block until the run finishes,
# streaming its output into the notebook.
run = experiment.submit(estimator)
run.wait_for_completion(show_output=True)

Model management
================

In [24]:
# Register the model file produced by the run under the name 'activities';
# model_path is the output folder joined with the model file name.
model = run.register_model(model_name='activities', model_path=output_folder+"/"+model_file_name)
# Print the registered model's name, id and version, tab-separated.
print(model.name, model.id, model.version, sep='\t')

activities	activities:3	3


In [25]:
# Download the trained model file from the run to './models'.
# NOTE(review): verify whether output_file_path is treated as a target file or
# as a folder by the installed SDK version — './models' may end up being the file itself.
run.download_file(name=output_folder+"/"+model_file_name, output_file_path='./models')