In [1]:
import os
import shutil

from azureml.core import Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core.runconfig import MpiConfiguration
from azureml.core.workspace import Workspace
from azureml.train.dnn import PyTorch
from azureml.widgets import RunDetails

In [10]:
# --- Workspace and compute target ---
# Load the workspace through Workspace.from_config().
ws = Workspace.from_config()
project_folder = './'
cluster_name = 'gpucluster'
# Fetch the compute target registered under `cluster_name`; nothing is
# created in this cell.
compute_target = ComputeTarget(workspace=ws, name=cluster_name)

# --- Datastore locations ---
# Prerequisite: preprocess the test data with Preprocess_TestData.ipynb.
ds = ws.get_default_datastore()
blob_raw = ds.path('raw')
# The spelling 'precessed' is also used by the '--train_file' path in a later
# training cell, so it presumably matches the real folder name in the
# datastore -- do not "correct" it here without renaming the folder as well.
blob_processed = ds.path('precessed')

# Upload the local 'test-data' folder to the default datastore.
# Removing the local copy afterwards is a manual step: this cell does not
# delete anything.
ds.upload(src_dir='test-data')
blob_test = ds.path('test-data')

Uploading an estimated of 6 files
Target already exists. Skipping upload for sample_submission_stage_1.csv
Target already exists. Skipping upload for sample_submission_stage_2.csv
Target already exists. Skipping upload for test_stage_1.pkl
Target already exists. Skipping upload for test_stage_1.tsv
Target already exists. Skipping upload for test_stage_2.pkl
Target already exists. Skipping upload for test_stage_2.tsv
Uploaded 0 files


# Train+val combined training

### Use the best hyperparameters from the HyperDrive results

 params: ['--input_dir', 'processed_data', '--isaml', '--fp16', '--val_batch_size', '32', '--bert_type', 'bert-large-uncased', '--epochs', '4', '--lr', '3E-05', '--model_type', 'mul', '--per_gpu_batch_size', '32']


In [9]:

## Train on the combined train+val file and submit the run to Azure ML.
## The run executes inside a public Docker image.
image_name = 'krishansubudhi/transformers_pytorch:1.3'

# Command-line arguments forwarded to TrainGAP.py.
# Entries mapped to "" are presumably value-less switches -- confirm against
# the argument parser in TrainGAP.py.
# NOTE(review): the best hyperdrive configuration listed in the markdown above
# also contains '--model_type mul', which is not passed here; presumably it is
# the script's default -- confirm.
script_params = {
    '--backend': "nccl",
    '--input_dir': blob_processed.as_mount(),
    '--train_file': 'train_val_combined.pkl',
    '--is_distributed': "",
    '--isaml': "",
    '--per_gpu_batch_size': 8,  # 8 per process x 4 processes = 32 total
    '--fp16': "",
    '--bert_type': 'bert-large-uncased',
    '--lr': 3E-5,
    '--epochs': 4,
    '--val_batch_size': 32,
    '--output_dir': './outputs'
}

# Single node running four MPI processes.
mpi = MpiConfiguration()
mpi.process_count_per_node = 4

estimator = PyTorch(source_directory=project_folder,
                    script_params=script_params,
                    compute_target=compute_target,
                    entry_script='TrainGAP.py',
                    node_count=1,
                    use_gpu=True,
                    distributed_training=mpi,

                    # Docker image
                    use_docker=True,
                    custom_docker_image=image_name,
                    user_managed=True)
# The environment is user-managed, so the interpreter path is set explicitly.
# This goes through a private attribute of the estimator and may break if the
# SDK's internals change.
estimator._estimator_config.environment.python.interpreter_path = '/opt/miniconda/envs/amlbert/bin/python'

experiment_name = 'Best_Model_AML'
experiment = Experiment(ws, name=experiment_name)
run2 = experiment.submit(estimator)
RunDetails(run2).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

### Use the Kaggle test data as the validation set to get predictions

In [16]:

## Train on the combined train+val file, using the Kaggle stage-2 test file as
## the validation file, and submit the run to Azure ML.
## The run executes inside a public Docker image.
image_name = 'krishansubudhi/transformers_pytorch:1.3'

# Command-line arguments forwarded to TrainGAP.py.
# '--input_dir' mounts the datastore root, so '--train_file' carries its
# 'precessed/' folder prefix while '--val_file' is given without one.
# Presumably TrainGAP.py resolves both relative to '--input_dir' -- confirm.
# Entries mapped to "" are presumably value-less switches -- confirm against
# the argument parser in TrainGAP.py.
script_params = {
    '--backend': "nccl",
    '--input_dir': ds.path('.').as_mount(),
    '--train_file': 'precessed/train_val_combined.pkl',
    '--is_distributed': "",
    '--isaml': "",
    '--per_gpu_batch_size': 8,  # 8 per process x 4 processes = 32 total
    '--fp16': "",
    '--bert_type': 'bert-large-uncased',
    '--lr': 3E-5,
    '--epochs': 4,
    '--val_file': 'test_stage_2.pkl',
    '--val_batch_size': 32,
    '--output_dir': './outputs'
}

# Single node running four MPI processes.
mpi = MpiConfiguration()
mpi.process_count_per_node = 4

estimator = PyTorch(source_directory=project_folder,
                    script_params=script_params,
                    compute_target=compute_target,
                    entry_script='TrainGAP.py',
                    node_count=1,
                    use_gpu=True,
                    distributed_training=mpi,

                    # Docker image
                    use_docker=True,
                    custom_docker_image=image_name,
                    user_managed=True)
# The environment is user-managed, so the interpreter path is set explicitly.
# This goes through a private attribute of the estimator and may break if the
# SDK's internals change.
estimator._estimator_config.environment.python.interpreter_path = '/opt/miniconda/envs/amlbert/bin/python'

# Same experiment name as the other training cell; `run2` is rebound here, so
# any handle previously stored under that name is replaced.
experiment_name = 'Best_Model_AML'
experiment = Experiment(ws, name=experiment_name)
run2 = experiment.submit(estimator)
RunDetails(run2).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

## Prepare results to upload

Download the logits from the AML portal by opening the last run. Save them in the gender-pronoun-resolution folder downloaded from Kaggle.

Join the logits with the IDs in Excel and save the result in the required CSV format.