# SageMaker - Distributed Model Parallelization with TensorFlow - Using Notebook

**Setup**

In [None]:
!pip install sagemaker-experiments

In [None]:
!pip install sagemaker --upgrade

In [None]:
%%time
import sagemaker
from sagemaker import get_execution_role
from sagemaker.tensorflow import TensorFlow
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
import boto3
from time import gmtime, strftime

In [None]:
# boto3 session used later in the notebook to build the SageMaker session.
session = boto3.session.Session()

# Role handed to the estimator as `role`. A pre-existing role ARN string can
# be assigned here instead of looking it up with get_execution_role().
role = get_execution_role()
print(f"SageMaker Execution Role:{role}")

**Prepare Training Script**

Run the following cell to see an example training script that you can use to configure SageMaker Distributed Model Parallel with TensorFlow 2.3.

In [None]:
# Print the contents of the example training script.
!cat sm_tf2_distributed-model.py

Run the following cell to see an example training script that you can use to configure SageMaker Distributed Model Parallel using Horovod with TensorFlow 2.3.

In [None]:
# Print the contents of the Horovod variant of the example training script.
!cat sm_tf2_distributed-model_hvd.py

**Define SageMaker Training Job**

You must update the following:

`processes_per_host`

`entry_point`

`instance_count`

`instance_type`

`base_job_name`

In [None]:
# Name of the experiment that groups the training runs
# (it only needs to be created once).
experiment_name = "SM-MP-DEMO"

# Extra command-line flags forwarded to MPI through the estimator's
# "custom_mpi_options" setting.
mpioptions = "-verbose -x orte_base_help_aggregate=0 "

# SageMaker session layered on top of the boto3 session.
sagemaker_session = sagemaker.session.Session(boto_session=session)

In [None]:
# Names of all experiments returned by the listing call.
all_experiment_names = [exp.experiment_name for exp in Experiment.list()]

# Reuse the experiment when it is already listed; otherwise create it.
experiment_exists = experiment_name in all_experiment_names
load_or_create = Experiment.load if experiment_exists else Experiment.create
customer_churn_experiment = load_or_create(
    experiment_name=experiment_name,
    sagemaker_boto_client=boto3.client("sagemaker"),
)

In [None]:
# Create a trial for the current run
# UTC timestamp appended to the trial name so each run gets its own trial.
run_timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())

# Register a trial for the current run under the experiment.
trial = Trial.create(
    experiment_name=customer_churn_experiment.experiment_name,
    trial_name="SMD-MP-demo-{}".format(run_timestamp),
    sagemaker_boto_client=boto3.client("sagemaker"),
)

Create Estimator

In [None]:
# Settings placed under distribution["smdistributed"]["modelparallel"]["parameters"].
smp_parameters = {
    "microbatches": 2,
    "partitions": 2,
    "pipeline": "interleaved",
    "optimize": "memory",
    # Set to True if using the horovod script
    # "horovod": True,
}

# Settings placed under distribution["mpi"].
mpi_settings = {
    "enabled": True,
    # Pick your processes_per_host
    "processes_per_host": 2,
    "custom_mpi_options": mpioptions,
}

# Estimator describing the training job: script, role, framework/runtime
# versions, instance selection, and the distribution configuration.
smd_mp_estimator = TensorFlow(
    # Pick your train script
    entry_point="sm_tf2_distributed-model.py",
    # source_dir="utils",
    role=role,
    sagemaker_session=sagemaker_session,
    framework_version="2.3.1",
    py_version="py37",
    instance_type="ml.p3.16xlarge",
    instance_count=1,
    distribution={
        "smdistributed": {
            "modelparallel": {"enabled": True, "parameters": smp_parameters},
        },
        "mpi": mpi_settings,
    },
    base_job_name="SMD-MP-demo",
)

Finally, you will use the estimator to launch the SageMaker training job.

In [None]:
# Tie the training job to the experiment and trial, with "Training" as the
# display name of its trial component.
experiment_config = {
    "ExperimentName": customer_churn_experiment.experiment_name,
    "TrialName": trial.trial_name,
    "TrialComponentDisplayName": "Training",
}

# Launch the training job.
smd_mp_estimator.fit(experiment_config=experiment_config)