In [None]:
import os
from azureml.core import Workspace, Run, Experiment

# Connect to the workspace via Workspace.from_config() and print its identifying
# details so the target can be confirmed before any compute is provisioned.
ws = Workspace.from_config()
print('Workspace name: ' + ws.name,
      'Azure region: ' + ws.location,
      'Subscription id: ' + ws.subscription_id,
      'Resource group: ' + ws.resource_group, sep='\n')

# Local folder that receives the step scripts written by the %%writefile cells
# and is handed to every pipeline step as its source_directory.
project_folder = "scripts2"
run_history_name = project_folder

# makedirs(exist_ok=True) replaces the isdir()/mkdir() pair: there is no
# check-then-create race, and re-running the cell is a no-op when the folder exists.
os.makedirs(project_folder, exist_ok=True)

In [None]:
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.datastore import Datastore
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep, MpiStep
from azureml.core.runconfig import CondaDependencies, RunConfiguration

In [None]:
from azureml.core.compute_target import ComputeTargetException


def _get_or_create_cluster(workspace, cluster_name, vm_size, node_count):
    """Return the named AmlCompute cluster, provisioning it when it is not found.

    Args:
        workspace: Workspace in which the cluster is looked up or created.
        cluster_name: Name of the compute target.
        vm_size: VM size, used only when a new cluster has to be provisioned.
        node_count: Used as both min_nodes and max_nodes of a new cluster.

    Returns:
        The existing or newly created compute target.
    """
    try:
        cluster = AmlCompute(workspace, cluster_name)
        print("Found existing cluster.")
    except ComputeTargetException:
        # Only a failed lookup triggers provisioning. A bare `except:` would
        # also swallow auth errors, typos and KeyboardInterrupt and then try
        # to create a cluster anyway.
        print("Creating {}".format(cluster_name))
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size, min_nodes=node_count, max_nodes=node_count)

        # create the cluster and block until provisioning has finished
        cluster = ComputeTarget.create(workspace, cluster_name, provisioning_config)
        cluster.wait_for_completion(show_output=True)
    return cluster


# CPU compute
cpu_cluster_name = "ffmpeg-cluster"
cpu_cluster = _get_or_create_cluster(ws, cpu_cluster_name, "STANDARD_D2s_v3", 1)

# GPU compute
gpu_cluster_name = "style-cluster"
gpu_cluster = _get_or_create_cluster(ws, gpu_cluster_name, "STANDARD_NC6s_v3", 4)

In [None]:
%%writefile $project_folder/process_video.py
import argparse
import glob
import os
import subprocess

parser = argparse.ArgumentParser(description="Process input video")
parser.add_argument('--input-video', required=True)
parser.add_argument('--output-audio', required=True)
parser.add_argument('--output-images', required=True)

args = parser.parse_args()

os.makedirs(args.output_audio, exist_ok=True)
os.makedirs(args.output_images, exist_ok=True)

subprocess.run("ffmpeg -i {} {}/video.aac"
              .format(args.input_video, args.output_audio),
               shell=True, check=True
              )

subprocess.run("ffmpeg -i {} {}/%05d_video.jpg -hide_banner"
              .format(args.input_video, args.output_images),
               shell=True, check=True
              )

In [None]:
%%writefile $project_folder/stitch_video.py
import argparse
import os
import subprocess

parser = argparse.ArgumentParser(description="Process input video")
parser.add_argument('--images-dir', required=True)
parser.add_argument('--input-audio', required=True)
parser.add_argument('--output-dir', required=True)

args = parser.parse_args()

os.makedirs(args.output_dir, exist_ok=True)

subprocess.run("ffmpeg -framerate 30 -i {}/%05d_video.jpg -c:v libx264 -profile:v high -crf 20 -pix_fmt yuv420p "
               "-y {}/video_without_audio.mp4"
               .format(args.images_dir, args.output_dir),
               shell=True, check=True
              )

subprocess.run("ffmpeg -i {}/video_without_audio.mp4 -i {}/video.aac -map 0:0 -map 1:0 -vcodec "
               "copy -acodec copy -y {}/video_with_audio.mp4"
               .format(args.output_dir, args.input_audio, args.output_dir),
               shell=True, check=True
              )

In [None]:
# Register the blob container that holds the input video.
account_name = "happypathspublic"
video_ds = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name="videos",
    container_name="videos",
    account_name=account_name,
    overwrite=True,
)

# Register the blob container that holds the models.
models_ds = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name="models",
    container_name="styletransfer",
    account_name="pipelinedata",
    overwrite=True,
)

# Models downloaded from https://pytorch.org/tutorials/advanced/neural_style_tutorial.html
# are kept under "saved_models" on the models datastore.
models_dir = DataReference(
    datastore=models_ds,
    data_reference_name="models",
    path_on_datastore="saved_models",
    mode="download",
)

# Default datastore attached to the workspace.
default_datastore = ws.get_default_datastore()

In [None]:
# Input video: "nature.mp4" on the videos datastore, referenced in download mode.
orangutan_video = DataReference(
    data_reference_name="video",
    datastore=video_ds,
    path_on_datastore="nature.mp4",
    mode="download",
)

In [None]:
# Conda environment shared by all pipeline steps. Each entry is a channel
# followed by the packages added right after it.
cd = CondaDependencies()
for _channel, _packages in (
    ("conda-forge", ("ffmpeg",)),
    ("pytorch", ("pytorch", "torchvision")),
    ("anaconda", ("mpi4py",)),
):
    cd.add_channel(_channel)
    for _package in _packages:
        cd.add_conda_package(_package)

# Run configuration: Docker enabled with GPU support, based on the
# "pytorch/pytorch" image, with Spark package pre-caching switched off.
batchai_run_config = RunConfiguration(conda_dependencies=cd)
batchai_run_config.environment.docker.enabled = True
batchai_run_config.environment.docker.gpu_support = True
batchai_run_config.environment.docker.base_image = "pytorch/pytorch"
batchai_run_config.environment.spark.precache_packages = False

In [None]:
# Intermediate and final pipeline outputs, all placed on the default datastore.
# Each variable is named after its PipelineData port.
ffmpeg_audio, ffmpeg_images, processed_images, output_video = (
    PipelineData(name=port_name, datastore=default_datastore)
    for port_name in ("ffmpeg_audio", "ffmpeg_images", "processed_images", "output_video")
)

In [None]:
from azureml.pipeline.core.graph import PipelineParameter

# Pipeline parameter selecting the style; one of
# "candy", "mosaic", "rain_princess", "udnie".
style_param = PipelineParameter(
    name="style",
    default_value="mosaic",
)

In [None]:
# Step 1 (CPU cluster): process_video.py receives the input video and writes
# to the ffmpeg_audio and ffmpeg_images outputs.
split_video_step = PythonScriptStep(
    name="split video",
    source_directory=project_folder,
    script_name="process_video.py",
    arguments=[
        "--input-video", orangutan_video,
        "--output-audio", ffmpeg_audio,
        "--output-images", ffmpeg_images,
    ],
    inputs=[orangutan_video],
    outputs=[ffmpeg_images, ffmpeg_audio],
    compute_target=cpu_cluster,
    runconfig=batchai_run_config,
    allow_reuse=False,
)

# Step 2 (GPU cluster): neural_style_mpi.py runs as an MPI job on 3 nodes with
# one process per node, reading ffmpeg_images and the models reference and
# writing to processed_images.
distributed_style_transfer_step = MpiStep(
    name="mpi style transfer",
    source_directory=project_folder,
    script_name="neural_style_mpi.py",
    arguments=[
        "--content-dir", ffmpeg_images,
        "--output-dir", processed_images,
        "--model-dir", models_dir,
        "--style", style_param,
        "--cuda", 1,
    ],
    inputs=[models_dir, ffmpeg_images],
    outputs=[processed_images],
    compute_target=gpu_cluster,
    node_count=3,
    process_count_per_node=1,
    use_gpu=True,
    pip_packages=["mpi4py", "torch", "torchvision"],
    runconfig=batchai_run_config,
    allow_reuse=False,
)

# Step 3 (CPU cluster): stitch_video.py receives processed_images and
# ffmpeg_audio and writes to the output_video output.
stitch_video_step = PythonScriptStep(
    name="stitch",
    source_directory=project_folder,
    script_name="stitch_video.py",
    arguments=[
        "--images-dir", processed_images,
        "--input-audio", ffmpeg_audio,
        "--output-dir", output_video,
    ],
    inputs=[processed_images, ffmpeg_audio],
    outputs=[output_video],
    compute_target=cpu_cluster,
    runconfig=batchai_run_config,
    allow_reuse=False,
)

In [None]:
# Only the final step is listed here.
# NOTE(review): presumably the upstream steps are pulled in through their
# PipelineData dependencies - confirm against the Pipeline documentation.
pipeline = Pipeline(
    workspace=ws,
    steps=[stitch_video_step],
)
pipeline_run = Experiment(workspace=ws, name='style_transfer_mpi_old').submit(pipeline)
# Alternative submission that also passes a value for the "style" parameter:
# pipeline_run = Experiment(ws, 'style_transfer_mpi_old').submit(pipeline, pipeline_params={"style": "mosaic"})

In [None]:
from azureml.train.widgets import RunDetails
# Show the RunDetails widget for the submitted pipeline run.
RunDetails(pipeline_run).show()

In [None]:
# Bare expression: the pipeline run object is displayed as this cell's output.
pipeline_run

In [None]:
def download_video(run, target_dir=None, step_name="stitch", output_name="output_video"):
    """Download one named output of one step of a pipeline run.

    Args:
        run: Pipeline run object; must provide ``find_step_run(name)``.
        target_dir: Passed unchanged to the output's ``download`` call.
        step_name: Name of the step whose output is fetched. Defaults to
            "stitch".
        output_name: Name of the step output to download. Defaults to
            "output_video".

    Raises:
        IndexError: If the run contains no step run named ``step_name``.
    """
    step_runs = run.find_step_run(step_name)
    if not step_runs:
        # Same exception type that indexing an empty result would raise, but
        # with a message that names the missing step.
        raise IndexError(
            "no step run named {!r} found in the pipeline run".format(step_name))

    # The first matching step run is used.
    port_data = step_runs[0].get_output_data(output_name)
    port_data.download(target_dir, show_progress=True)

In [None]:
# Download the output of the submitted run into the local folder "output_video2".
download_video(run=pipeline_run, target_dir="output_video2")

In [None]:
# Publish the pipeline from the submitted run and keep the published pipeline's id.
published_pipeline = pipeline_run.publish_pipeline(
    name="batch score style transfer",
    description="style transfer",
    version="1.0",
)
published_id = published_pipeline.id