In [1]:
import ntpath, os
import azureml.core
print("SDK version:", azureml.core.VERSION)

from azureml.core import Datastore, Experiment, ScriptRunConfig, Workspace
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.dataset import Dataset
from azureml.core.runconfig import DEFAULT_CPU_IMAGE, RunConfiguration
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep
from azureml.train.estimator import Estimator
from azureml.widgets import RunDetails
from pathlib import Path

SDK version: 1.0.39


In [2]:
# Display the value of DEFAULT_CPU_IMAGE (imported from azureml.core.runconfig);
# it is used further down as the Docker base image of the run configuration.
DEFAULT_CPU_IMAGE

'mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04'

In [3]:
# Execute the Common notebook inside this kernel.
# NOTE(review): file_service_list_directories_and_files, account_name, storage_key and
# share_name are used by the upload cell below but are not defined in this notebook;
# presumably Common.ipynb provides them — confirm.
%run Common.ipynb

C:\Source\CSIRO\HealthyHabitat\.env
None
None
azureml-filestore-21584b3f-784b-4ae9-a4a1-547207c1db9f


In [4]:
# Connect to the workspace through Workspace.from_config() and echo its identity,
# one property per line.
ws = Workspace.from_config()

workspace_summary = [
    'Name: {0}'.format(ws.name),
    'Resource Group: {0}'.format(ws.resource_group),
    'Location: {0}'.format(ws.location),
    'Subscription Id: {0}'.format(ws.subscription_id),
]
print('\n'.join(workspace_summary))

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


Name: FishyBusiness
Resource Group: FishyBusiness
Location: australiaeast
Subscription Id: 3191ba83-be2b-4b29-8409-f06e2fbb65bd


In [5]:
# Name of the compute target to reuse, or to create when the workspace has none by that name.
compute_name = 'CPU'

# Read the workspace's compute targets once and reuse the result for the lookup below.
existing_compute_targets = ws.compute_targets

if compute_name in existing_compute_targets:
    compute_target = existing_compute_targets[compute_name]

    if isinstance(compute_target, AmlCompute):
        print('Found compute target: ' + compute_name)
    else:
        # This case used to pass silently; report it so a target of the wrong kind is
        # noticed before a pipeline is submitted to it.
        print('Warning: compute target {0} exists but is of type {1}, not AmlCompute'.format(
            compute_name, type(compute_target).__name__))
else:
    # No target with that name yet: provision a cluster that scales between 1 and 4 nodes.
    provisioning_configuration = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                                       min_nodes=1,
                                                                       max_nodes=4)

    compute_target = ComputeTarget.create(ws, compute_name, provisioning_configuration)

    # Wait (up to 20 minutes) for provisioning to finish before using the target.
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    print(compute_target.status.serialize())

Found compute target: CPU


In [6]:
# Get the workspace datastore named "workspacefilestore"; the upload cells and the
# pipeline data references below all go through this object.
default_file_store = Datastore(ws, "workspacefilestore")

In [7]:
# The project root is the parent of the notebook's working directory.
parent_folder = Path(os.getcwd()).parent

# Sub-folders of the project root, kept as plain strings.
data_path = os.path.join(parent_folder, 'data')
raw_data_path = os.path.join(data_path, 'raw')
src_path = os.path.join(parent_folder, 'src')
tools_path = os.path.join(parent_folder, 'tools')

# Folder holding the scripts that the pipeline steps run.
source_directory = os.path.join(src_path, 'FishOrNoFish')

# Echo every resolved location, one per line.
print(parent_folder, data_path, raw_data_path, src_path, tools_path, source_directory, sep='\n')

C:\Source\FishyBusiness
C:\Source\FishyBusiness\data
C:\Source\FishyBusiness\data\raw
C:\Source\FishyBusiness\src
C:\Source\FishyBusiness\tools
C:\Source\FishyBusiness\src\FishOrNoFish


In [None]:
# Gather the full path of every file below tools_path (sub-folders included).
tools_files = [
    os.path.join(folder, file_name)
    for folder, _sub_folders, file_names in os.walk(tools_path)
    for file_name in file_names
]

# Push the collected files to the datastore; existing remote files are not overwritten.
default_file_store.upload_files(
    files=tools_files,
    target_path='tools/ffmpeg-4.1.3-win64-static',
    overwrite=False,
    show_progress=True,
)

In [None]:
# Upload every source video below raw_data_path that the file share does not hold yet.
# NOTE(review): file_service_list_directories_and_files, account_name, storage_key and
# share_name are not defined in this notebook — presumably they come from Common.ipynb.
for root, dirs, files in os.walk(raw_data_path):
    # 'Frames' folders hold exported frames, not source videos.
    if 'Frames' in root:
        continue

    # Remote folder = local folder relative to the project root. Computing it from
    # parent_folder (rather than dropping a fixed number of leading path components)
    # keeps it correct wherever the project is checked out.
    target_path = os.path.relpath(root, parent_folder)

    for file in files:
        # '_VIDEO' files are the audio-stripped copies produced by the ffmpeg cell.
        if '.MP4' in file and '_VIDEO' not in file:
            file_path = os.path.join(root, file)
            video_name = ntpath.basename(file_path)
            file_or_dirs = file_service_list_directories_and_files(account_name, storage_key, share_name, target_path)
            if video_name not in file_or_dirs:
                print('Uploading {0}'.format(file_path))
                default_file_store.upload_files([file_path], target_path=target_path, show_progress=True)

### Pipeline

In [None]:
# The datastore path mirrors the local layout below the project root, with forward slashes.
# It is derived from parent_folder so it does not depend on how deep the project is checked out.
path_on_datastore = os.path.relpath(raw_data_path, parent_folder).replace('\\', '/')

# Reference to the raw data folder on the datastore, passed to the pipeline steps as 'raw_data'.
raw_data_reference = DataReference(datastore=default_file_store,
                                   data_reference_name='raw_data',
                                   path_on_datastore=path_on_datastore)

In [None]:
# The datastore path mirrors the local layout below the project root, with forward slashes.
# It is derived from parent_folder so it does not depend on how deep the project is checked out.
path_on_datastore = os.path.relpath(tools_path, parent_folder).replace('\\', '/')

# Reference to the tools folder on the datastore, passed to the pipeline steps as 'tools'.
tools_reference = DataReference(datastore=default_file_store,
                                data_reference_name='tools',
                                path_on_datastore=path_on_datastore)

In [None]:
# Conda environment for the pipeline scripts: the defaults plus the 'opencv' conda package.
conda_dependencies = CondaDependencies()
conda_dependencies.add_conda_package('opencv')

# Run configuration shared by every pipeline step defined below.
run_configuration = RunConfiguration()
# Run inside Docker, starting from the SDK's default CPU base image.
run_configuration.environment.docker.enabled = True
run_configuration.environment.docker.base_image = DEFAULT_CPU_IMAGE
# Python dependencies are not user-managed; they are taken from conda_dependencies above.
run_configuration.environment.python.user_managed_dependencies = False
run_configuration.environment.python.conda_dependencies = conda_dependencies
# Execute on the compute target resolved earlier in the notebook.
run_configuration.target = compute_target

In [None]:
# Pipeline step that runs extract_video.py from source_directory.
# The raw-data and tools references are both declared as inputs and handed to the
# script through --raw_data_path / --tools_path. allow_reuse=False is set on the step.
extract_video_step = PythonScriptStep(name='extract_video',
                                      source_directory=source_directory,
                                      script_name='extract_video.py',
                                      arguments=['--raw_data_path', raw_data_reference, '--tools_path', tools_reference],
                                      inputs=[raw_data_reference, tools_reference],
                                      runconfig=run_configuration,
                                      allow_reuse=False)

In [None]:
# Single-step pipeline containing only the extract_video step.
pipeline = Pipeline(workspace=ws, steps=[extract_video_step])

In [None]:
# Submit the pipeline under the 'extract_video' experiment and keep the returned run.
pipeline_run = Experiment(ws, 'extract_video').submit(pipeline)
# Bare expression so the notebook displays the run object.
pipeline_run

In [None]:
# Show the azureml.widgets RunDetails widget for the submitted pipeline run.
RunDetails(pipeline_run).show()

In [None]:
# Wait for the pipeline run to complete, with show_output enabled.
pipeline_run.wait_for_completion(show_output=True)

In [None]:
# Pipeline step that runs extract_frames.py from source_directory, passing --x 2.
# NOTE(review): `videos` is not defined in any visible cell of this notebook, and `frames`
# is only assigned in a later cell (as a local folder path string). Unless Common.ipynb
# defines them, this cell fails with NameError on a fresh Restart & Run All.
# TODO: define the intended data references before this cell.
extract_frames_step = PythonScriptStep(name='extract_frames',
                                       source_directory=source_directory,
                                       script_name='extract_frames.py',
                                       arguments=['--raw_data', videos, '--raw_frames', frames, '--x', 2],
                                       inputs=[videos, frames],
                                       #outputs=[frames],
                                       runconfig=run_configuration,
                                       allow_reuse=False)

In [None]:
# Rebind `pipeline` to a single-step pipeline containing only the extract_frames step.
pipeline = Pipeline(workspace=ws, steps=[extract_frames_step])

In [None]:
# Submit the pipeline under the 'extract_frames' experiment; rebinds `pipeline_run`.
pipeline_run = Experiment(ws, 'extract_frames').submit(pipeline)
# Bare expression so the notebook displays the run object.
pipeline_run

In [None]:
# Show the azureml.widgets RunDetails widget for the current pipeline run.
RunDetails(pipeline_run).show()

In [None]:
# Wait for the current pipeline run to complete, with show_output enabled.
pipeline_run.wait_for_completion(show_output=True)

In [None]:
# Inspect each child run of the pipeline run: print its id, name and status,
# then show a RunDetails widget for it.
children = pipeline_run.get_children()
for child in children:
    status = child.get_status()
    print('Id:', child.id, 'Script:', child.name, 'Status:', status)
    RunDetails(child).show()

In [None]:
import cv2, os

# Folder of videos used for the local extraction cells below. Built from raw_data_path
# (defined with the other project paths) so the notebook is not tied to one machine's
# absolute checkout location.
top = os.path.join(raw_data_path, 'Channels 2017', 'Mudginberri 2017', 'Transect 1', 'Location 1')

# Exported frames go into a 'Frames' folder inside that location.
frames = os.path.join(top, 'Frames')

# Seconds of video between two exported frames (multiplied by the frame rate below).
x = 5

In [None]:
# Export one JPEG every `x` seconds from each .MP4 found below `top` into `frames`.

# Make sure the output folder exists before anything is written to it.
os.makedirs(frames, exist_ok=True)

for root, dirs, files in os.walk(top):
    # Skip the output folder: exported frame names contain '.MP4' as well.
    if 'Frames' in root:
        continue

    for video in files:
        if '.MP4' not in video:
            continue

        print(video)
        # Join with `root`, not `top`, so videos found in sub-folders resolve correctly.
        video_path = os.path.join(root, video)
        video_capture = cv2.VideoCapture(video_path)

        try:
            frame_rate = video_capture.get(cv2.CAP_PROP_FPS)
            # Frames to skip between exports; at least 1 so the loop always advances.
            frame_step = max(1, int(frame_rate) * x)

            frame_position = 0

            result, frame = video_capture.read()

            while result:
                frame_path = os.path.join(frames, video + '_frame_%d.jpg' % frame_position)

                print('Writing frame %s' % (frame_path))
                if not cv2.imwrite(frame_path, frame):
                    # imwrite reports failure through its return value; stop this video.
                    print('Could not write %s' % (frame_path))
                    break

                frame_position = frame_position + frame_step
                print(frame_position)

                # Seek to the next frame to export and read it.
                video_capture.set(cv2.CAP_PROP_POS_FRAMES, frame_position)
                result, frame = video_capture.read()
        except Exception as e:
            # Report and move on to the next video. Staying in the loop would retry the
            # same frame forever because `result` would still be true.
            print(e)
        finally:
            # Always free the decoder / file handle held by the capture.
            video_capture.release()
        print('Exporting finished.')

In [None]:
import subprocess

# Pass 1: collect the source videos below `top` before any output file is created.
# 'Frames' folders and previously generated '_VIDEO' copies are left out.
file_paths = []

for root, dirs, files in os.walk(top):
    if 'Frames' in root:
        continue
    for file in files:
        if '.MP4' in file and '_VIDEO' not in file:
            file_paths.append(os.path.join(root, file))

# The ffmpeg binary location is the same for every file; it is relative to the
# notebook's working directory.
ffmpeg_exe_path = os.path.join('..', 'src', 'FishOrNoFish', 'ffmpeg-4.1.3-win64-static', 'bin', 'ffmpeg.exe')
print(ffmpeg_exe_path)

# Pass 2: write '<name>_VIDEO<ext>' next to each source video, copying the streams
# without re-encoding (-c copy) and dropping the audio (-an).
for file_path in file_paths:
    video_name = ntpath.basename(file_path)
    print(video_name)

    # splitext separates only the final extension, so names containing extra dots
    # keep their full stem.
    stem, extension = os.path.splitext(video_name)
    out_path = os.path.join(os.path.dirname(file_path), '{0}_VIDEO{1}'.format(stem, extension))
    print(out_path)

    # An argument list (no shell) means paths with spaces or quotes need no escaping.
    ffmpeg_command = [ffmpeg_exe_path, '-loglevel', 'verbose', '-i', file_path, '-c', 'copy', '-an', out_path]
    print(ffmpeg_command)

    completed = subprocess.run(ffmpeg_command)
    if completed.returncode != 0:
        # Surface failures instead of discarding the exit status.
        print('ffmpeg failed for {0} (exit code {1})'.format(file_path, completed.returncode))