In [None]:
import azureml.core, cv2, keras, io, json, ntpath, os, requests, subprocess, urllib
print("SDK version:", azureml.core.VERSION)

import matplotlib.pyplot as plt
import numpy as np

from azureml.core import Datastore, Experiment, ScriptRunConfig, Workspace
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.dataset import Dataset
from azureml.core.runconfig import DEFAULT_CPU_IMAGE, RunConfiguration
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep
from azureml.train.estimator import Estimator
from azureml.widgets import RunDetails
from pathlib import Path
from PIL import Image
from sklearn.model_selection import train_test_split

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Restrict CUDA to device 0 for this kernel process.
# NOTE(review): keras is already imported above; confirm the backend has not
# initialised CUDA before this assignment takes effect.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

### Training Data

In [None]:
# Training data lives in <notebook parent>/data: source images under 'raw',
# segmentation masks under 'processed'.
data_folder_path = os.path.join(Path(os.getcwd()).parent, 'data')
x_train_dir, y_train_dir = (
    os.path.join(data_folder_path, subfolder) for subfolder in ('raw', 'processed')
)

#### Download Masks from Labelbox

In [None]:
# Labelbox JSON export; the next cell reads each row's label objects and
# downloads the mask behind every 'instanceURI'.
export_file_path = os.path.join(data_folder_path, 'export-2019-10-14T06_16_18.335Z.json')

In [None]:
# Download every labelled object mask referenced by the Labelbox export and
# save it in y_train_dir as "<image name without extension>_<label value>.JPG".
# NOTE(review): JPEG is lossy, so mask pixel values may not survive exactly;
# confirm whether a lossless format (e.g. PNG) is expected downstream.

# Image.save does not create missing folders.
os.makedirs(y_train_dir, exist_ok=True)

with open(export_file_path, 'r') as export_file:
    data = json.load(export_file)

for row in data:
    image_stem = row['External ID'].split('.')[0]
    # `label_object` rather than `object`, which would shadow the builtin.
    for label_object in row['Label']['objects']:
        response = requests.get(label_object['instanceURI'], timeout=60)
        # Fail loudly on an HTTP error instead of handing an error page to PIL.
        response.raise_for_status()
        image = Image.open(io.BytesIO(response.content))
        image = image.convert("RGB")
        image.save(os.path.join(y_train_dir, '{0}_{1}.JPG'.format(image_stem, label_object['value'])))

In [None]:
# helper function for data visualization
def visualize(**images):
    """Plot the given images side by side in one row.

    Each keyword name becomes the subplot title (underscores replaced by
    spaces, then title-cased); each value is passed to ``plt.imshow``.
    """
    column_count = len(images)
    plt.figure(figsize=(16, 5))
    for position, (label, picture) in enumerate(images.items(), start=1):
        plt.subplot(1, column_count, position)
        # Hide the axis ticks; only the picture and its title matter here.
        plt.xticks([])
        plt.yticks([])
        plt.title(label.replace('_', ' ').title())
        plt.imshow(picture)
    plt.show()
    
# helper function for data visualization
def denormalize(x):
    """Scale image to range 0..1 for correct plot.

    Values are stretched linearly so that the 2nd percentile maps to 0 and
    the 98th percentile maps to 1, then clipped to [0, 1].

    Args:
        x: array-like of numeric pixel values.

    Returns:
        numpy array with the same shape as ``x`` and values in [0, 1]. When
        both percentiles coincide (e.g. a flat image) an all-zero array is
        returned instead of dividing by zero.
    """
    x_max = np.percentile(x, 98)
    x_min = np.percentile(x, 2)
    value_range = x_max - x_min
    if value_range == 0:
        # Nothing to stretch; avoid the 0/0 that would fill the result with NaN.
        return np.zeros_like(x, dtype=float)
    x = (x - x_min) / value_range
    x = x.clip(0, 1)
    return x
    

# classes for data loading and preprocessing
class Dataset:
    """Segmentation dataset. Read images, apply augmentation and preprocessing transformations.

    NOTE: this class shadows ``azureml.core.dataset.Dataset`` imported at the
    top of the notebook for every cell that runs after this one.

    Args:
        images_dir (str): path to images folder
        masks_dir (str): path to segmentation masks folder; each mask must have
            the same file name as its image
        classes (list): names of the classes (entries of ``CLASSES``, case
            insensitive) to extract from the segmentation mask; ``None``
            selects every class in ``CLASSES``
        augmentation (albumentations.Compose): data transformation pipeline
            (e.g. flip, scale, etc.)
        preprocessing (albumentations.Compose): data preprocessing
            (e.g. normalization, shape manipulation, etc.)

    """

    CLASSES = ['other_grass', 'para_grass', 'tree']

    def __init__(
            self,
            images_dir,
            masks_dir,
            classes=None,
            augmentation=None,
            preprocessing=None,
    ):
        # os.listdir order is arbitrary; sort so index i is reproducible.
        self.ids = sorted(os.listdir(images_dir))
        self.images_fps = [os.path.join(images_dir, image_id) for image_id in self.ids]
        self.masks_fps = [os.path.join(masks_dir, image_id) for image_id in self.ids]

        # The documented default (None) used to raise TypeError; treat it as
        # "all known classes".
        if classes is None:
            classes = self.CLASSES

        # convert str names to class values on masks
        self.class_values = [self.CLASSES.index(cls.lower()) for cls in classes]

        self.augmentation = augmentation
        self.preprocessing = preprocessing

    def __getitem__(self, i):
        """Return ``(image, mask)`` for sample ``i``.

        The image is converted from BGR to RGB. The mask is read as a single
        channel image and expanded to one float channel per requested class,
        plus a trailing background channel when more than one class is used.

        Raises:
            FileNotFoundError: if the image or its mask cannot be read.
        """
        # read data (cv2.imread returns None instead of raising on failure)
        image = cv2.imread(self.images_fps[i])
        if image is None:
            raise FileNotFoundError('Could not read image: {0}'.format(self.images_fps[i]))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        mask = cv2.imread(self.masks_fps[i], 0)
        if mask is None:
            raise FileNotFoundError('Could not read mask: {0}'.format(self.masks_fps[i]))

        # extract the requested classes from the mask, one channel per class
        masks = [(mask == v) for v in self.class_values]
        mask = np.stack(masks, axis=-1).astype('float')

        # add background if mask is not binary
        if mask.shape[-1] != 1:
            background = 1 - mask.sum(axis=-1, keepdims=True)
            mask = np.concatenate((mask, background), axis=-1)

        # apply augmentations
        if self.augmentation:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        # apply preprocessing
        if self.preprocessing:
            sample = self.preprocessing(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        return image, mask

    def __len__(self):
        """Number of images found in ``images_dir``."""
        return len(self.ids)
    
    
class Dataloder(keras.utils.Sequence):
    """Load data from dataset and form batches.

    Args:
        dataset: instance of Dataset class for image loading and preprocessing.
        batch_size: Integer number of images in batch.
        shuffle: Boolean, if `True` shuffle image indexes each epoch.
    """

    def __init__(self, dataset, batch_size=1, shuffle=False):
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(dataset))

        # Shuffle once up front so the first epoch is randomised as well.
        self.on_epoch_end()

    def __getitem__(self, i):
        """Return batch ``i``: one stacked array per element of a dataset sample."""
        # collect batch data
        start = i * self.batch_size
        stop = (i + 1) * self.batch_size
        data = []
        for j in range(start, stop):
            # Look the sample up through self.indexes; indexing the dataset
            # with j directly made `shuffle=True` a no-op.
            data.append(self.dataset[self.indexes[j]])

        # transpose list of samples into one stacked array per sample element
        batch = [np.stack(samples, axis=0) for samples in zip(*data)]

        return batch

    def __len__(self):
        """Denotes the number of batches per epoch (a trailing partial batch is dropped)."""
        return len(self.indexes) // self.batch_size

    def on_epoch_end(self):
        """Callback function to shuffle indexes each epoch"""
        if self.shuffle:
            self.indexes = np.random.permutation(self.indexes)

In [None]:
# Lets look at data we have
dataset = Dataset(x_train_dir, y_train_dir, classes=['other_grass', 'para_grass', 'tree'])

image, mask = dataset[5] # get some sample
# Mask channels follow the order of `classes`; because more than one class is
# requested, Dataset appends the background as the last channel (index 3).
visualize(
    image=image,
    other_grass_mask=mask[..., 0].squeeze(),
    para_grass_mask=mask[..., 1].squeeze(),
    tree_mask=mask[..., 2].squeeze(),
    background_mask=mask[..., 3].squeeze(),
)

In [None]:
# Inspect the dimensions of the sample mask loaded in the previous cell.
mask.shape

In [None]:
# Show which Docker base image this SDK version uses as DEFAULT_CPU_IMAGE.
DEFAULT_CPU_IMAGE

In [None]:
# Execute Common.ipynb so that its definitions are available in this notebook.
# NOTE(review): presumably this is where file_service_list_directories_and_files,
# account_name, storage_key and share_name (used further down) come from - confirm.
%run Common.ipynb

In [None]:
# Load the Azure ML workspace from the local config file and echo its identity,
# one "label: value" line per property.
ws = Workspace.from_config()
for label, value in (
    ('Name', ws.name),
    ('Resource Group', ws.resource_group),
    ('Location', ws.location),
    ('Subscription Id', ws.subscription_id),
):
    print('{0}: {1}'.format(label, value))

In [None]:
# Reuse the compute target named 'CPU' when the workspace already has one;
# otherwise provision a new AmlCompute cluster under that name.
compute_name = 'CPU'

# Read the property once instead of once for the test and once for the lookup.
existing_compute_targets = ws.compute_targets

if compute_name in existing_compute_targets:
    compute_target = existing_compute_targets[compute_name]

    if isinstance(compute_target, AmlCompute):
        print('Found compute target: ' + compute_name)
    else:
        # Previously this case produced no output at all; the target is still
        # used, but the mismatch is now visible.
        print('Warning: compute target {0} exists but is of type {1}, not AmlCompute'.format(
            compute_name, type(compute_target).__name__))
else:
    provisioning_configuration = AmlCompute.provisioning_configuration(vm_size = 'STANDARD_D2_V2',
                                                                min_nodes = 1,
                                                                max_nodes = 2)

    compute_target = ComputeTarget.create(ws, compute_name, provisioning_configuration)

    # Block until provisioning finishes (or the 20 minute timeout elapses).
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    print(compute_target.status.serialize())

In [None]:
# Default datastore of the workspace; uploads and data references below use it.
default_file_store = ws.get_default_datastore() 

In [None]:
# Project layout, resolved relative to the parent of the working directory.
parent_folder = Path(os.getcwd()).parent
data_path = os.path.join(parent_folder, 'data')
raw_data_path = os.path.join(data_path, 'raw')
src_path = os.path.join(parent_folder, 'src')
tools_path = os.path.join(parent_folder, 'tools')
source_directory = os.path.join(src_path, 'FishOrNoFish')

# Echo every location, in the order it was derived.
for location in (parent_folder, data_path, raw_data_path, src_path, tools_path, source_directory):
    print(location)

In [None]:
# Collect every file below tools_path (recursively) and upload the lot to the
# default datastore; files already present are not overwritten.
tools_files = [
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk(tools_path)
    for file_name in file_names
]

default_file_store.upload_files(
    files=tools_files,
    target_path='tools/ffmpeg-4.1.3-win64-static',
    overwrite=False,
    show_progress=True,
)

In [None]:
# Upload each source video under raw_data_path that the file share does not
# list yet. Derived '_VIDEO' copies and anything inside a 'Frames' folder are
# not source videos and are skipped.
for root, dirs, files in os.walk(raw_data_path):
    if 'Frames' in root:
        continue
    for file in files:
        if '.MP4' not in file or '_VIDEO' in file:
            continue
        file_path = os.path.join(root, file)
        video_name = ntpath.basename(file_path)
        # NOTE(review): the [3:-1] slice assumes a fixed number of leading path
        # components on the local machine - confirm before running elsewhere.
        target_path = os.sep.join(file_path.split(os.sep)[3:-1])
        file_or_dirs = file_service_list_directories_and_files(account_name, storage_key, share_name, target_path)
        if video_name in file_or_dirs:
            continue
        print('Uploading {0}'.format(file_path))
        default_file_store.upload_files([file_path], target_path=target_path, show_progress=True)

### Pipeline

In [None]:
# Datastore-relative location of the raw data: the local path minus its first
# three split segments, written with forward slashes.
raw_data_segments = raw_data_path.split(os.sep)[3:]
path_on_datastore = os.sep.join(raw_data_segments).replace('\\', '/')

raw_data_reference = DataReference(
    datastore=default_file_store,
    data_reference_name='raw_data',
    path_on_datastore=path_on_datastore,
)

In [None]:
# Datastore-relative location of the tools folder: the local path minus its
# first three split segments, written with forward slashes.
tools_segments = tools_path.split(os.sep)[3:]
path_on_datastore = os.sep.join(tools_segments).replace('\\', '/')

tools_reference = DataReference(
    datastore=default_file_store,
    data_reference_name='tools',
    path_on_datastore=path_on_datastore,
)

In [None]:
# Conda dependencies for the remote steps: add the opencv package.
conda_dependencies = CondaDependencies()
conda_dependencies.add_conda_package('opencv')

# Run on the compute target inside the default CPU Docker image, with the
# Python environment built from conda_dependencies rather than user-managed.
run_configuration = RunConfiguration()
step_environment = run_configuration.environment
step_environment.docker.enabled = True
step_environment.docker.base_image = DEFAULT_CPU_IMAGE
step_environment.python.user_managed_dependencies = False
step_environment.python.conda_dependencies = conda_dependencies
run_configuration.target = compute_target

In [None]:
# Pipeline step that runs extract_video.py from source_directory. Both data
# references are declared as inputs and also passed on the command line;
# allow_reuse=False makes the step run on every submission.
extract_video_inputs = [raw_data_reference, tools_reference]
extract_video_arguments = ['--raw_data_path', raw_data_reference, '--tools_path', tools_reference]

extract_video_step = PythonScriptStep(
    name='extract_video',
    source_directory=source_directory,
    script_name='extract_video.py',
    arguments=extract_video_arguments,
    inputs=extract_video_inputs,
    runconfig=run_configuration,
    allow_reuse=False,
)

In [None]:
# Single-step pipeline containing only the video extraction step.
pipeline = Pipeline(workspace=ws, steps=[extract_video_step])

In [None]:
# Submit the pipeline under the 'extract_video' experiment; the trailing
# expression displays the resulting run in the cell output.
extract_video_experiment = Experiment(ws, 'extract_video')
pipeline_run = extract_video_experiment.submit(pipeline)
pipeline_run

In [None]:
# Show the run details widget for the submitted pipeline run.
RunDetails(pipeline_run).show()

In [None]:
# Block this cell until the pipeline run finishes, streaming its output.
pipeline_run.wait_for_completion(show_output=True)

In [None]:
# Pipeline step that runs extract_frames.py from source_directory.
# NOTE(review): `videos` and `frames` are not defined anywhere above this cell,
# so a fresh "Restart & Run All" fails here with NameError. `frames` is only
# assigned further down, and there it is a local Windows path string - confirm
# which data references/outputs were intended.
# NOTE(review): `frames` is listed as an input while the outputs line is
# commented out; confirm whether it should be a pipeline output instead.
extract_frames_step = PythonScriptStep(name='extract_frames',
                                       source_directory=source_directory,
                                       script_name='extract_frames.py',
                                       arguments=['--raw_data', videos, '--raw_frames', frames, '--x', 2],
                                       inputs=[videos, frames],
                                       #outputs=[frames],
                                       runconfig=run_configuration,
                                       allow_reuse=False)

In [None]:
# Rebind `pipeline` to a single-step pipeline containing only the frame extraction step.
pipeline = Pipeline(workspace=ws, steps=[extract_frames_step])

In [None]:
# Submit the pipeline under the 'extract_frames' experiment; the trailing
# expression displays the resulting run in the cell output.
extract_frames_experiment = Experiment(ws, 'extract_frames')
pipeline_run = extract_frames_experiment.submit(pipeline)
pipeline_run

In [None]:
# Show the run details widget for the frame extraction pipeline run.
RunDetails(pipeline_run).show()

In [None]:
# Block this cell until the frame extraction run finishes, streaming its output.
pipeline_run.wait_for_completion(show_output=True)

In [None]:
# Print id, name and status of every child run of the pipeline run, and open a
# details widget for each one.
for child in pipeline_run.get_children():
    status = child.get_status()
    summary = ('Id:', child.id, 'Script:', child.name, 'Status:', status)
    print(*summary)
    RunDetails(child).show()

In [None]:
import cv2, os

# Local (Windows) folder holding the videos to process, and the 'Frames'
# sub-folder beneath it that receives the extracted images.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
top = 'C:\\Source\\FishyBusiness\\data\\raw\\Channels 2017\\Mudginberri 2017\\Transect 1\\Location 1'
frames = ntpath.join(top, 'Frames')
# Multiplied by the video's frame rate in the extraction cell below, i.e. the
# gap between two exported frames expressed in seconds.
x = 5

In [None]:
# Export one frame every `x` seconds from each .MP4 found under `top` into the
# `frames` folder, named "<video file name>_frame_<frame index>.jpg".

# cv2.imwrite does not create missing folders; it just reports failure.
os.makedirs(frames, exist_ok=True)

for root, dirs, files in os.walk(top):
    # Exported frames are named "<video>.MP4_frame_N.jpg"; never treat the
    # output folder as a source of videos.
    if 'Frames' in root:
        continue
    for video in files:
        # endswith rather than a substring test, so only real .MP4 files match.
        if not video.endswith('.MP4'):
            continue
        print(video)
        # Join with `root` (not `top`) so videos in sub-folders resolve correctly.
        video_path = os.path.join(root, video)
        video_capture = cv2.VideoCapture(video_path)

        try:
            frame_rate = video_capture.get(cv2.CAP_PROP_FPS)
            # Always advance by at least one frame; a reported frame rate below
            # 1 would otherwise re-read the same frame forever.
            frame_step = max(1, int(frame_rate) * x)

            frame_position = 0

            result, frame = video_capture.read()

            while result:
                frame_path = os.path.join(frames, video + '_frame_%d.jpg' % frame_position)

                print('Writing frame %s' % (frame_path))
                if not cv2.imwrite(frame_path, frame):
                    print('Failed to write frame %s' % (frame_path))

                frame_position = frame_position + frame_step
                print(frame_position)

                video_capture.set(cv2.CAP_PROP_POS_FRAMES, frame_position)
                result, frame = video_capture.read()
        except Exception as e:
            # Best effort: report the problem and move on to the next video.
            # The handler sits outside the while loop so that an error cannot
            # repeat endlessly on the same frame.
            print(e)
        finally:
            # Free the decoder/file handle for this video.
            video_capture.release()
        print('Exporting finished.')

In [None]:
# Write an audio-free copy "<name>_VIDEO<ext>" next to every source video
# under `top`, using ffmpeg stream copy (-c copy -an).

# Loop-invariant, so resolved once.
ffmpeg_exe_path = os.path.join('..', 'src', 'FishOrNoFish', 'ffmpeg-4.1.3-win64-static', 'bin', 'ffmpeg.exe')
print(ffmpeg_exe_path)

file_paths = []

for root, dirs, files in os.walk(top):
    for file in files:
        # Skip copies produced by an earlier run and exported frames.
        if '.MP4' in file and '_VIDEO' not in file and 'Frames' not in root:
            file_paths.append(os.path.join(root, file))

for file_path in file_paths:
    video_name = ntpath.basename(file_path)
    print(video_name)
    # splitext keeps names that contain extra dots intact ("a.b.MP4" -> "a.b", ".MP4").
    file_stem, file_extension = os.path.splitext(video_name)
    out_path = os.path.join(os.path.dirname(file_path), '{0}_VIDEO{1}'.format(file_stem, file_extension))
    print(out_path)
    # Argument list instead of a shell string: no quoting problems with spaces
    # or quotes in paths, and no shell involved.
    ffmpeg_command = [ffmpeg_exe_path, '-loglevel', 'verbose', '-i', file_path, '-c', 'copy', '-an', out_path]
    print(ffmpeg_command)
    completed_process = subprocess.run(ffmpeg_command)
    if completed_process.returncode != 0:
        print('ffmpeg failed for {0} (exit code {1})'.format(file_path, completed_process.returncode))