# Mount FileDataset and extract video frames

In [None]:
!pip install --upgrade azureml-sdk azureml-dataprep opencv-python matplotlib

In [None]:
from azureml.core import Workspace, Dataset

# Connect to the Azure ML workspace via Workspace.from_config()
# (presumably reads a local config.json describing the workspace — confirm for your setup)
workspace = Workspace.from_config()
# Print the workspace object as a quick check that the connection worked
print(workspace)

In [None]:
# Name of the registered dataset to look up in the workspace
dataset_name="videos-samples"
# Fetch the dataset registered under that name; later cells mount it
dataset = Dataset.get_by_name(workspace, name=dataset_name)
# Echo the dataset's name to confirm the lookup succeeded
print(dataset.name)

In [None]:
import cv2
import matplotlib.pyplot as plt
def processVideo(video_file_path):
    """Save every frame of a video file as a JPEG image under ``/tmp``.

    Frames are written to ``/tmp/video_frame_<index>.jpg`` where ``<index>``
    is the zero-padded, five-digit frame number starting at 0. The file name
    depends only on the frame index, so processing a second video overwrites
    the frames saved for the previous one.

    Progress is reported with ``print``. If the file cannot be opened or the
    first frame cannot be read, no frames are saved.

    Args:
        video_file_path: Path of the video file to open with OpenCV.

    Returns:
        The number of frames that were saved.
    """
    video_capture = cv2.VideoCapture(video_file_path)
    try:
        print(f"{video_file_path}: file opened {video_capture.isOpened()}")
        fps = int(video_capture.get(cv2.CAP_PROP_FPS))
        print(f"{video_file_path}: fps # {fps}")
        frame_count = 0
        # Position the capture on the first frame before reading
        video_capture.set(cv2.CAP_PROP_POS_FRAMES, frame_count)
        success, frame = video_capture.read()
        print(f"{video_file_path}: {success} reading first frame")
        while success:
            temp_file_name = f"/tmp/video_frame_{frame_count:05d}.jpg"
            # OpenCV decodes frames as BGR; matplotlib expects RGB
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            plt.imsave(temp_file_name, rgb_frame)
            print(f"{frame_count:05d}: Saved frame in {temp_file_name}")
            frame_count += 1
            success, frame = video_capture.read()
        return frame_count
    finally:
        # Always free the capture handle, even if saving a frame fails;
        # the original code never released it.
        video_capture.release()


In [None]:
# mount dataset manually instead of using
# with dataset.mount() as mount_context:
# A manually started mount context must be stopped explicitly once the files
# are no longer needed (mount_context.stop()).

# os is needed for os.listdir below; the notebook otherwise imports it only in
# a later cell, so running the cells in order raised a NameError here.
import os

mount_context=dataset.mount()
mount_context.start()
# See where we mounted the file dataset
print(mount_context.mount_point)
# List files in there
print(os.listdir(mount_context.mount_point))


In [None]:

# Extract frames from every .mp4 file found directly under the mount point
mount_dir = mount_context.mount_point
for entry in os.listdir(mount_dir):
    _, extension = os.path.splitext(entry)
    if extension != '.mp4':
        # Skip anything that is not an mp4 video
        continue
    processVideo(os.path.join(mount_dir, entry))


In [None]:
# Unmount dataset
# Stops the mount context that was started manually with mount_context.start()
mount_context.stop()

## Identity passthrough

Create an ADLS Gen 2 datastore, selecting `No` for the `Save credentials with the datastore for data access (Preview)` option. Configure the [ACL](https://docs.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-access-control) so that your identity has access to the folder that contains the data. Don't forget to set the ACL on subfolders as well. Then define a new file dataset on that datastore. Make sure that you can see the files in that file dataset; if you cannot, your permissions are wrong. The following image shows what a working file dataset looks like (your own credentials are passed through to the datastore):
![ADLS file dataset](./MountFiles.ADLS.png)

In [None]:
import os

# Look up the file dataset registered under the name "adls-file-ds"
passthrough_ds = Dataset.get_by_name(workspace, name="adls-file-ds")
print(passthrough_ds)

# Mount through a context manager so the dataset is unmounted when the block exits
with passthrough_ds.mount() as mount_context:
    mounted_at = mount_context.mount_point
    # Show the mount location, then the files visible there
    print(mounted_at)
    print(os.listdir(mounted_at))

In [None]:
# If you need to specify the tenant explicitly, uncomment the line below and set your own tenant ID
# os.environ['AZUREML_DATA_ACCESS_TENANT_ID']="f1e73557-1454-436a-9ede-fcdce055eac8"
# !printenv