# Surfrider ETL script

## Blob Ops

In [None]:
from azure.storage.blob import ContainerClient
from azure.storage.blob import BlobClient


def blobInContainer(connection_s,container_n):
    ''' 
    blobContainer create a name_list of blobs within container
    Input: params are storage conn string & container name (no full url)
    Output: the list of blobs objects within given container
    '''
    try:
        campaign_container = ContainerClient.from_connection_string(conn_str=connection_s, container_name=container_n)
        blob_list = campaign_container.list_blobs()
        blob_names_list = []
        for blob in blob_list:
            blob_names_list.append(blob.name)
        return blob_names_list
    except:
        print("The container you are trying to list blob from probably does not exist.")
        print("Early exit of ETL process as container probably does not exist.")
        exit()


def blobInfos(connection_s,container_n,blob_n):
    ''' 
    blobInfos provides basic information about a blob object
    Input: params are storage conn string, container name and blob_name only (no full url)
    Output: None, print only
    '''
    try:
        blob_video = BlobClient.from_connection_string(conn_str=connection_s,container_name=container_n, blob_name=blob_n)
        blob_video_url = blob_video.url
        blob_video_prop = blob_video.get_blob_properties()
        blob_video_prop_keys = blob_video_prop.keys()
        print("Blob name:",blob_n)
        print("Blob URL:",blob_video_url)
        #print("blob properties:", blob_video_prop)
        #print("blob properties keys:", blob_video_prop_keys)
    except: 
        print("The blob you are trying to get info from probably does not exist")


def downloadBlob(blobclient):
    ''' 
    downloadBlob from Azure to local file system
    Input: parameter is a blob client object from azure storage sdk
    Output: output is the path of the downloaded blob
    '''
    try:
        with open("/tmp/"+blobclient.blob_name, "wb") as my_blob_dl:
            blob_data = blobclient.download_blob()
            blob_data.readinto(my_blob_dl)
        print("Blob %s downloaded" %blobclient.blob_name)
        print("Blob path: /tmp/%s" %blobclient.blob_name)
        path = "/tmp/"+blobclient.blob_name
        return path
    except:
        print("The blob you are trying to download probably does not exist within container")
        print("Early exit of ETL process")
        exit()

## AI Ops

In [None]:
def getPrediction(video_name):
    '''
    getPrediction sends POST request to an AI inference service, delegated to bash script subprocess
    Input: the name of a video which is expected to be dowloaded in local /tmp before
    Output: the prediction made by AI: a json-like format data but as a list
    '''
    print("Sending video to AI for Trash prediction")
    curl_request_script = ['./curl_request_param.sh',video_name]
    output = []
    request_answer = subprocess.Popen(curl_request_script, stdout=subprocess.PIPE)
    i = 0
    for line in request_answer.stdout:
        print(line)
        output.append(line)
    return output

In [None]:
import json
import logging
import requests

def AIready(url):
    '''
    AIready function evaluate whether AI inference service is available
    Input: takes the url of the AI service to evaluate availability
    Output: returns ready status, a boolean status
    '''
    ready = False
    try:
        AI_request = requests.get(url)
        print("HTTP Status Code: ",AI_request.status_code)
        if AI_request.status_code == 200:
            print("AI inference service is available")
            ready = True
            return ready
        else:
            print("HTTP Status Code: ",AI_request.status_code)
            print("AI server is responding but there might be an issue")
    except requests.exceptions.RequestException:
        print("AI not found, an error has occured")
        return ready


def getPrediction(video_name,url):
    '''
    getPrediction sends POST request to an AI inference service, delegated to bash script subprocess
    Input: the name of a video which is expected to be dowloaded in local /tmp before
    Output: the prediction made by AI: a json-like format data but as a list
    '''
    files = {'file': (f'/tmp/{video_name}', open(f'/tmp/{video_name}', 'rb'), 'application/octet-stream')}
    response = requests.post(f'{url}:5000', files=files)
    if not response.ok:
        logger.error(f'Request to AI failed wih reason {response.reason}.')
    output = [response._content]
    return output


def jsonPrediction(pred):
    ''' 
    jsonPrediction cast a prediction from getPrediction function
    Input: pred, the string result of the previous getPrediction function
    Output: json_prediction, a dictionnary built from a subset of pred string
    '''
    string_prediction = str(pred[0])[2:-3] #removing 2 x first and 3 last characters of pred
    json_prediction = json.loads(string_prediction)
    return json_prediction


def getTrashLabel(frame_2_box):
    ''' 
    getTrashLabel return label from a frame_to_box
    Input: a frame_2_box dictionnary from jsonPrediction
    Output: the value of predicted label
    '''
    return frame_2_box['label']


def mapLabel2TrashIdPG(label):
    '''
    mapLabel2TrashIdPG function is a different mapping between a predicted label by AI and TrashId as defined within TrashType table
    Input: a label predicted by AI
    Output: a TrashId as defined in TrashType table
    '''
    switcher = { 
        "others":"1", #"autre dechet" in PG Data Model mapped to IA "others" label
        "dechet agricole":"2",
        "bottles":"3", #"bouteille boisson" in PG Data Model mapped to IA "bottles" label
        "fragments":"4",#"industriel ou construction in PG Data Model mapped to IA "fragments" label
        "peche et chasse":"5",
        "emballage alimentaire":"6",
        "objet vie courante":"7",
        "autres dechets +10":"8"
    } 
    return switcher.get(label, "nothing")


def mapLabel2TrashIdSQL(label):
    ''' 
    NOTICE: this switcher function is DEPRECATED as it initially standed for SQL Trash Table scheme
    mapLabelTrashId is a switch that converts label to TrashId
    Input: label that comes from getTrashLabel from jsonPrediction dictionnary 
    Output: a TrashId, which is meaningful with respect to Trash_Type table in PostGre
    '''
    switcher = { 
    "Fishing or Hunting":"89B44BAA-69AA-4109-891A-128E012E7E07",
    "Food Packaging":"185FEFA2-EEF2-47A8-873E-26032A4BB3C3",
    "Unknown":"BB4DEA69-218A-40CC-A000-2AE17C37152C",
    "Industrial or Construction Debris":"2A863E38-E5D0-455F-87CE-2B75DA29F59A",
    "fragments":"ED401B92-DC24-44C0-A52A-34CE831092BF",
    "Agricultural Waste":"36B2AFEB-7A7C-44B5-A790-5E5C73BA144D",
    "others":"4BEC18FC-BC48-45B7-AFDA-6BA96BD80921",
    "Common Household Items":"C68E90CF-6E65-4474-BC60-72E1C8513F55",
    "plastic":"6961D0DB-928C-419E-9985-98EEEAF552C7",
    "bottles":"9780940B-D06C-4AAB-8003-AB914981E87A",
    "Drinking Bottles":"BCF549A8-AECD-4BC9-B9B8-B94A8F3758D5",
    "Unknown10":"BC7BB564-BE04-4B4B-9913-FF69780B93A6"
    } 
    return switcher.get(label, "nothing")

## GPS Ops

In [None]:
import os
video = '28022020_Boudigau_4.MP4'
os.system(f'python /tmp/gopro2gpx/gopro2gpx.py -s -vvv /tmp/{video} /tmp/{video}')

In [None]:
# Parse GPX file
import os
import gpxpy
import gpxpy.gpx
import json
import subprocess
import datetime
from datetime import datetime
from datetime import timedelta
from shapely.geometry import Point
from functools import partial
import pyproj
from shapely.ops import transform
from tqdm import tqdm

def goproToGPX(video_name):
    '''
    goproToGPX function extracts GPX file from raw GoPro video
    GPX extraction is delegated to shell scripts that calls gopro2gpx python helper
    Input: the name of a video locally available within /tmp
    Output: the path of the GPX generated file
    '''
    gopro2gpx_script = ['./gopro2gpx_param.sh',video_name]
    result = subprocess.Popen(gopro2gpx_script, stdout=subprocess.PIPE)
    output = []
    i = 0
    for line in result.stdout:
        print(line)
        output.append(line)
    path='/tmp/'+video_name+'.gpx'
    return path


def goproToGPX(video_name):
    result = os.system(f'python /tmp/gopro2gpx/gopro2gpx.py -s -vvv /tmp/{video_name} /tmp/{video_name}')
    path='/tmp/'+video_name+'.gpx'
    return path


def gpsPointList(gpxdata):
    ''' 
    gpsPointList function extract gps points from gpx file
    Input: gpxdata is a gpxpy object that returns data from a parsed gpx file
    Output: gpsPointList return a list of dictionnary points with Time, Lat, Long, Elev
    '''

    point_list = []
    for track in gpxdata.tracks:
        for segment in track.segments: 
            for point in segment.points:
                point_info = {'Time':point.time,'Latitude':point.latitude,'Longitude':point.longitude,'Elevation':point.elevation}
                point_list.append(point_info)
    return point_list


def getMediaInfo(mediafile):
    '''
    getMediaInfo function extract metadata info about a media file, using mediainfo shell command
    Input: a media file like a video
    Output: the metadata about the media
    '''
    cmd = "mediainfo --Output=JSON %s"%(mediafile)
    proc = subprocess.Popen(cmd, shell=True,stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    data = json.loads(stdout)
    return data


def getDuration(mediafile):
    '''
    getDuration function get the duration of a mediafile, typically a video
    Input: a mediafile, on which we then extract the mediainfo
    Output: the duration of the media
    '''
    data = getMediaInfo(mediafile)
    duration = float(data['media']['track'][0]['Duration'])
    return duration


def createTime(time):
    '''
    createTime function creates a timestamp by adding 1 seconds to input
    Input: a time value, as datetime python format
    Output: the newly created timestamp
    '''
    new_time = time
    new_time = new_time + timedelta(seconds=1)
    return new_time

def createLatitude(lat1,lat2):
    '''
    createLatitude function creates a new Latitude by averaging two others
    Input: lat1 and lat2, 2 x latitudes
    Output: the average latitude
    '''
    new_latitude = (lat1+lat2)/2
    new_latitude = round(new_latitude,6)
    return new_latitude

def createLongitude(long1,long2):
    '''
    createLongitude function creates a new Longitude by averaging two others
    Input: long1 and long2, 2 x Longitudes
    Output: the average Longitude
    '''
    new_longitude = (long1+long2)/2
    new_longitude = round(new_longitude,6)
    return new_longitude

def createElevation(elev1,elev2):
    '''
    createElevation function creates a new Elevation by averaging two others
    Input: elev1 and elev2, 2 x Elevations
    Output: the average Elevation
    '''
    new_elevation = (elev1+elev2)/2
    new_elevation = round(new_elevation,6)
    return new_elevation


def fillGPS(inputGPSList,videoLength):
    '''
    fillGPS function will complete a list of GPS point, by filling in missing points in time series
    Input: 
     - a GPS point list, that comes from the output of the gpsPointList function
     - the related video length, from which GPS data is extracted. Value is given by getDuration
    Output:
    '''
    filledGps = inputGPSList.copy()
    gps_length = len(filledGps)
    iteration_length = int((filledGps[gps_length-1]['Time'] - filledGps[0]['Time']).total_seconds())
    ## this section output a filled gps list of length iteration_length+1 = Delta T between last gps timestamp and first one
    i = 0
    while i < (iteration_length):
        delta = filledGps[i+1]['Time']-filledGps[i]['Time']
        delta = int(delta.total_seconds())
        if delta > 1: # adding a newly created element at index i+1
            missing_time = createTime(filledGps[i]['Time'])
            missing_latitude = createLatitude(filledGps[i]['Latitude'],filledGps[i+1]['Latitude'])
            missing_longitude = createLongitude(filledGps[i]['Longitude'],filledGps[i+1]['Longitude'])
            missing_elevation = createElevation(filledGps[i]['Elevation'],filledGps[i+1]['Elevation'])
            new_gps = {'Time':missing_time,'Latitude':missing_latitude,'Longitude':missing_longitude,'Elevation':missing_elevation}
            filledGps.insert(i+1,new_gps)
        i = i+1
    ## this section add missing point at the end of the list, in case filledGps initial Delta time length is less than actual video length
    if len(filledGps) < videoLength:
        j = 0
        while len(filledGps) < videoLength:
            filledGps.insert(len(filledGps),filledGps[len(filledGps)-1])
            j = j+1

    return filledGps


def longLat2shapePoint(gpsLongLatPoint):
    '''
    longLat2shapePoint function creats a GPS point with a 'the_geom' key instead of Long/Lat pair
    Input: a GPS Point with 'Longitude' and 'Latitude' keys
    Output: a dictionnary for a GPS data with key 'the_geom' built from Long/Lat
    '''
    gpsShapePoint = {'Time':gpsLongLatPoint['Time'],'the_geom':Point(gpsLongLatPoint['Longitude'],gpsLongLatPoint['Latitude']),'Elevation':gpsLongLatPoint['Elevation']}
    return gpsShapePoint


def longLat2shapeList(gpsLongLatList):
    '''
    longLat2shapeList function creates a new GPS Point list with 'the_geom' key instead of LongLat
    Input: a gpsLongLatList that comes from fillGPS, as we expect the missing fill operation done
    Output: a new GPS point list with 'the_geom' key
    '''
    gpsShapeList = []
    for gpsPoint in gpsLongLatList:
        gpsShapePoint = longLat2shapePoint(gpsPoint)
        gpsShapeList.append(gpsShapePoint)
    return gpsShapeList


def geometryTransfo(gpsShapePoint):
    '''
    geometryTransfo function convert a GPS point list from a geo representation to another
    Input: a GPS shape point, meaning, a dictionnary with 'the_geom' key instead of LongLat
    Output: a GPS shape point with the target geometry, here 2154
    '''
    project = partial(
    pyproj.transform,
    pyproj.Proj(init='epsg:4326'), # source coordinate system
    pyproj.Proj(init='epsg:2154')) # destination coordinate system

    geo1 = gpsShapePoint['the_geom']
    geo2 = transform(project,geo1)
    return geo2


def gps2154(gpsShapePointsFilled):
    '''
    gps2154 function transforms a GPS shape point list, into the 2154 geometry
    Input: a GPS shape point list where GPS point source geometry is 4326
    Output: a GPS shape point list where GPS point target geometry is 2154
    '''
    gps2154Points = []
    for point in tqdm(gpsShapePointsFilled):
        geo2154 = geometryTransfo(point)
        gps2154Point = {'Time':point['Time'],'the_geom':geo2154,'Elevation':point['Elevation']}
        gps2154Points.append(gps2154Point)
    return gps2154Points

## PostGre Ops

In [None]:
import os
import psycopg2

def pgConnectionString():
    '''
    pgConnectionString function creates the connection string to connect to PostGre server
    Input: none
    Output: the connection string
    '''
    pgserver = os.getenv("PGSERVER")
    pgdatabase = os.getenv("PGDATABASE")
    pgusername = os.getenv("PGUSERNAME")
    pgpassword = os.getenv("PGPWD")
    sslmode = "require"
    conn_string = "host={0} user={1} dbname={2} password={3} sslmode={4}".format(pgserver, pgusername, pgdatabase, pgpassword, sslmode)
    return conn_string

def pgOpenConnection(conn_string):
    '''
    pgOpenConnection function open a connection to PostGre server
    Input: a connection string formated for PG server, from pgConnectionString output
    Output: in case successful, a postgre connection object
    '''
    try:
        conn = psycopg2.connect(conn_string)
        print("Connection established")
        return conn
    except psycopg2.OperationalError as err:
        print("Connection could not established: ",err)


def pgCloseConnection(connection):
    '''
    pgCloseConnection function closes a connection to a PG server
    Input: a PostGre connection object, output from pgOpenConnection
    Output: None
    '''
    try:
        connection.close()
        print("PG connection closed")
    except:
        print("PG connection could not close successfully")


def trashGPS(trashId,gps2154Points):
    '''
    trashGPS is a dummy helper function that allows to associate a GPS point to a trashId
    This function is expected to be replaced by another one, taking real trash index in video to map correct GPS point.
    Input: a trashId from AI prediction dictionnary
    Output: a list of GPS Point in 2154 geometry
    '''
    length = len(gps2154Points)+1
    gpsIndex = trashId % length
    return gpsIndex


def trashInsert(gps2154Point,trashTypeId,cursor,connexion):
    '''
    trashInsert function is the actual INSERT of a Trash detected by AI within PostGre Trash Table
    Input: a gps2154Point, a TrashTypeId, a postgre cursor, a postgre connection
    Output: the row_id within Trash Table of the Trash which has just been inserted
    '''
    point = gps2154Point['the_geom'].wkt
    elevation = gps2154Point['Elevation']
    timestamp = gps2154Point['Time']
    cursor.execute("INSERT INTO campaign.trash (id, id_ref_campaign_fk,the_geom, elevation, id_ref_trash_type_fk,brand_type,time ) VALUES (DEFAULT, '1faaee65-1edb-45ab-bdd4-15268fccd301',ST_SetSRID(%s::geometry,2154),%s,%s,%s,%s) RETURNING id;", (point,elevation,trashTypeId,'icetea',timestamp))
    connexion.commit()
    row_id = cursor.fetchone()[0]
    return row_id

## Key Tests

### Test: getPrediction()

In [None]:
# This test allows to get a prediction on a small video in /tmp
video_name_test = '28022020_Boudigau_4_short_480.mov'
prediction_test = getPrediction(video_name_test,'http://aiapiplastico-dev.westeurope.cloudapp.azure.com')
prediction_test

### Test: goproToGPX() 

In [None]:
# Video with GPS Data
video_name = '28022020_Boudigau_4.MP4'
gpx_path = goproToGPX(video_name)
gpx_path

### Test: PostGre trashInsert()

In [11]:
# This test allows to validate Insert within Postgre
# This test has getPrediction() and goproToGPX() tests prequesite
import warnings
warnings.filterwarnings('ignore')
# GPX parsing
gpx_file = open(gpx_path,'r',encoding='utf-8')
gpx_data = gpxpy.parse(gpx_file) # data from parsed gpx file
# GPS Points
gpsPoints = gpsPointList(gpx_data)
# GPS Point test
gps_point_test = gpsPoints[0]
print(gps_point_test)
# GPS Shape Point test
gps_shape_point_test = longLat2shapePoint(gps_point_test)
print(gps_shape_point_test)
print(gps_shape_point_test['the_geom'].wkt)
# GPS 2154 test
geo2154 = geometryTransfo(gps_shape_point_test)
gps_2154_point_test = {'Time':gps_shape_point_test['Time'],'the_geom':geo2154,'Elevation':gps_shape_point_test['Elevation']}
print(gps_2154_point_test)
# PG connection_string, connection, cursor
pgConn_string = pgConnectionString()
pgConnection = pgOpenConnection(pgConn_string)
pgCursor = pgConnection.cursor()
# trashInsert() Test
rowID = trashInsert(gps_2154_point_test,1,pgCursor,pgConnection)
print(rowID)

{'Time': datetime.datetime(2020, 2, 28, 11, 52, 7, tzinfo=SimpleTZ("Z")), 'Latitude': 43.6158533, 'Longitude': -1.4485004, 'Elevation': 53.203}
{'Time': datetime.datetime(2020, 2, 28, 11, 52, 7, tzinfo=SimpleTZ("Z")), 'the_geom': <shapely.geometry.point.Point object at 0x10d0f0e80>, 'Elevation': 53.203}
POINT (-1.4485004 43.6158533)
{'Time': datetime.datetime(2020, 2, 28, 11, 52, 7, tzinfo=SimpleTZ("Z")), 'the_geom': <shapely.geometry.point.Point object at 0x10dbdf358>, 'Elevation': 53.203}
Connection could not established:  FATAL:  Cannot connect to the server surfrider-geodata



AttributeError: 'NoneType' object has no attribute 'cursor'

## Main

In [None]:
import warnings
warnings.filterwarnings('ignore')

def main():
    print('############################################################')
    print('################ Plastic Origin ETL process ################')
    print('################  Let\'s predict some Trash  ################')
    print('############################################################')
    print('\n')

    print('###################### Pipeline Step0 ######################')
    print('################ Get Video from Azure Storage ##############')
    # blob storage connection string
    connection_string = os.getenv("CONN_STRING")

    # get list of blobs in container campaign0
    campaign_container_name = 'campaign0'
    blobs_campaign0 = blobInContainer(connection_string,campaign_container_name)
    print("Blobs in container:")
    print(blobs_campaign0)

    # get infos of blob 'goproshort-480p.mov' '28022020_Boudigau_4_short.mp4'
    blob_video_name = 'goproshort-480p.mov'   
    blobInfos(connection_string,campaign_container_name,blob_video_name)

    # download locally in /tmp blob video
    blob_video = BlobClient.from_connection_string(conn_str=connection_string,container_name=campaign_container_name, blob_name=blob_video_name)
    downloadBlob(blob_video)

    print('###################### Pipeline Step1bis ###################')
    print('##################### AI Trash prediction ##################')

    isAIready = AIready('http://aiapisurfrider.northeurope.cloudapp.azure.com:5000')
    logger =  logging.getLogger() #required by getPrediction()

    if isAIready == True:
        prediction = getPrediction(blob_video_name)
    else:
        print("Early exit of ETL workflow as AI service is not available")
        exit()

    '''AIready('http://aiapisurfrider.northeurope.cloudapp.azure.com:5000')
    # get predictions from AI on goproshort-480p.mov
    prediction = getPrediction(blob_video_name)'''

    # cast prediction to JSON/Dictionnary format
    json_prediction = jsonPrediction(prediction)

    print('###################### Pipeline Step1 ######################')
    print('######################  GPX creation  ######################')
    video_name = '28022020_Boudigau_4.MP4'
    gpx_path = goproToGPX(video_name)

    # GPX parsing
    gpx_file = open(gpx_path,'r',encoding='utf-8')
    gpx_data = gpxpy.parse(gpx_file) # data from parsed gpx file

    # GPS Points
    gpsPoints = gpsPointList(gpx_data)

    # Video duration
    print("\n")
    video_duration = getDuration('/tmp/'+video_name)
    print("Video duration in second from metadata:",video_duration)

    # GPS file duration
    timestampDelta = gpsPoints[len(gpsPoints)-1]['Time'] - gpsPoints[0]['Time']
    print("GPS file time coverage in second: ",timestampDelta.seconds)

    print('###################### Pipeline Step2 ######################')
    print('################## Add missing GPS points ##################')
    video_duration_sup = int(video_duration)+1
    gpsPointsFilled = fillGPS(gpsPoints,video_duration_sup)

    print('###################### Pipeline Step3 ######################')
    print('############ Transformation to GPS Shape Points ############')
    gpsShapePointsFilled = longLat2shapeList(gpsPointsFilled)

    print('###################### Pipeline Step4 ######################')
    print('############## Transformation to 2154 Geometry #############')
    gps2154PointsFilled = gps2154(gpsShapePointsFilled)

    print('###################### Pipeline Step5 ######################')
    print('################### Insert within PostGre ##################')
    
    # Get connection string information from env variables
    pgConn_string = pgConnectionString()
    # Open pgConnection
    pgConnection = pgOpenConnection(pgConn_string)
    # Create Cursor
    pgCursor = pgConnection.cursor()


    # INSERTING all detected_trash within PostGre
    rowID_list = []
    for prediction in tqdm(json_prediction['detected_trash']):
        try: 
            # get GPS coordinate
            trashTypeId= prediction['id']
            gpsIndexId = trashGPS(trashTypeId,gps2154PointsFilled)
            trashGps2154Point = gps2154PointsFilled[gpsIndexId]
            # get TrashTypeId from AI prediction
            label = getTrashLabel(prediction)
            trashType = mapLabel2TrashIdPG(label)
            # INSERT within PostGRE
            rowID = trashInsert(trashGps2154Point,trashType,pgCursor,pgConnection)
            rowID_list.append(rowID)
        except:
            print("There was an issue inserting Trash id:" + str(prediction['id']) + " within PostGre")
    print("Successfully inserted " + str(len(rowID_list)) + " Trashes within Trash table")    

    # Close PG connection
    pgCloseConnection(pgConnection)

    print('############################################################')
    print('################   Plastic Origin ETL End   ################')
    print('############################################################')

In [None]:
# Execute main function
if __name__ == '__main__':
    main()

## Main with Args

In [None]:
import argparse
import warnings
warnings.filterwarnings('ignore')

def main(argv):

    ######## Pipeline Step0: Get Video to predict and insert#########
    print('######## Pipeline Step0: Get Video from Azure Blob Storage #########')
    # blob storage connection string
    connection_string = os.getenv("CONN_STRING")

    # get list of blobs in container campaign0
    campaign_container_name = argv.containername
    blobs_campaign0 = blobInContainer(connection_string,campaign_container_name)

    # get infos of blob 'goproshort-480p.mov' '28022020_Boudigau_4_short.mp4'
    blob_video_name = argv.blobname   
    blobInfos(connection_string,campaign_container_name,blob_video_name)

    # download locally in /tmp blob video
    blob_video = BlobClient.from_connection_string(conn_str=connection_string,container_name=campaign_container_name, blob_name=blob_video_name)
    downloadBlob(blob_video)

    ######## Pipeline Step 1bis: AI Trash prediction #########
    print('######## Pipeline Step 1bis: AI Trash prediction #########')

    isAIready = AIready('http://aiapisurfrider.northeurope.cloudapp.azure.com:5000')

    if isAIready == True:
        prediction = getPrediction(blob_video_name)
    else:
        print("Early exit of ETL workflow as AI service is not available")
        exit()

    # cast prediction to JSON/Dictionnary format
    json_prediction = jsonPrediction(prediction)

    ######## Pipeline Step 1: GPX creation ########
    print('######## Pipeline Step 1: GPX creation ########')
    # IMPORTANT REMARK: video_name should be equal to blob name in the future
    # For now, because of video size and internet bandwidth, blob name is a shorten low quality form of the video
    # As a consequence, it does not include GPS information that could be process then
    video_name = argv.videoname
    gpx_path = goproToGPX(video_name)

    # GPX parsing
    gpx_file = open(gpx_path,'r',encoding='utf-8')
    gpx_data = gpxpy.parse(gpx_file) # data from parsed gpx file

    # GPS Points
    gpsPoints = gpsPointList(gpx_data)

    # Video duration
    print("\n")
    video_duration = getDuration('/tmp/'+video_name)
    print("Video duration in second from metadata:",video_duration)

    # GPS file duration
    timestampDelta = gpsPoints[len(gpsPoints)-1]['Time'] - gpsPoints[0]['Time']
    print("GPS file time coverage in second: ",timestampDelta.seconds)

    ######## Pipeline Step 2: Create gpsPointFilled ########
    print('######## Pipeline Step 2: Add missing GPS points ########')
    video_duration_sup = int(video_duration)+1
    gpsPointsFilled = fillGPS(gpsPoints,video_duration_sup)

    ######## Pipeline Step 3: Transform to GPS shapePoints ########
    print('######## Pipeline Step 3: Transformation to GPS Shape Points ########')
    gpsShapePointsFilled = longLat2shapeList(gpsPointsFilled)

    ######## Pipeline Step 4: Transform to 2154 Geometry ########
    print('######## Pipeline Step 4: Transformation to 2154 Geometry ########')
    gps2154PointsFilled = gps2154(gpsShapePointsFilled)

    ######## Pipeline Step 5: Insert within PostGre ########
    print('######## Pipeline Step 5: Insert within PostGre ########')
    
    # Get connection string information from env variables
    pgConn_string = pgConnectionString()
    # Open pgConnection
    pgConnection = pgOpenConnection(pgConn_string)
    # Create Cursor
    pgCursor = pgConnection.cursor()


    # INSERTING all detected_trash within PostGre
    rowID_list = []
    for prediction in tqdm(json_prediction['detected_trash']):
        try: 
            # get GPS coordinate
            trashTypeId= prediction['id']
            gpsIndexId = trashGPS(trashTypeId,gps2154PointsFilled)
            trashGps2154Point = gps2154PointsFilled[gpsIndexId]
            # get TrashTypeId from AI prediction
            label = getTrashLabel(prediction)
            trashType = mapLabel2TrashIdPG(label)
            # INSERT within PostGRE
            rowID = trashInsert(trashGps2154Point,trashType,pgCursor,pgConnection)
            rowID_list.append(rowID)
        except:
            print("There was an issue inserting Trash id:" + str(prediction['id']) + " within PostGre")
    print("Successfully inserted " + str(len(rowID_list)) + " Trashes within Trash table")    

    # Close PG connection
    pgCloseConnection(pgConnection)

        print('############################################################')
    print('################   Plastic Origin ETL End   ################')
    print('############################################################')

In [None]:
# Defining parser
parser = argparse.ArgumentParser()
parser.add_argument('--containername', help='container name to get blob info from and download blob from to be processed by ETL')
parser.add_argument('--blobname', help='blob name to be downloaded from azure blob storage campaign0 container into /tmp')
parser.add_argument('--videoname', help='video name stored locally in /tmp to apply gpx extraction process on')


# Create args parsing standard input
try:
    args = parser.parse_args()

    # Launch ETL execution
    if (args.containername == None or args.blobname == None or args.videoname == None):
        print("Please provide containername, blobname and videoname arguments as they are all mandatory to execute ETL process.")
    else:
        # Execute main function only if 
        if __name__ == '__main__':
            main(args)

except SystemExit:
    print("There was an issue parsing arguments")