# OCML Development Notes
## Python Development Documentation

Importing the required libraries into the project

In [1]:
import os, io, requests, json, geojson, cv2, glob, xlrd, math, http.client, pyproj, time, uuid
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from decimal import Decimal
import xlsxwriter as xlw
from pandas.io.json import json_normalize
from PIL import Image, ImageDraw, ImageFont
from GPSPhoto import gpsphoto
from datetime import datetime, timedelta
from pytz import timezone
from tqdm import tqdm
from IPython.display import display
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__

Set the maximum number of http requests:

In [2]:
http.client._MAXHEADERS = 10000

Get the blob service client
(See the development notes file for information on how to setup the connection string in the computer's environmental variables)

In [17]:
connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")

Get the _blob service client_

In [18]:
blob_service = BlobServiceClient.from_connection_string(connection_string)

Get the list of the parent (root) containers:
(albeit not to be used)

In [13]:
#container_list = {}
#allContainers = blobService.list_containers(include_metadata=True)
#for container in all_containers:
#    container_list[container["name"]] = container["metadata"]
#container_list

In [20]:
container_list = []
all_containers = blob_service.list_containers(include_metadata=False)
for container in all_containers:
    container_list.append(container.name)
container_list

['bootdiagnostics-ocdatasci-3a4bcaf6-bcd6-48ef-8ab2-81764ecfcfa3',
 'cardinaldata',
 'originaldata',
 'originalmetadata',
 'photosphere-jsonresults']

Working on the original photosphere data. Obtaining  a list of virtual folders inside the original container:

In [25]:
if "originaldata" in container_list:
    original_vfolders = []
    container_client = ContainerClient.from_connection_string(connection_string, "originaldata")
    for c in container_client.walk_blobs():
        original_vfolders.append(c.name.split("/")[0])
    print(original_vfolders)   
else:
    raise Exception("Original Blob Folder not in Containers List")

['ocpw0019_PH1_2018_337_001_imagery', 'ocpw0019_PH1_2018_338_001_imagery', 'ocpw0019_PH1_2018_338_002_imagery', 'ocpw0019_PH2_2019_088_002_imagery', 'ocpw0019_PH2_2019_089_001_imagery', 'ocpw0019_PH2_2019_090_001_imagery', 'ocpw0019_PH2_2019_090_002_imagery', 'ocpw0019_PH2_2019_090_003_imagery', 'ocpw0019_PH2_2019_091_001_imagery', 'ocpw0019_PH2_2019_091_002_imagery', 'ocpw0019_PH2_2019_092_001_imagery', 'ocpw0019_PH2_2019_093_001_imagery', 'ocpw0019_PH3A_2020_034_001_imagery', 'ocpw0019_PH3B_2020_039_001_imagery', 'ocpw0019_PH3B_2020_039_002_imagery', 'ocpw0019_PH4_2020_038_001_imagery', 'ocpw0019_PH5_2020_037_001_imagery', 'ocpw0019_PH5_2020_037_002_imagery', 'ocpw0019_PH6_2020_034_001_imagery', 'ocpw0019_PH6_2020_034_002_imagery', 'ocpw0019_PH6_2020_035_001_imagery', 'ocpw0019_PH6_2020_036_001_imagery', 'ocpw0019_PH6_2020_038_001_imagery', 'ocpw0019_PH6_2020_319_001_imagery']


Obtaining the list of original metadata for original data containers.

In [23]:
if "originaldata" in container_list:
    original_metadata = []
    container_client = ContainerClient.from_connection_string(connection_string, "originalmetadata")
    metadata_blob_list = container_client.list_blobs()
    for blob in metadata_blob_list:
        original_metadata.append(blob.name)
    print(original_metadata)
else:
    raise Exception("Original Blob Folder not in Containers List")

['ocpw0019_PH1_2018_337_001_imagery.xlsx', 'ocpw0019_PH1_2018_338_001_imagery.xlsx', 'ocpw0019_PH1_2018_338_002_imagery.xlsx', 'ocpw0019_PH2_2019_088_002_imagery.xlsx', 'ocpw0019_PH2_2019_089_001_imagery.xlsx', 'ocpw0019_PH2_2019_090_001_imagery.xlsx', 'ocpw0019_PH2_2019_090_002_imagery.xlsx', 'ocpw0019_PH2_2019_090_003_imagery.xlsx', 'ocpw0019_PH2_2019_091_001_imagery.xlsx', 'ocpw0019_PH2_2019_091_002_imagery.xlsx', 'ocpw0019_PH2_2019_092_001_imagery.xlsx', 'ocpw0019_PH2_2019_093_001_imagery.xlsx', 'ocpw0019_PH3A_2020_034_001_imagery.xlsx', 'ocpw0019_PH3B_2020_039_001_imagery.xlsx', 'ocpw0019_PH3B_2020_039_002_imagery.xlsx', 'ocpw0019_PH4_2020_038_001_imagery.xlsx', 'ocpw0019_PH5_2020_037_001_imagery.xlsx', 'ocpw0019_PH5_2020_037_002_imagery.xlsx', 'ocpw0019_PH6_2020_034_001_imagery.xlsx', 'ocpw0019_PH6_2020_034_002_imagery.xlsx', 'ocpw0019_PH6_2020_035_001_imagery.xlsx', 'ocpw0019_PH6_2020_036_001_imagery.xlsx', 'ocpw0019_PH6_2020_038_001_imagery.xlsx', 'ocpw0019_PH6_2020_319_001_ima

Get the blob list for a given folder:

In [32]:
def get_blob_list(folder, type):
    blob_list = []
    if type == "original":
        generator= container_client.list_blobs(folder, prefix="originaldata/")
    elif type == "cardinal":
        generator = container_client.list_blobs(folder, prefix="cardinaldata/")
    elif type == "metadata":
        generator = container_client.list_blobs(folder, prefix="metadata/")
    for g in generator:
        blob_list.append(g.name)
    print(f"A total of {len(blob_list)} {type} data blobs added to the list for the {folder} container")
    return blob_list

In [33]:
get_blob_list(original_vfolders[0], "original")

A total of 11239 original data blobs added to the list for the ocpw0019_PH1_2018_337_001_imagery container


['ocpw0019_PH1_2018_337_001_imagery/181203_204751116.jpg',
 'ocpw0019_PH1_2018_337_001_imagery/181203_204751532.jpg',
 'ocpw0019_PH1_2018_337_001_imagery/181203_204751939.jpg',
 'ocpw0019_PH1_2018_337_001_imagery/181203_204752374.jpg',
 'ocpw0019_PH1_2018_337_001_imagery/181203_204752809.jpg',
 'ocpw0019_PH1_2018_337_001_imagery/181203_204753245.jpg',
 'ocpw0019_PH1_2018_337_001_imagery/181203_204753681.jpg',
 'ocpw0019_PH1_2018_337_001_imagery/181203_204754117.jpg',
 'ocpw0019_PH1_2018_337_001_imagery/181203_204754553.jpg',
 'ocpw0019_PH1_2018_337_001_imagery/181203_204754969.jpg',
 'ocpw0019_PH1_2018_337_001_imagery/181203_204755347.jpg',
 'ocpw0019_PH1_2018_337_001_imagery/181203_204755771.jpg',
 'ocpw0019_PH1_2018_337_001_imagery/181203_204756147.jpg',
 'ocpw0019_PH1_2018_337_001_imagery/181203_204756494.jpg',
 'ocpw0019_PH1_2018_337_001_imagery/181203_204756875.jpg',
 'ocpw0019_PH1_2018_337_001_imagery/181203_204757210.jpg',
 'ocpw0019_PH1_2018_337_001_imagery/181203_204757541.jpg

In [7]:
def CheckBlobMetadata():
    containerList = get_blob_list()

In [8]:
def GetBlobList(container_name=None):
    if container_name is None:
        container = self.container_name
    else:
        container = container_name
        
    blob_list = []
    generator = blobService.list_blobs(container)