In [109]:
from pprint import *
import json
import pandas as pd
import requests

In [110]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

### Get list of images and builds

**Functions**

In [111]:
# Filter images from MCR with a specified tag
def filter_images(filter):
   url = "https://mcr.microsoft.com/v2/_catalog"
   payload = ""
   headers = {}
   response = requests.request("GET", url, headers=headers, data=payload)
   all_images = json.loads(response.text)['repositories']
   # Filter images that have the specified text in the name
   cogs_images = [image for image in all_images if filter in image]

   return cogs_images

# Parse out images to conform to Cognitive Services "Pillars"
def parse_images(images):
   # Parse the images to get the image name, image version, and image tag
   # First one is repo
   pillar = [image.split('/')[1] for image in images]
   # Some images have a tag, some don't.
   image = [image.split('/')[2] if len(image.split('/')) > 2 else '' for image in images]
   # Append 'mcr.microsoft.com/v2/' to the beginning of the images
   image_url = ['mcr.microsoft.com/v2/' + image for image in images]

   # Create a dataframe with the image name, image version, and image tag
   image_df = pd.DataFrame({'pillar': pillar,
                            'image': image,
                            'image_url': image_url})

   return image_df

# Interrogate MCR for all available tags
def get_tags(image_df):
   # Create a dataframe with the image name, image version, and image tag
   image_df['tags'] = ''
   for index, row in image_df.iterrows():
      url = 'https://' + row['image_url'] + '/tags/list'
      payload = ""
      headers = {}
      response = requests.request("GET", url, headers=headers, data=payload)
      tags = json.loads(response.text)['tags']
      image_df.loc[index, 'tags'] = tags

   return image_df

# Get build dates per tag
def get_build_dates(tags_df):
   
   # Create a dataframe with the image name, image version, and image tag
   tags_df['latest_build_date'] = ''

   for index, row in tags_df.iterrows():
      print("{}/{} | Getting build dates for: {}".format(index+1, len(tags_df), row['image']))

      ##########################################################
      # Compare 'latest' with last 3 to check for discrepancies
      ##########################################################
      # Check if 'latest' exists
      #############################
      # The issue here is they have multiple tags with 'latest' - so we gotta sort between those too
      latest_tags = [tag for tag in row['tags'] if 'latest' in tag]
      latest_tag_build_date = '1900-01-01T00:00:00.700342818Z' # Temp to get started
      for tag in latest_tags:
         temp = get_build_date(row['image_url'], tag)
         # If current tag is newer than the latest, update the latest
         if temp > latest_tag_build_date:
            latest_tag_build_date = temp
            latest_tag = tag
      #############################
      # Check last 3
      #############################
      # To contain tag and build date mappings
      tag_n_build_dates = []
      
      # Number to iterate - in case there's less than 3 tags
      if len(row['tags']) < 3:
         to_iterate = len(row['tags'])-1 # Last 1
      else:
         to_iterate = len(row['tags'])-3 # Last 3
      
      # Iterate over last 3 tags per image and get build dates
      for tag in row['tags'][to_iterate:]:
         build_date = get_build_date(row['image_url'], tag)
         tag_n_build_dates.append({'tag': tag, 'build_date': build_date})
      
      # Find the latest build based on build dates we gathered
      latest_builds = max(tag_n_build_dates, key=lambda x: x['build_date'])
      ##########################################################
      # Append whichever is actually the most up to date
      ##########################################################
      if latest_builds['build_date'] > latest_tag_build_date:
         # Append the tag and build date to the dataframe since latest is not actually up to date
         tags_df.loc[index, 'latest_tag'] = latest_builds['tag']
         tags_df.loc[index, 'latest_build_date'] = latest_builds['build_date']
      else:
         # Add the 'latest' tag we found earlier
         tags_df.loc[index, 'latest_tag'] = latest_tag
         tags_df.loc[index, 'latest_build_date'] = latest_tag_build_date

   return tags_df

# Get build date for a given tag
def get_build_date(image_url, tag):
   url = 'https://' + image_url + '/manifests/' + tag
   payload = ""
   headers = {}
   response = requests.request("GET", url, headers=headers, data=payload)
   build_date = json.loads(json.loads(response.text)['history'][0]['v1Compatibility']).get('created')

   return build_date

# Generate markdown documentation
def generate_docs(useful_images_df, MsDocsDict):
   # Create a dataframe with a column for hyperlink
   useful_images_df['msft_docs_link'] = ''

   for index, row in useful_images_df.iterrows():
      # Key for dictionary lookup
      key = ""
      # Split row['image']
      my_array = row['image_url'].split('/')[3:]
      # Loop from i to the end of the array
      for i in range(len(my_array)):
         if i != len(my_array)-1:
            key = key + my_array[i] + '/'
         else:
            key = key + my_array[i]
      # Append dictionary value to dataframe
      useful_images_df.loc[index, 'msft_docs_link'] = MsDocsDict[key]
   
   return useful_images_df

In [112]:
# Generate a list of cognitive services images
cogs_images = filter_images("azure-cognitive-services")
# Store cogs_images in a dataframe
image_df = parse_images(cogs_images)
# Get list of tags from MCR
tags_df = get_tags(image_df)
# Drops publish dates based on tag ordering
tags_and_build_dates_df = get_build_dates(tags_df)
# Drop unnecessary tags column
tags_and_build_dates_df.drop(columns=['tags'], inplace=True)

1/27 | Getting build dates for: language
2/27 | Getting build dates for: sentiment
3/27 | Getting build dates for: keyphrase
4/27 | Getting build dates for: healthcare
5/27 | Getting build dates for: luis
6/27 | Getting build dates for: labeltool
7/27 | Getting build dates for: language
8/27 | Getting build dates for: anomaly-detector
9/27 | Getting build dates for: read
10/27 | Getting build dates for: layout
11/27 | Getting build dates for: businesscard
12/27 | Getting build dates for: receipt
13/27 | Getting build dates for: invoice
14/27 | Getting build dates for: custom-api
15/27 | Getting build dates for: custom-supervised
16/27 | Getting build dates for: id-document
17/27 | Getting build dates for: spatial-analysis
18/27 | Getting build dates for: spatial-analysis
19/27 | Getting build dates for: spatial-analysis
20/27 | Getting build dates for: speech-to-text
21/27 | Getting build dates for: custom-speech-to-text
22/27 | Getting build dates for: text-to-speech
23/27 | Getting b

In [113]:
# Filter tags_and_build_dates_df for build dates greater than 2020-09-16T01:21:40.22121315Z - after this we get real model images
useful_images = tags_and_build_dates_df[tags_and_build_dates_df['latest_build_date'] > '2020-09-16T01:21:40.22121315Z']
useless_images = tags_and_build_dates_df[tags_and_build_dates_df['latest_build_date'] <= '2020-09-16T01:21:40.22121315Z']

# Reset index
useful_images.reset_index(drop=True, inplace=True)
useless_images.reset_index(drop=True, inplace=True)

In [114]:
display(useful_images)

Unnamed: 0,pillar,image,image_url,latest_build_date,latest_tag
0,textanalytics,language,mcr.microsoft.com/v2/azure-cognitive-services/textanalytics/language,2022-01-22T03:20:27.700342818Z,latest
1,textanalytics,sentiment,mcr.microsoft.com/v2/azure-cognitive-services/textanalytics/sentiment,2022-01-22T03:34:42.749920863Z,3.0.018470001-zh-onprem-amd64
2,textanalytics,keyphrase,mcr.microsoft.com/v2/azure-cognitive-services/textanalytics/keyphrase,2022-01-22T03:31:09.435285807Z,3.0.018470001-onprem-amd64
3,textanalytics,healthcare,mcr.microsoft.com/v2/azure-cognitive-services/textanalytics/healthcare,2022-01-22T03:31:46.323761586Z,latest
4,language,luis,mcr.microsoft.com/v2/azure-cognitive-services/language/luis,2020-11-24T14:21:18.711028393Z,latest
5,custom-form,labeltool,mcr.microsoft.com/v2/azure-cognitive-services/custom-form/labeltool,2021-09-02T04:04:45.365559451Z,latest-2.1
6,decision,anomaly-detector,mcr.microsoft.com/v2/azure-cognitive-services/decision/anomaly-detector,2020-09-18T07:53:12.83093394Z,latest
7,vision,read,mcr.microsoft.com/v2/azure-cognitive-services/vision/read,2022-01-17T09:12:22.722261697Z,latest
8,form-recognizer,layout,mcr.microsoft.com/v2/azure-cognitive-services/form-recognizer/layout,2022-01-17T09:22:18.68923967Z,latest
9,form-recognizer,businesscard,mcr.microsoft.com/v2/azure-cognitive-services/form-recognizer/businesscard,2021-12-24T17:29:24.403875884Z,latest


💡 I have validated each of the tags above that _aren't_ `latest` that they contain a more recent docker push timestamp than the `latest` tag does - which means `latest` is not most up to date.

In [115]:
display(useless_images)

Unnamed: 0,pillar,image,image_url,latest_build_date,latest_tag
0,samples,language,mcr.microsoft.com/v2/azure-cognitive-services/samples/language/frontend,2019-02-02T00:59:10.578123491Z,latest
1,vision,spatial-analysis,mcr.microsoft.com/v2/azure-cognitive-services/vision/spatial-analysis/telegraf,2020-09-16T01:21:40.22121315Z,latest
2,diagnostic,,mcr.microsoft.com/v2/azure-cognitive-services/diagnostic,2020-01-23T18:44:49.526706845Z,latest


### Generate documentation 

In [116]:
MsDocsDict = {}

MsDocsDict["textanalytics/language"] = "https://docs.microsoft.com/en-us/azure/cognitive-services/language-service/language-detection/how-to/use-containers"
MsDocsDict["textanalytics/sentiment"] = "https://docs.microsoft.com/en-us/azure/cognitive-services/language-service/sentiment-opinion-mining/how-to/use-containers"
MsDocsDict["textanalytics/keyphrase"] = "https://docs.microsoft.com/en-us/azure/cognitive-services/language-service/key-phrase-extraction/how-to/use-containers"
MsDocsDict["textanalytics/healthcare"] = "https://docs.microsoft.com/en-us/azure/cognitive-services/language-service/text-analytics-for-health/how-to/use-containers"
MsDocsDict["language/luis"] = "https://docs.microsoft.com/en-us/azure/cognitive-services/luis/luis-container-howto?tabs=v3"
MsDocsDict["custom-form/labeltool"] = "https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/label-tool?tabs=v2-1#set-up-the-sample-labeling-tool"
MsDocsDict["decision/anomaly-detector"] = "https://docs.microsoft.com/en-us/azure/cognitive-services/anomaly-detector/anomaly-detector-container-howto"
MsDocsDict["vision/read"] = "https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision/computer-vision-how-to-install-containers?tabs=version-3-2"
MsDocsDict["form-recognizer/layout"] = "https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/containers/form-recognizer-container-install-run?tabs=layout#run-the-container-with-the-docker-compose-up-command"
MsDocsDict["form-recognizer/businesscard"] = "https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/containers/form-recognizer-container-install-run?tabs=business-card#run-the-container-with-the-docker-compose-up-command"
MsDocsDict["form-recognizer/receipt"] = "https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/containers/form-recognizer-container-install-run?tabs=receipt#run-the-container-with-the-docker-compose-up-command"
MsDocsDict["form-recognizer/invoice"] = "https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/containers/form-recognizer-container-install-run?tabs=invoice#run-the-container-with-the-docker-compose-up-command"
MsDocsDict["form-recognizer/custom-api"] = "https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/containers/form-recognizer-container-install-run?tabs=custom#-create-a-docker-compose-file"
MsDocsDict["form-recognizer/custom-supervised"] = "https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/containers/form-recognizer-container-install-run?tabs=custom#-create-a-docker-compose-file"
MsDocsDict["form-recognizer/id-document"] = "https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/containers/form-recognizer-container-install-run?tabs=id-document#run-the-container-with-the-docker-compose-up-command"
MsDocsDict["vision/spatial-analysis"] = "https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision/spatial-analysis-container?tabs=desktop-machine"
MsDocsDict["vision/spatial-analysis/diagnostics"] = "https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision/spatial-analysis-container?tabs=azure-stack-edge#troubleshooting"
MsDocsDict["speechservices/speech-to-text"] = "https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-container-howto?tabs=stt%2Ccsharp%2Csimple-format#get-the-container-image-with-docker-pull"
MsDocsDict["speechservices/custom-speech-to-text"] = "https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-container-howto?tabs=cstt%2Ccsharp%2Csimple-format#get-the-container-image-with-docker-pull"
MsDocsDict["speechservices/text-to-speech"] = "https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-container-howto?tabs=tts%2Ccsharp%2Csimple-format#get-the-container-image-with-docker-pull"
MsDocsDict["speechservices/custom-text-to-speech"] = "https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-container-howto?tabs=stt%2Ccsharp%2Csimple-format#text-to-speech-standard-and-neural"
MsDocsDict["speechservices/neural-text-to-speech"] = "https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-container-howto?tabs=ntts%2Ccsharp%2Csimple-format#get-the-container-image-with-docker-pull"
MsDocsDict["speechservices/language-detection"] = "https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-container-howto?tabs=lid%2Ccsharp%2Csimple-format#get-the-container-image-with-docker-pull"
MsDocsDict["translator/text-translation"] = "https://docs.microsoft.com/en-us/azure/cognitive-services/translator/containers/translator-how-to-install-container"

In [117]:
uselful_images_with_url = generate_docs(useful_images, MsDocsDict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  useful_images_df['msft_docs_link'] = ''
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  useful_images_df.loc[index, 'msft_docs_link'] = MsDocsDict[key]


In [118]:
print(useful_images.to_markdown())

|    | pillar          | image                 | image_url                                                                          | latest_build_date              | latest_tag                      | msft_docs_link                                                                                                                                                                                      |
|---:|:----------------|:----------------------|:-----------------------------------------------------------------------------------|:-------------------------------|:--------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|  0 | textanalytics   | language              | mcr.microsoft.com/v2/azure-cognitive-services/textanalytics/language               | 2022-01-22T03:20:27.700342818Z | latest                          | h