In [15]:
from pprint import *
import json
import pandas as pd
import requests

In [16]:
# Relax pandas' display limits so wide/long result tables are shown without truncation
for option_name, option_value in (
   ('display.max_rows', 500),
   ('display.max_columns', 500),
   ('display.width', None),
   ('display.max_colwidth', None),
):
   pd.set_option(option_name, option_value)

**Functions**

In [17]:
# Filter MCR repositories whose name contains the given text
def filter_images(filter, timeout=30):
   """Return the MCR repository names that contain ``filter``.

   Downloads the repository catalog from mcr.microsoft.com and keeps every
   entry whose name contains ``filter`` as a substring.

   Args:
      filter: Text to look for inside each repository name. The parameter
         shadows the ``filter`` builtin within this function; the name is
         kept so existing callers are unaffected.
      timeout: Seconds to wait for the registry before giving up.

   Returns:
      list: Matching repository names, in the order the catalog lists them.

   Raises:
      requests.HTTPError: If the registry answers with an error status.
      KeyError: If the response body has no 'repositories' entry.
   """
   url = "https://mcr.microsoft.com/v2/_catalog"
   # A timeout keeps the notebook from hanging forever on a stalled connection
   response = requests.get(url, timeout=timeout)
   # Fail here with a clear HTTP error instead of a confusing JSON/KeyError below
   response.raise_for_status()
   all_images = response.json()['repositories']

   # Keep only the repositories that have the specified text in the name
   return [image for image in all_images if filter in image]

# Parse out images to conform to Cognitive Services "Pillars"
def parse_images(images):
   """Split repository names into pillar / image columns.

   Each name is split on '/'. The first segment is the repository root and
   is not stored on its own; the second segment is the pillar and the third
   is the image name. Segments beyond the third are not part of 'image' but
   remain visible in 'image_url'.

   Args:
      images: Iterable of repository names such as
         'azure-cognitive-services/vision/read'. It is consumed once.

   Returns:
      pandas.DataFrame: One row per name with the columns
         'pillar'    - second path segment, '' when the name has none
         'image'     - third path segment, '' when the name has none
         'image_url' - 'mcr.microsoft.com/v2/' followed by the full name
   """
   pillar = []
   image = []
   image_url = []

   for name in images:
      # Split once per name and reuse the parts for both columns
      parts = name.split('/')
      # Short names get '' instead of raising IndexError
      pillar.append(parts[1] if len(parts) > 1 else '')
      image.append(parts[2] if len(parts) > 2 else '')
      # Prefix with the registry host and API version so later calls only append an endpoint
      image_url.append('mcr.microsoft.com/v2/' + name)

   image_df = pd.DataFrame({'pillar': pillar,
                            'image': image,
                            'image_url': image_url})

   return image_df

# Interrogate MCR for all available tags
def get_tags(image_df, timeout=30):
   """Add a 'tags' column listing the tags the registry reports per image.

   For every row, '<image_url>/tags/list' is requested over HTTPS and the
   'tags' entry of the JSON answer is stored for that row.

   Args:
      image_df: DataFrame with an 'image_url' column holding the registry
         host and repository path without a scheme. It is modified in place.
      timeout: Seconds to wait for each registry request.

   Returns:
      pandas.DataFrame: The same ``image_df`` object, with a 'tags' column
      whose cells are lists (an empty list when the registry reports none).

   Raises:
      requests.HTTPError: If the registry answers with an error status.
   """
   tags_per_image = []
   for image_url in image_df['image_url']:
      url = 'https://' + image_url + '/tags/list'
      response = requests.get(url, timeout=timeout)
      response.raise_for_status()
      # Guard against a missing or null 'tags' entry so later len()/slicing works
      tags_per_image.append(response.json().get('tags') or [])

   # Assign the whole column at once. Writing a list into a single cell with
   # .loc can be treated by pandas as a multi-value assignment and, depending
   # on the pandas version, raise instead of storing the list.
   image_df['tags'] = pd.Series(tags_per_image, index=image_df.index, dtype=object)

   return image_df

# Fetch the 'created' timestamp recorded in the manifest of one tag
def _fetch_build_date(image_url, tag, timeout):
   url = 'https://' + image_url + '/manifests/' + tag
   response = requests.get(url, timeout=timeout)
   response.raise_for_status()
   # NOTE(review): this expects a manifest whose first 'history' entry carries
   # a JSON string under 'v1Compatibility' -- confirm the registry still
   # serves that format when no Accept header is sent.
   first_entry = response.json()['history'][0]['v1Compatibility']
   return json.loads(first_entry).get('created')

# Get build dates per tag
def get_build_dates(tags_df, n_latest=3, timeout=30):
   """Record, per image, the most recently built tag among its last few tags.

   For each row the last ``n_latest`` entries of the 'tags' list are looked
   up in the registry; the tag with the greatest 'created' value wins.

   Args:
      tags_df: DataFrame with 'image', 'image_url' and 'tags' (list per row)
         columns. It is modified in place.
      n_latest: How many trailing tags to inspect per image. Rows with fewer
         tags have all of their tags inspected.
      timeout: Seconds to wait for each registry request.

   Returns:
      pandas.DataFrame: The same ``tags_df`` object with the columns
      'latest_build_date' and 'latest_tag' set. Both are '' for a row that
      has no tags.

   Raises:
      ValueError: If ``n_latest`` is smaller than 1.
      requests.HTTPError: If the registry answers with an error status.
   """
   if n_latest < 1:
      raise ValueError("n_latest must be at least 1")

   latest_build_dates = []
   latest_tags = []
   total = len(tags_df)

   # Count rows by position so the progress line is right for any index type
   for position, (_, row) in enumerate(tags_df.iterrows(), start=1):
      print("{}/{} | Getting build dates for: {}".format(position, total, row['image']))

      # A negative slice copes with every list length: short lists are taken
      # whole and an empty list simply yields nothing to look up.
      recent_tags = row['tags'][-n_latest:]

      # To contain tag and build date mappings
      tag_n_build_dates = [
         {'tag': tag, 'build_date': _fetch_build_date(row['image_url'], tag, timeout)}
         for tag in recent_tags
      ]

      if tag_n_build_dates:
         # Timestamps are compared as strings (presumably uniform ISO-8601
         # UTC values -- verify). A missing date sorts lowest instead of
         # making max() fail on a None/str comparison.
         latest_build = max(tag_n_build_dates, key=lambda entry: entry['build_date'] or '')
      else:
         latest_build = {'tag': '', 'build_date': ''}

      latest_build_dates.append(latest_build['build_date'])
      latest_tags.append(latest_build['tag'])

   # Set both result columns in one go so they exist even for an empty frame
   tags_df['latest_build_date'] = latest_build_dates
   tags_df['latest_tag'] = latest_tags

   return tags_df

In [18]:
# Generate a list of cognitive services images
cogs_images = filter_images("azure-cognitive-services")
# Store cogs_images in a dataframe
image_df = parse_images(cogs_images)
# Get list of tags from MCR
tags_df = get_tags(image_df)
# Look up the build dates of each image's most recent tags and keep the newest one.
# get_tags and get_build_dates modify and return the frame they are given, so
# image_df and tags_df are the same object; drop() without inplace builds the
# final table as a new frame and leaves the 'tags' column available on tags_df.
tags_and_build_dates_df = get_build_dates(tags_df).drop(columns=['tags'])

1/27 | Getting build dates for: language
2/27 | Getting build dates for: sentiment
3/27 | Getting build dates for: keyphrase
4/27 | Getting build dates for: healthcare
5/27 | Getting build dates for: luis
6/27 | Getting build dates for: labeltool
7/27 | Getting build dates for: language
8/27 | Getting build dates for: anomaly-detector
9/27 | Getting build dates for: read
10/27 | Getting build dates for: layout
11/27 | Getting build dates for: businesscard
12/27 | Getting build dates for: receipt
13/27 | Getting build dates for: invoice
14/27 | Getting build dates for: custom-api
15/27 | Getting build dates for: custom-supervised
16/27 | Getting build dates for: id-document
17/27 | Getting build dates for: spatial-analysis
18/27 | Getting build dates for: spatial-analysis
19/27 | Getting build dates for: spatial-analysis
20/27 | Getting build dates for: speech-to-text
21/27 | Getting build dates for: custom-speech-to-text
22/27 | Getting build dates for: text-to-speech
23/27 | Getting b

In [19]:
# Show the final per-image table (latest tag and its build date) as notebook output
display(tags_and_build_dates_df)

Unnamed: 0,pillar,image,image_url,latest_build_date,latest_tag
0,textanalytics,language,mcr.microsoft.com/v2/azure-cognitive-services/textanalytics/language,2022-01-22T03:20:27.700342818Z,3.0.018470001-onprem-amd64
1,textanalytics,sentiment,mcr.microsoft.com/v2/azure-cognitive-services/textanalytics/sentiment,2022-01-22T03:34:42.749920863Z,3.0.018470001-zh-onprem-amd64
2,textanalytics,keyphrase,mcr.microsoft.com/v2/azure-cognitive-services/textanalytics/keyphrase,2022-01-22T03:31:09.435285807Z,3.0.018470001-onprem-amd64
3,textanalytics,healthcare,mcr.microsoft.com/v2/azure-cognitive-services/textanalytics/healthcare,2022-01-22T03:31:46.323761586Z,3.0.018470001-onprem-amd64
4,language,luis,mcr.microsoft.com/v2/azure-cognitive-services/language/luis,2020-11-24T14:21:18.711028393Z,1.1.014240001-amd64
5,custom-form,labeltool,mcr.microsoft.com/v2/azure-cognitive-services/custom-form/labeltool,2021-09-02T04:04:45.365559451Z,latest-2.1
6,samples,language,mcr.microsoft.com/v2/azure-cognitive-services/samples/language/frontend,2019-02-02T00:59:10.578123491Z,latest
7,decision,anomaly-detector,mcr.microsoft.com/v2/azure-cognitive-services/decision/anomaly-detector,2020-09-18T07:53:12.83093394Z,1.1.013560003-amd64-preview
8,vision,read,mcr.microsoft.com/v2/azure-cognitive-services/vision/read,2022-01-17T09:12:22.722261697Z,latest
9,form-recognizer,layout,mcr.microsoft.com/v2/azure-cognitive-services/form-recognizer/layout,2022-01-17T09:22:18.68923967Z,2.1.0.018370001-56884c6c-amd64-preview


In [20]:
# Plain-text rendering of the same table; for a DataFrame this prints exactly
# what pprint would, because pprint falls back to the object's repr
print(tags_and_build_dates_df)

             pillar                  image  \
0     textanalytics               language   
1     textanalytics              sentiment   
2     textanalytics              keyphrase   
3     textanalytics             healthcare   
4          language                   luis   
5       custom-form              labeltool   
6           samples               language   
7          decision       anomaly-detector   
8            vision                   read   
9   form-recognizer                 layout   
10  form-recognizer           businesscard   
11  form-recognizer                receipt   
12  form-recognizer                invoice   
13  form-recognizer             custom-api   
14  form-recognizer      custom-supervised   
15  form-recognizer            id-document   
16           vision       spatial-analysis   
17           vision       spatial-analysis   
18           vision       spatial-analysis   
19   speechservices         speech-to-text   
20   speechservices  custom-speech