<a href="https://colab.research.google.com/github/ml2-picme/PicMe/blob/master/imagenet_processing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# returns a words list, including parent and children words for a given synset-ID
def getWords(synsetId):
  """Collect the words of a synset plus those of its direct children and parents.

  The words of `synsetId` itself come first, followed by the words of each
  child synset and then the words of each parent synset. When the synset
  itself yields no words, no hierarchy lookup is done and the empty result
  is returned unchanged.

  Args:
    synsetId: synset ID string, e.g. 'n06359193'.

  Returns:
    The list obtained from getWordsBySynsetId(synsetId), extended in place
    with the children's and the parents' words.
  """
  result = getWordsBySynsetId(synsetId)
  if result:
    # Fetch each hierarchy dictionary exactly once: every call to these
    # helpers downloads the complete is-a relation file again.
    # get child IDs (no entry means the synset has no children)
    children = getParentToChildrenDictionary().get(synsetId, [])
    # get words for child IDs
    print(('retrieving words for children: ' + str(children)))
    result.extend(getWordsBySynsetIds(children))

    # get parent IDs (no entry means the synset has no parents)
    parents = getChildToParentsDictionary().get(synsetId, [])
    # get words for parent IDs
    print(('retrieving words for parents: ' + str(parents)))
    result.extend(getWordsBySynsetIds(parents))

  return result

# returns a words list from image-net for a given synset-ID
def getWordsBySynsetId(synsetId):
  # GET from image-net using curl
  url = 'http://www.image-net.org/api/text/wordnet.synset.getwords?wnid=' + synsetId
  result = !curl $url
  
  # in case of invalid id we get an 'invalid url' response
  if 'Invalid url!' in result:
    result = []
    
  print('Synset-ID ' + synsetId + ' has words ' + str(result))
  return result

# returns a words list from image-net for a list of synset-IDs
def getWordsBySynsetIds(synsetIdList):
  """Return the concatenated word lists of all synset IDs in `synsetIdList`.

  The IDs are looked up one after another via getWordsBySynsetId, and the
  words keep the order of the IDs. An empty input gives an empty list.
  """
  if len(synsetIdList) == 0:
    return []
  return [word for wnid in synsetIdList for word in getWordsBySynsetId(wnid)]

# returns a dictionary containing parent synset-IDs as keys and for each key a list of children synset ids
def getParentToChildrenDictionary():
  # GET from image-net using curl
  # response is a list where each item is of format 'parent child'
  wordnetHierachyList = !curl http://www.image-net.org/archive/wordnet.is_a.txt

  # check for invalid url response
  if 'The URL is not valid.' in wordnetHierachyList:
    return {}
  
  # transform list into dictionary
  d = {}
  for item in wordnetHierachyList:
    parentChild = item.split(' ')
    parent = parentChild[0]
    child = parentChild[1]
    # is key (=parent) with value(=list of children) already defined?
    if parent in d:
      # add child to list
      list = d[parent]
      list.append(child)
    else:
      # create new list
      d[parent] = [child]
  return d

# returns a dictionary containing children synset-IDs as keys and for each key a list of parent synset ids
def getChildToParentsDictionary():
  # GET from image-net using curl
  # response is a list where each item is of format 'parent child'
  wordnetHierachyList = !curl http://www.image-net.org/archive/wordnet.is_a.txt

  # check for invalid url response
  if 'The URL is not valid.' in wordnetHierachyList:
    return {}

  # transform list into dictionary
  d = {}
  for item in wordnetHierachyList:
    parentChild = item.split(' ')
    parent = parentChild[0]
    child = parentChild[1]
    # is key (=child) with value(=list of parents) already defined?
    if child in d:
      # add parent to list
      list = d[child]
      list.append(parent)
    else:
      # create new list
      d[child] = [parent]
  return d
  

In [148]:
# Example: get the words for synset-ID n06359193, including those of its children and parents.
# Kept as a bare expression so the notebook displays the returned list as the cell output.
getWords('n06359193')

Synset-ID n06359193 has words ['web site', 'website', 'internet site', 'site']
retrieving words for children: ['n06359467', 'n06359657']
Synset-ID n06359467 has words ['chat room', 'chatroom']
Synset-ID n06359657 has words ['portal site', 'portal']
retrieving words for parents: ['n03082979']
Synset-ID n03082979 has words ['computer', 'computing machine', 'computing device', 'data processor', 'electronic computer', 'information processing system']


['web site',
 'website',
 'internet site',
 'site',
 'chat room',
 'chatroom',
 'portal site',
 'portal',
 'computer',
 'computing machine',
 'computing device',
 'data processor',
 'electronic computer',
 'information processing system']