In [0]:
## Installation required when using Google Colab
!pip install webcolors
!pip install word2number
!pip install -r requirements.txt

Collecting webcolors
  Downloading https://files.pythonhosted.org/packages/8b/ff/c21df7e08e68a1a84b947992c07dfed9cfe7219d068cb7728358d065c877/webcolors-1.10-py2.py3-none-any.whl
Installing collected packages: webcolors
Successfully installed webcolors-1.10
Collecting word2number
  Downloading https://files.pythonhosted.org/packages/4a/29/a31940c848521f0725f0df6b25dca8917f13a2025b0e8fcbe5d0457e45e6/word2number-1.1.zip
Building wheels for collected packages: word2number
  Building wheel for word2number (setup.py) ... [?25l[?25hdone
  Created wheel for word2number: filename=word2number-1.1-cp36-none-any.whl size=5588 sha256=c5800e98aa7944e452dc75eaa5f9dd7f860b1b85106fcb2b0297d341d668f0bd
  Stored in directory: /root/.cache/pip/wheels/46/2f/53/5f5c1d275492f2fce1cdab9a9bb12d49286dead829a4078e0e
Successfully built word2number
Installing collected packages: word2number
Successfully installed word2number-1.1
[31mERROR: Could not open requirements file: [Errno 2] No such file or directory: '

In [0]:
## Imports
import spacy
from spacy import displacy
import webcolors
from word2number import w2n
import json

In [0]:
## Object to be used in the Sketching module. Contains the necessary details.
# Find another name?
class Object:
  """Plain record describing one object: name, color, size, number and location.

  Every constructor argument is stored unchanged on the instance under the
  attribute of the same name.
  """

  def __init__(self, name, color="white", size=1, number = 1, location="random"):
    # Attributes are assigned in parameter order, which is also the key order
    # that to_json() emits.
    self.name, self.color, self.size = name, color, size
    self.number, self.location = number, location

  def print(self):
    """Write all attributes to stdout on a single line."""
    fields = ("Name: ", self.name,
              "\tColor:", self.color,
              " Size:", self.size,
              " Number:", self.number,
              " Location:", self.location)
    print(*fields)

  def to_json(self):
    """Return the instance attributes serialized as a JSON string."""
    return json.dumps(vars(self))

In [0]:
## Functions to identify what information a feature gives about the object
# Adjectives treated as size hints: index 0 lists the "large" words,
# index 1 lists the "small" words.
size_map = [
  ["big", "enormous", "giant", "gigantic", "fat", "great", "huge", "immense",
   "large", "massive", "overweight", "wide", "titanic", "thick", "tall"],
  ["little", "small", "mini", "miniature", "petite", "tiny", "thin", "slim",
   "short"],
]

def isSize(feature_txt):
  """Map a size adjective to a scale factor.

  Returns 2 for a word in size_map[0], 0.5 for a word in size_map[1],
  and False when the word is in neither list.
  """
  # The factors are paired with the word lists in the order size_map stores them.
  for words, factor in zip(size_map, (2, 0.5)):
    if feature_txt in words:
      return factor
  return False

def isColor(feature_txt):
  """Tell whether `feature_txt` is a CSS3 color name known to webcolors.

  The check goes through the public `webcolors.name_to_hex` function instead
  of the `CSS3_NAMES_TO_HEX` mapping: recent webcolors releases no longer
  expose that constant, while `name_to_hex` is available in old and new
  releases alike. Note that `name_to_hex` normalizes case, so "Blue" is
  accepted as well as "blue".

  Returns True for a known color name, False otherwise (including any
  non-string input).
  """
  if not isinstance(feature_txt, str):
    return False
  try:
    webcolors.name_to_hex(feature_txt, spec="css3")
  except ValueError:
    # name_to_hex signals an unknown color name with ValueError.
    return False
  return True

def isNumber(feature_txt):
  """Tell whether `feature_txt` can be converted by `w2n.word_to_num`.

  Returns True when the conversion succeeds and False when it raises an
  ordinary exception. KeyboardInterrupt and SystemExit are deliberately not
  swallowed, so interrupting the kernel still works while this runs.
  """
  try:
    w2n.word_to_num(feature_txt)
  except Exception:
    # Any conversion failure simply means "not a number word".
    return False
  return True

def isPreposition(feature):
  """Tell whether the token's dependency label (`dep_`) is "prep"."""
  dependency_label = feature.dep_
  return dependency_label == "prep"

In [0]:
## Find the main object
def main_object(doc):
  """Pick the token that represents the main object of `doc`.

  The first token whose dependency label is "attr" wins. Without one, the
  first token with part of speech "NOUN" is returned, and if there is no
  noun either, the first token of the document.
  """
  first_token = doc[0]

  attr_token = next((tok for tok in doc if tok.dep_ == "attr"), None)
  if attr_token is not None:
    return attr_token

  return next((tok for tok in doc if tok.pos_ == "NOUN"), first_token)

## Extract the relevant information of the main object

def extract_features2(doc, main_obj_token):
  """Return the children of `main_obj_token` as a list.

  `doc` is accepted but not used.
  """
  return list(main_obj_token.children)

## Creates an "Object" based on the given information
def match_features(feature_lst,main_obj_token):
  """Build an `Object` for the main token and fill it in from its features.

  The object is named after the lemma of `main_obj_token`. Each feature in
  `feature_lst` is then classified by its lemma, first match wins:
    * size word   -> `size` is set to the factor returned by isSize
    * color name  -> `color` is set to the lemma
    * number word -> `number` is set to the converted value
    * preposition -> `location` becomes
                     {"Preposition": <lemma>, "Location": <noun lemma>}
  Features matching none of these are ignored.

  For a preposition, the location is the lemma of its first child tagged
  "NN"; if there is none, the first child whose tag starts with "NN" (e.g.
  plural or proper nouns) is used. When the preposition has no noun child at
  all, the location is left untouched instead of raising IndexError.

  Returns the populated `Object`.
  """
  ob = Object(name = main_obj_token.lemma_)
  for feature in feature_lst:
    feature_txt = feature.lemma_

    size = isSize(feature_txt)
    if(size):
      ob.size = size

    elif(isColor(feature_txt)):
      ob.color = feature_txt

    elif(isNumber(feature_txt)):
      ob.number = w2n.word_to_num(feature_txt)

    elif(isPreposition(feature)):
      # Materialize once: the children are scanned up to two times below.
      children = list(feature.children)
      location = next((child.lemma_ for child in children if child.tag_ == "NN"), None)
      if location is None:
        # No singular common noun: accept any other noun tag.
        location = next((child.lemma_ for child in children if child.tag_.startswith("NN")), None)
      if location is not None:
        ob.location = { "Preposition": feature.lemma_,
                        "Location": location }

  return ob

In [0]:
def sentence_processing(sentence):
  """Turn a sentence into the JSON description of its main object.

  The sentence is parsed with the module-level `nlp` callable (not defined in
  this cell — presumably a loaded spaCy pipeline; confirm where it is
  created). The main token is located with main_object, its children are
  collected with extract_features2, and match_features builds the object
  whose to_json() result is returned.
  """
  parsed = nlp(sentence)
  head_token = main_object(parsed)
  features = extract_features2(parsed, head_token)
  return match_features(features, head_token).to_json()

In [0]:
## Helper functions for analyzing the sentence

# Lemma: Base form of the token, with no inflectional suffixes
# Part of Speech: Coarse-grained part-of-speech
# Tag: Fine-grained part-of-speech
# Dep: Syntactic dependency relation

def display_tokens(doc):
  """Print one line per token: text, lemma, PoS, tag, dependency and its explanation.

  A header line wrapped in ANSI escape codes is printed first. The
  explanation column is whatever spacy.explain returns for the token's
  dependency label.
  """
  color_on, color_off = '\033[36m', "\033[0;0m"
  print(color_on.ljust(45), "Lemma PoS Tag Dep", color_off)

  for tok in doc:
    row = (tok.text.ljust(40), tok.lemma_, tok.pos_, tok.tag_,
           tok.dep_.ljust(30), "\t", spacy.explain(tok.dep_))
    print(*row)

def analyze_dependency(doc):
  """Print every token with its ancestors and children, then a dashed separator."""
  for tok in doc:
    ancestors = list(tok.ancestors)
    children = list(tok.children)
    print("Token:", tok, "\n\tAncestors: ", ancestors, "\n\tChildren: ", children)
    print("-------------------")

def dependency_tree(doc):
  """Render `doc` through displacy with style 'dep' in jupyter mode.

  Returns whatever displacy.render returns.
  """
  render_options = {"jupyter": True, "style": 'dep'}
  return displacy.render(doc, **render_options)

In [0]:
# Print the token table for `doc`.
# NOTE(review): `doc` is not defined in any cell shown above — presumably it is
# created with nlp(...) in another cell; confirm this cell works after
# Restart Kernel -> Run All.
display_tokens(doc)

[36m                                         Lemma PoS Tag Dep [0;0m
Three                                    three NUM CD nummod                         	 numeric modifier
big                                      big ADJ JJ amod                           	 adjectival modifier
blue                                     blue ADJ JJ amod                           	 adjectival modifier
birds                                    bird NOUN NNS ROOT                           	 None
on                                       on ADP IN prep                           	 prepositional modifier
the                                      the DET DT det                            	 determiner
roof                                     roof NOUN NN pobj                           	 object of preposition
of                                       of ADP IN prep                           	 prepositional modifier
the                                      the DET DT det                            	 determiner
house 