In [1]:
import numpy as np
import spacy
from collections import Counter, defaultdict
from sklearn.preprocessing import normalize
import settings
import json
from tags import *

In [5]:
# Read the JSON content of 'data/coco_noun.tags'. The context manager closes
# the file handle as soon as parsing finishes instead of leaving it open
# until the garbage collector happens to reclaim it.
with open('data/coco_noun.tags') as tags_file:
    data = json.load(tags_file)

# Load the spaCy pipeline named by settings.SPACY_MODEL.
nlp = spacy.load(settings.SPACY_MODEL)

In [10]:
from nltk import Tree

def to_nltk_tree(node):
    """Recursively convert a parse node into an nltk ``Tree``.

    A node whose ``n_lefts + n_rights`` is not positive is returned as a bare
    string (its ``orth_`` text). Any other node becomes a ``Tree`` labelled
    with ``node.orth_`` whose children are the converted ``node.children``.
    """
    has_children = node.n_lefts + node.n_rights > 0
    if not has_children:
        # Leaf: represented by its text only, not by a Tree object.
        return node.orth_
    subtrees = [to_nltk_tree(child) for child in node.children]
    return Tree(node.orth_, subtrees)
    
def Stree(captions):
    """Pretty-print the tree of every sentence in ``captions``.

    Parameters
    ----------
    captions : iterable
        Parsed documents exposing ``.sents``; each sentence's ``.root`` is
        converted with ``to_nltk_tree`` and printed.

    A blank line is printed before each tree. Nothing is returned.
    """
    for doc in captions:
        for sent in doc.sents:
            print()
            tree = to_nltk_tree(sent.root)
            if isinstance(tree, str):
                # A root without children is converted to a bare string,
                # which has no pretty_print(); print the token text itself
                # rather than raising AttributeError.
                print(tree)
            else:
                tree.pretty_print()

## Example 305991

#### Merged ['egg', 'bacon']; ['ham', 'spinach']

In [6]:
# Look up the captions stored under data['train2014'][id_] and display them.
id_ = '305991'
caption_list = data['train2014'][id_]['captions']
caption_list

['Plate with breakfast sandwich made with English muffin, egg and ham.',
 'a sandwich on a plate on a table',
 'A white plate topped with a muffin filled with breakfast food.',
 'A sandwich with egg and ham and spinach',
 'Two sandwiches on English muffins featuring greens and cheddar cheese on one sandwich and Canadian bacon and an egg on the other sandwich.']

In [7]:
# Run every caption string through the loaded spaCy pipeline.
captions = [nlp(c) for c in caption_list]

In [11]:
# Print one tree per sentence of the parsed captions.
Stree(captions)


      Plate                              
  ______|________                         
 |              with                     
 |               |                        
 |            sandwich                   
 |       ________|_______                 
 |      |               made             
 |      |                |                
 |      |               with             
 |      |                |                
 |      |              muffin            
 |      |         _______|_________       
 |      |        |       |        egg    
 |      |        |       |      ___|___   
 .  breakfast English    ,    and     ham


    sandwich      
  _____|_______    
 |     on      on 
 |     |       |   
 |   plate   table
 |     |       |   
 a     a       a  


          plate                     
  __________|_________               
 |    |     |       topped          
 |    |     |         |              
 |    |     |        with           
 |    |     |         |            

## Example 1639 merged ['surfboard', 'wave']

#### {(('board', 'NOUN'), 'compound'): {surf, surf}, (('rid', 'VERB'), 'dobj'): {surfboard, wave}, (('man', 'NOUN'), 'acl'): {riding, standing, riding}}

In [39]:
# Look up the captions stored under data['train2014'][id_], parse each one
# with the spaCy pipeline, and display the raw caption strings.
id_ = '1639'
caption_list = data['train2014'][id_]['captions']
captions = [nlp(c) for c in caption_list]
caption_list

['A man riding a wave on top of a surfboard.',
 'a man on a surf board rides on a wave ',
 'A man riding a surfboard on a wave in the ocean.',
 'A man standing on a surfboard riding waves.',
 'A man surfs a wave on his surf board.']

In [40]:
# Print one tree per sentence of the parsed captions.
Stree(captions)


    man                     
  ___|________               
 |   |      riding          
 |   |        |              
 |   |       wave           
 |   |    ____|________      
 |   |   |             on   
 |   |   |             |     
 |   |   |            top   
 |   |   |             |     
 |   |   |             of   
 |   |   |             |     
 |   |   |         surfboard
 |   |   |             |     
 A   .   a             a    


        rides          
      ____|_________    
    man             |  
  ___|____          |   
 |        on        on 
 |        |         |   
 |      board      wave
 |    ____|____     |   
 a   a        surf  a  


           man                     
  __________|_______                
 |   |            riding           
 |   |       _______|_____          
 |   |      |             on       
 |   |      |             |         
 |   |      |            wave      
 |   |      |        _____|_____    
 |   |      |       |           in 
 |   

### Example 263823 merged ['ball', 'baseball']

##### {(('prepare', 'VERB'), 'nsubj'): {player, player}, (('prepare', 'VERB'), 'xcomp'): {swing, swing}, (('player', 'NOUN'), 'compound'): {baseball, baseball, ball, baseball}}

In [34]:
# Look up the captions stored under data['train2014'][id_], parse each one
# with the spaCy pipeline, and display the raw caption strings.
id_ = '263823'
caption_list = data['train2014'][id_]['captions']
captions = [nlp(c) for c in caption_list]
caption_list

['A baseball player prepares to swing at the ball. ',
 'A ball player prepares to swing as the umpire and catcher look on.',
 'Two baseball players and an umpire during a game.',
 'A baseball player getting ready to swing at the ball. ',
 'A baseball game is being played with the batter up.']

In [35]:
# Print one tree per sentence of the parsed captions.
Stree(captions)


               prepares               
  ________________|___________         
 |        |                 swing     
 |        |               ____|____    
 |        |              |         at 
 |        |              |         |   
 |      player           |        ball
 |    ____|_______       |         |   
 .   A         baseball  to       the 


                        prepares                          
  _________________________|_______                        
 |        |                      swing                    
 |        |           _____________|____                   
 |        |          |                 look               
 |        |          |      ____________|_____             
 |      player       |     |       |        umpire        
 |    ____|_____     |     |       |     _____|_______     
 .   A         ball  to    as      on  the   and   catcher


             players                      
  ______________|______________            
 |     |        | 

### Example 132725 merged ['bench', 'fence']

#### {(('cover', 'VERB'), 'auxpass'): {is, is}, (('cover', 'VERB'), 'dobj'): {fence, bench}} 

In [36]:
# Look up the captions stored under data['train2014'][id_], parse each one
# with the spaCy pipeline, and display the raw caption strings.
id_ = '132725'
caption_list = data['train2014'][id_]['captions']
captions = [nlp(c) for c in caption_list]
caption_list

['The bench next to the gate is covered in snow. ',
 'A snowy bench is isolated in a cold park',
 'A wooden benched covered in a bunch of snow.',
 'A bench that is covered and standing in snow.',
 'A snow covered bench along side a wrought iron fence']

In [37]:
# Print one tree per sentence of the parsed captions.
Stree(captions)


        covered                
  _________|________________    
 |   |          bench       |  
 |   |      ______|____     |   
 |   |     |          next  |  
 |   |     |           |    |   
 |   |     |           to   |  
 |   |     |           |    |   
 |   |     |          gate  in 
 |   |     |           |    |   
 is  .    The         the  snow


              isolated              
  _______________|__________         
 |        |                 in      
 |        |                 |        
 |      bench              park     
 |    ____|______        ___|____    
 is  A         snowy    a       cold


               covered               
  ________________|__________         
 |        |                  in      
 |        |                  |        
 |        |                bunch     
 |        |              ____|____    
 |      wooden          |         of 
 |    ____|_______      |         |   
 .   A         benched  a        snow


        bench               

### Example 284259 merged ['vegetable', 'piece']

#### {(('stew', 'NOUN'), 'conj'): {piece, vegetables}}

In [42]:
# Look up the captions stored under data['train2014'][id_], parse each one
# with the spaCy pipeline, and display the raw caption strings.
id_ = '284259'
caption_list = data['train2014'][id_]['captions']
captions = [nlp(c) for c in caption_list]
caption_list

['A plate topped with toast, stew and vegetables.',
 'A meal of stew, and vegetables sits on a decorated plate.',
 'Plate of food with bread, rice, and mixed vegetables. ',
 'a plate with veggies, some stew and a piece of flatbread',
 'A plate with different vegetables and bread on it.']

In [43]:
# Print one tree per sentence of the parsed captions.
Stree(captions)


        plate                       
  ________|_____                     
 |   |        topped                
 |   |          |                    
 |   |         with                 
 |   |          |                    
 |   |        toast                 
 |   |     _____|_____               
 |   |    |          stew           
 |   |    |      _____|_______       
 A   .    ,    and        vegetables


            sits                                    
  ___________|_________________________              
 |          meal                       on           
 |    _______|_______________          |             
 |   |   |   |       |       of      plate          
 |   |   |   |       |       |     ____|_______      
 .   A   ,  and  vegetables stew  a        decorated


         Plate                          
  _________|_________                    
 |   |              with                
 |   |               |                   
 |   |             bread                
 |

### Example 525898 merged ['child', 'bot']

#### {(('sit', 'VERB'), 'nsubj'): {bot, child}}

In [44]:
# Look up the captions stored under data['train2014'][id_], parse each one
# with the spaCy pipeline, and display the raw caption strings.
id_ = '525898'
caption_list = data['train2014'][id_]['captions']
captions = [nlp(c) for c in caption_list]
caption_list

['A boy with a hat on and chocolate on his face near a cake.',
 'A child with chocolate on his face sits at a table with a cake with a lit candle on it. ',
 'Little boy has picked some chocolate off of a birthday cake.',
 'A young boy sitting at a table right next to a cake.',
 'A little bot sits at a table as he nibbles on some frosting from a cake. ']

In [45]:
# Print one tree per sentence of the parsed captions.
Stree(captions)


            boy                    
  ___________|___                   
 |   |          with               
 |   |           |                  
 |   |          hat                
 |   |    _______|______________    
 |   |   |   |   |   chocolate  |  
 |   |   |   |   |       |      |   
 |   |   |   |   |       on    near
 |   |   |   |   |       |      |   
 |   |   |   |   |      face   cake
 |   |   |   |   |       |      |   
 A   .   a   on and     his     a  


                        sits                        
  _______________________|____________               
 |          |            |           with           
 |          |            |            |              
 |          |            |           cake           
 |          |            |     _______|_____         
 |        child          |    |            with     
 |    ______|______      |    |             |        
 |   |      |      on    at   |           candle    
 |   |      |      |     |    |    _______