In [21]:
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from collections import defaultdict 

In [26]:
def split_sentence(sentence):
    """Break a sentence into word tokens, dropping punctuation.

    Tokens are the maximal runs of word characters; every other character
    (spaces, punctuation, apostrophes) acts as a separator and is discarded.
    A contraction is therefore split at its apostrophe, e.g. "There's"
    becomes 'There' and 's'.

    Args:
        sentence: The raw text to tokenize.

    Returns:
        The tokens in their original order.
    """
    return RegexpTokenizer(r'\w+').tokenize(sentence)

def pos_tagging(sentence):
    """Tokenize a raw sentence and tag each token with its part of speech.

    Args:
        sentence: The raw text; it is tokenized with split_sentence() first.

    Returns:
        The result of nltk.pos_tag on the tokens, e.g.
        [('I', 'PRP'), ('see', 'VBP'), ...].
    """
    tokens = split_sentence(sentence)
    tagged_tokens = nltk.pos_tag(tokens)
    return tagged_tokens

def extract_objects(pos_tagged_sentence, tags=("NN",)):
    """Return the words of a POS-tagged sentence whose tag is in ``tags``.

    Args:
        pos_tagged_sentence: Iterable of (word, tag) pairs, such as the
            output of pos_tagging().
        tags: A single tag or a collection of tags to keep. The default
            ("NN",) keeps only words tagged "NN". Pass e.g.
            ("NN", "NNS", "NNP", "NNPS") to also collect the plural and
            proper-noun tags.

    Returns:
        A list of the matching words, in sentence order.
    """
    # A bare string would otherwise be treated as a character sequence and
    # matched by substring ("NN" in "NNS" is True), so wrap it first.
    if isinstance(tags, str):
        tags = (tags,)
    wanted_tags = tuple(tags)
    return [
        tagged_word[0]
        for tagged_word in pos_tagged_sentence
        if tagged_word[1] in wanted_tags
    ]

def generate_pos_dict(pos_tagged_sentence):
    """Group the words of a POS-tagged sentence by their tag.

    Alternative to extract_objects() that keeps every tag instead of
    filtering for one.

    Args:
        pos_tagged_sentence: Iterable of (word, tag) pairs.

    Returns:
        A defaultdict(list) mapping each tag to the words carrying it, in
        sentence order. Duplicate words are kept.
    """
    words_by_tag = defaultdict(list)
    for pair in pos_tagged_sentence:
        tag = pair[1]
        token = pair[0]
        words_by_tag[tag].append(token)
    return words_by_tag
    
def test(sentence_lst):
    """Print every stage of the pipeline for each sentence.

    For each sentence this prints, in order: the raw sentence, its tokens,
    the POS-tagged tokens, the words extracted by extract_objects(), and a
    blank separator line.

    Args:
        sentence_lst: Iterable of raw sentence strings.
    """
    for sentence in sentence_lst:
        print(sentence)
        print(split_sentence(sentence))
        # Tag once and reuse the result for both the printout and the
        # extraction, rather than running the tagger a second time.
        tagged_sentence = pos_tagging(sentence)
        print(tagged_sentence)
        print(extract_objects(tagged_sentence))
        print()

In [32]:
# Sample sentences for the demo run; `sentence` is reused by a later cell.
sentence = "I see a bird and a dog flying"
# The apostrophe is not a word character, so split_sentence() turns "There's"
# into 'There' and 's'.
# NOTE: in the recorded output the tagger labels 'flamingo' as JJ and
# 'dancing' as NN, so the extracted list has 'dancing' but not 'flamingo'.
sentence2 = "There's a flamingo dancing on the roof of the big house."
sentence_lst = [sentence,sentence2]

# Print the tokens, POS tags and extracted words for each sample sentence.
test(sentence_lst)

I see a bird and a dog flying
['I', 'see', 'a', 'bird', 'and', 'a', 'dog', 'flying']
[('I', 'PRP'), ('see', 'VBP'), ('a', 'DT'), ('bird', 'NN'), ('and', 'CC'), ('a', 'DT'), ('dog', 'NN'), ('flying', 'VBG')]
['bird', 'dog']

There's a flamingo dancing on the roof of the big house.
['There', 's', 'a', 'flamingo', 'dancing', 'on', 'the', 'roof', 'of', 'the', 'big', 'house']
[('There', 'EX'), ('s', 'VBZ'), ('a', 'DT'), ('flamingo', 'JJ'), ('dancing', 'NN'), ('on', 'IN'), ('the', 'DT'), ('roof', 'NN'), ('of', 'IN'), ('the', 'DT'), ('big', 'JJ'), ('house', 'NN')]
['dancing', 'roof', 'house']



In [25]:
# Group the words of the first sample sentence by POS tag.
# NOTE: relies on `sentence` assigned in the sample-sentences cell; on a fresh
# kernel that cell must run before this one.
pos_tagged_sentence = pos_tagging(sentence)
# Bare last expression so the notebook displays the resulting dict.
generate_pos_dict(pos_tagged_sentence)

defaultdict(list,
            {'PRP': ['I'],
             'VBP': ['see'],
             'DT': ['a', 'a'],
             'NN': ['bird', 'dog'],
             'CC': ['and'],
             'VBG': ['flying']})