In [3]:
import json
import urllib
import sys
from collections import defaultdict

In [4]:
# remove '_' if it is concatenating two words
# remove '_' and append s if it is plural
def sanitize_relation(relation):
	tokens = relation.split('_')
	if tokens[-1] == 's':
		tokens[-2] = tokens[-2] + 's'
		tokens.pop()

	return " ".join(tokens)

def sanitize_compound_arg(arg):
	tokens = arg.split(' - ')
	del tokens[-2:]
	return " ".join(tokens)

In [5]:
def search_api_request(api_key, query):
	service_url = 'https://www.googleapis.com/freebase/v1/search'
	params = {
	  'key': api_key,
	  'query': query
	}

	url = service_url + '?' + urllib.urlencode(params)
	response = json.loads(urllib.urlopen(url).read())
	first_result = response['result'][0]
	topic_name = str(first_result['name'])
	topic_id = str(first_result['mid']) # To be used by the topic API in scraping relations

	return (topic_name, topic_id)


In [6]:

def topic_api_request(api_key, topic):
	service_url = 'https://www.googleapis.com/freebase/v1/topic'
	params = {
	  'key': api_key,
	  'filter': '/people' # Can this be dynamically gotten from search?
	}

	topic_name = topic[0]
	topic_id = topic[1]
	url = service_url + topic_id + '?' + urllib.urlencode(params)
	topic = json.loads(urllib.urlopen(url).read())

	tuples = []
	for property in topic['property']:
		simple_prop = str(property.split('/')[-1])
		simple_prop = sanitize_relation(simple_prop)

		for value in topic['property'][property]['values']:
			arg = value['text'].encode("utf8") 
			if topic['property'][property]['valuetype'] == 'compound':
				arg = sanitize_compound_arg(arg) 
			tuples.append((topic_name, simple_prop, str(arg)))

	return tuples

In [7]:
def print_tuples(tuples):
	for t in tuples:
		print t

In [8]:
def construct_ppdb_from_file(file_name):
    ppdb = defaultdict(list)
    count = 0
    with open(file_name) as f:
        for line in f:
            data = line.split(" ||| ")
            pos_tag = data[0]
            source = data[1]
            target = data[2]
            ppdb[source].append(target)
            count += 1
            
    print str(count) + " paraphrases added to PPDB"
    return ppdb


def expand_tuple_for_phrase(t, phrase, ppdb, tuples):
    print "** expand_tuple_for_phrase ** tuple: "
    print t
    print "and phrase: " + phrase
    new_tuples = []
    for paraphrase in ppdb[phrase]:
        new_tuple = (t[0], paraphrase, t[2])
        new_tuples.append(new_tuple)
    
    print new_tuples
    print ""
    
    return new_tuples

def expand_tuples_with_ppdb(ppdb, tuples):
    new_tuples = []
    for t in tuples:
        phrase = t[1]
        new_tuples += expand_tuple_for_phrase(t, phrase, ppdb, tuples)
        for sub_phrase in phrase.split(' '):
            new_tuples+= expand_tuple_for_phrase(t, sub_phrase, ppdb, tuples)
    tuples += new_tuples

In [9]:
api_key = open(".api_key").read()

query = "Barack Obama" # Join all arguments to form one search query
topic = search_api_request(api_key, query)

print "***"
print "Query: \"" + query + "\" => found topic_name: " + topic[0] + " topic_id:  " + topic[1]
print "***"

tuples = topic_api_request(api_key, topic)
print_tuples(tuples)

***
Query: "Barack Obama" => found topic_name: Barack Obama topic_id:  /m/02mjmr
***
('Barack Obama', 'quotations', "Change will not come if we wait for some other person or some other time. We are the ones we've been waiting for. We are the change that we seek.")
('Barack Obama', 'quotations', "There is not a liberal America and a conservative America - there is the United States of America. There is not a black America and a white America and latino America and asian America - there's the United States of America.")
('Barack Obama', 'quotations', "I don't oppose all wars. What I am opposed to is a dumb war. What I am opposed to is a rash war.")
('Barack Obama', 'quotations', "If you're walking down the right path and you're willing to keep walking, eventually you'll make progress.")
('Barack Obama', 'quotations', 'My job is not to represent Washington to you, but to represent you to Washington.')
('Barack Obama', 'quotations', "Issues are never simple. One thing I'm proud of is that 

In [10]:
ppdb = construct_ppdb_from_file("ppdb-1.0-s-all")

6977679 paraphrases added to PPDB


In [11]:
expand_tuples_with_ppdb(ppdb, tuples)

** expand_tuple_for_phrase ** tuple: 
('Barack Obama', 'quotations', "Change will not come if we wait for some other person or some other time. We are the ones we've been waiting for. We are the change that we seek.")
and phrase: quotations
[]

** expand_tuple_for_phrase ** tuple: 
('Barack Obama', 'quotations', "Change will not come if we wait for some other person or some other time. We are the ones we've been waiting for. We are the change that we seek.")
and phrase: quotations
[]

** expand_tuple_for_phrase ** tuple: 
('Barack Obama', 'quotations', "There is not a liberal America and a conservative America - there is the United States of America. There is not a black America and a white America and latino America and asian America - there's the United States of America.")
and phrase: quotations
[]

** expand_tuple_for_phrase ** tuple: 
('Barack Obama', 'quotations', "There is not a liberal America and a conservative America - there is the United States of America. There is not a bl

In [12]:
print_tuples(tuples)

('Barack Obama', 'quotations', "Change will not come if we wait for some other person or some other time. We are the ones we've been waiting for. We are the change that we seek.")
('Barack Obama', 'quotations', "There is not a liberal America and a conservative America - there is the United States of America. There is not a black America and a white America and latino America and asian America - there's the United States of America.")
('Barack Obama', 'quotations', "I don't oppose all wars. What I am opposed to is a dumb war. What I am opposed to is a rash war.")
('Barack Obama', 'quotations', "If you're walking down the right path and you're willing to keep walking, eventually you'll make progress.")
('Barack Obama', 'quotations', 'My job is not to represent Washington to you, but to represent you to Washington.')
('Barack Obama', 'quotations', "Issues are never simple. One thing I'm proud of is that very rarely will you hear me simplify the issues.")
('Barack Obama', 'quotations', 'M