In [31]:
from pattern.graph import Graph

In [3]:
# Small demo graph: three typed edges that all touch 'hip_replacement'.
g = Graph()
demo_edges = [
    ('hip_replacement', 'illness', 'is-a'),  # hip_replacement is-a illness
    ('break', 'hip_replacement', 'is-property-of'),
    ('hip_replacement', 'orthopedic', 'is-related-to'),
]
for source, target, relation in demo_edges:
    g.add_edge(source, target, type=relation)

# Look the node up by id and show its id and its linked nodes.
node = g['hip_replacement']
print(node.id)
print(node.links)

hip_replacement
[Node(id='illness'), Node(id='break'), Node(id='orthopedic')]


In [32]:
from pattern.db import CSV 


In [33]:
# Rebind `g` to a fresh Graph instance; later cells add edges to this one.
g = Graph()

In [34]:
# Load the CSV and add one typed, weighted edge per row.
# Each row must unpack into exactly five fields; `context` is read but unused.
data = CSV.load('commonsense.csv')
for concept1, relation, concept2, context, weight in data:
    # The integer weight is scaled by 0.1 and capped at 1.0.
    scaled = min(int(weight) * 0.1, 1.0)
    g.add_edge(concept1, concept2, type=relation, weight=scaled)

In [35]:
# Dump the graph's printed form (an id -> Node mapping; the output is very long).
print(g)

{u'fawn': Node(id=u'fawn'), u'ladybird': Node(id=u'ladybird'), u'dynamic': Node(id=u'dynamic'), u'yellow': Node(id=u'yellow'), u'four': Node(id=u'four'), u'woods': Node(id=u'woods'), u'sleep': Node(id=u'sleep'), u'heavy metal': Node(id=u'heavy metal'), u'appetite': Node(id=u'appetite'), u'hate': Node(id=u'hate'), u'shamble': Node(id=u'shamble'), u'maidenhair': Node(id=u'maidenhair'), u'electricity': Node(id=u'electricity'), u'spin': Node(id=u'spin'), u'swan': Node(id=u'swan'), u'Scottsdale': Node(id=u'Scottsdale'), u's57': Node(id=u's57'), u's56': Node(id=u's56'), u'Vulcan': Node(id=u'Vulcan'), u'goa trance': Node(id=u'goa trance'), u's51': Node(id=u's51'), u's50': Node(id=u's50'), u'pride': Node(id=u'pride'), u'Akzidenz-Grotesk': Node(id=u'Akzidenz-Grotesk'), u'Uffizi': Node(id=u'Uffizi'), u'merchant': Node(id=u'merchant'), u'@': Node(id=u'@'), u'hormone': Node(id=u'hormone'), u'Donald Duck': Node(id=u'Donald Duck'), u'risk': Node(id=u'risk'), u'geology': Node(id=u'geology'), u'cemeta

In [36]:
def halo(node, depth=2):
    """Return ``node.flatten(depth)``.

    Presumably this is the set of nodes within `depth` links of `node`
    (confirm against the pattern.graph documentation).

    node  -- object exposing a ``flatten(depth)`` method.
    depth -- value forwarded to ``flatten``; defaults to 2.
    """
    surrounding = node.flatten(depth)
    return surrounding

In [37]:
def field(node, depth=3, fringe=2):
    """Return nodes of ``node.graph`` taken from the fringe of an 'is-a' subgraph.

    Steps, as written in the code:
      1. flatten `node` to `depth`, following only edges accepted by
         `traversable` (edge type 'is-a' whose node2 is the node being expanded);
      2. copy the graph restricted to those nodes;
      3. take ``fringe(depth=fringe)`` of that copy;
      4. map every fringe node except `node` back to the node with the same
         id in the original graph.

    Returns a list of nodes from ``node.graph``.
    """
    def traversable(node, edge):
        # `node` here is the callback argument and shadows the outer `node`.
        if edge.node2 == node:
            return edge.type == 'is-a'
        return False

    reachable = node.flatten(depth, traversable)
    outer = node.graph.copy(nodes=reachable).fringe(depth=fringe)
    return [node.graph[n.id] for n in outer if n != node]

In [38]:
# Ids of every node that is node1 of an 'is-property-of' edge in `g`.
# Stored as dict keys (all values True) so `id in PROPERTIES` is a hash lookup.
PROPERTIES = dict.fromkeys(
    (edge.node1.id for edge in g.edges if edge.type == 'is-property-of'),
    True)

In [39]:
# Memo for properties(): node id -> list of property nodes.
# Entries are keyed by id only and are never invalidated.
cache = {}

def properties(node):
    """Return the property nodes found in the halo of `node`, ranked by centrality.

    The halo of `node` is copied into a subgraph, the nodes whose id is in
    PROPERTIES are kept and ordered by their `centrality` in that subgraph
    (highest first), and each is mapped back to the node with the same id in
    ``node.graph``. The result is stored in `cache` under ``node.id`` and
    returned directly on later calls.
    """
    try:
        return cache[node.id]
    except KeyError:
        pass
    neighbourhood = node.graph.copy(nodes=halo(node))
    ranked = sorted(
        (n for n in neighbourhood.nodes if n.id in PROPERTIES),
        key=lambda n: n.centrality)
    # Reverse after an ascending sort (not reverse=True) to keep the same
    # ordering among nodes with equal centrality.
    ranked.reverse()
    result = [node.graph[n.id] for n in ranked]
    cache[node.id] = result
    return result

In [40]:
def similarity(node1, node2, k=3):
    """Score how close the top-k properties of two nodes are in the graph.

    For every pair made of one of the first `k` properties of `node1` and one
    of the first `k` properties of `node2`, the shortest path between them is
    looked up in ``node1.graph`` and ``1 / len(path)`` is accumulated; a pair
    with no path (None) contributes ``1 / 1e10``. The total is divided by `k`.

    The path heuristic returns 0 for an 'is-property-of' edge and 1 for any
    other edge type.
    """
    graph = node1.graph

    def heuristic(id1, id2):
        return 1 - int(graph.edge(id1, id2).type == 'is-property-of')

    total = 0.0
    for prop1 in properties(node1)[:k]:
        for prop2 in properties(node2)[:k]:
            path = graph.shortest_path(prop1, prop2, heuristic=heuristic)
            length = 1e10 if path is None else len(path)
            total += 1.0 / length
    return total / k

In [41]:
def nearest_neighbors(node, candidates=[], k=3):
    """Return `candidates` as a new list ordered by similarity to `node`.

    Each candidate is scored with ``similarity(node, candidate, k)`` and the
    list is sorted from highest to lowest score; candidates with equal scores
    keep their input order. `candidates` itself is not modified.
    """
    def score(candidate):
        return similarity(node, candidate, k)

    ranked = list(candidates)
    ranked.sort(key=score, reverse=True)
    return ranked

In [42]:
# Rank the nodes of field(animal) by similarity to the 'creepy' node.
target = g['creepy']
candidates = field(g['animal'])
print(nearest_neighbors(target, candidates))

[Node(id=u'vulture'), Node(id=u'octopus'), Node(id=u'bat'), Node(id=u'wolf'), Node(id=u'piranha'), Node(id=u'mole'), Node(id=u'grizzly'), Node(id=u'raven'), Node(id=u'owl'), Node(id=u'rhinoceros'), Node(id=u'sea urchin'), Node(id=u'tick'), Node(id=u'spider'), Node(id=u'vampire'), Node(id=u'earthworm'), Node(id=u'Sleipnir'), Node(id=u'crow'), Node(id=u'Cheshire Cat'), Node(id=u'reptile'), Node(id=u'fish'), Node(id=u'arachnid'), Node(id=u'pit bull'), Node(id=u'worm'), Node(id=u'parrot'), Node(id=u'turtle'), Node(id=u'toad'), Node(id=u'amphibian'), Node(id=u'gnu'), Node(id=u'panther'), Node(id=u'caterpillar'), Node(id=u'scorpion'), Node(id=u'panda'), Node(id=u'invertebrate'), Node(id=u'eagle'), Node(id=u'penguin'), Node(id=u'snail'), Node(id=u'slug'), Node(id=u'bird of prey'), Node(id=u'cockroach'), Node(id=u'clam'), Node(id=u'snake'), Node(id=u'cat'), Node(id=u'crocodile'), Node(id=u'tasmanian devil'), Node(id=u'hippopotamus'), Node(id=u'salamander'), Node(id=u'boar'), Node(id=u'dove'), 

In [74]:
from pattern.web import Google, plaintext 
from pattern.search import search 
 
def learn(concept, license=None):
    """Search Google for "I think <concept> is *" and return known properties.

    For each search result, the description is reduced to plain text and
    matched against the query pattern (with * as a wildcard); the last word of
    the first match is collected. Only collected words that are keys of
    PROPERTIES are returned, in the order found (duplicates included).

    concept -- string substituted into the query.
    license -- Google API key. When None, the GOOGLE_API_KEY environment
               variable is used; if that is unset too, None is passed to
               Google (presumably pattern then uses its own default key --
               confirm against the pattern.web documentation).

    SECURITY: an API key used to be hardcoded here. It has been removed from
    the source; treat the old key as compromised and rotate it.
    """
    import os  # local import so this cell does not depend on another cell

    if license is None:
        license = os.environ.get('GOOGLE_API_KEY')

    q = 'I think %s is *' % concept
    found = []
    # Named `engine` rather than `g` to avoid shadowing the notebook's graph.
    engine = Google(license=license, language='en')
    # NOTE(review): `start` looks 1-based in pattern.web, so start=0 may return
    # no results and page 10 is never requested -- confirm before changing.
    for i in range(10):
        for result in engine.search(q, start=i, cached=True):
            m = search(q, plaintext(result.description))  # use * as wildcard
            if m:
                found.append(m[0][-1].string)
    return [w for w in found if w in PROPERTIES]

In [75]:
#t = learn("England")
#print t
# Attach every property learned for 'England' to the graph.
learned = learn('England')
for p in learned:
    g.add_edge(p, 'England', type='is-property-of')

In [76]:
# Rank the nodes of field(animal) by similarity to the 'England' node.
target = g['England']
candidates = field(g['animal'])
print(nearest_neighbors(target, candidates))

[Node(id=u'praying mantis'), Node(id=u'stag'), Node(id=u'bird of prey'), Node(id=u'lion'), Node(id=u'rooster'), Node(id=u'vampire'), Node(id=u'mockingbird'), Node(id=u'badger'), Node(id=u'panther'), Node(id=u'bat'), Node(id=u'dolphin'), Node(id=u'grizzly'), Node(id=u'panda'), Node(id=u'donkey'), Node(id=u'mosquito'), Node(id=u'falcon'), Node(id=u'hawk'), Node(id=u'osprey'), Node(id=u'raptor'), Node(id=u'goose'), Node(id=u'rhinoceros'), Node(id=u'cat'), Node(id=u'dove'), Node(id=u'scorpion'), Node(id=u'lemming'), Node(id=u'piranha'), Node(id=u'boar'), Node(id=u'mare'), Node(id=u'horse'), Node(id=u'tapir'), Node(id=u'colt'), Node(id=u'stallion'), Node(id=u'burro'), Node(id=u'kangaroo'), Node(id=u'eagle'), Node(id=u'mouse'), Node(id=u'crow'), Node(id=u'raven'), Node(id=u'goldfish'), Node(id=u'sheep'), Node(id=u'albatross'), Node(id=u'hippopotamus'), Node(id=u'ferret'), Node(id=u'child'), Node(id=u'doe'), Node(id=u'reptile'), Node(id=u'Sleipnir'), Node(id=u'caterpillar'), Node(id=u'hamster

In [78]:
# Rank the nodes of field(person) by similarity to 'England', using a
# shallower field (depth=2, fringe=1) than the defaults.
target = g['England']
candidates = field(g['person'], depth=2, fringe=1)
print(nearest_neighbors(target, candidates))

[Node(id=u'Abraham Lincoln'), Node(id=u'David Carson'), Node(id=u'H. P. Lovecraft'), Node(id=u'Jerry Springer'), Node(id=u'man'), Node(id=u'woman'), Node(id=u'Arnold Schwarzenegger'), Node(id=u'Daniel Craig'), Node(id=u'Harvey Keitel'), Node(id=u'Adolf Hitler'), Node(id=u'Al Capone'), Node(id=u'John Cleese'), Node(id=u'communicator'), Node(id=u'innovator'), Node(id=u'Jesus Christ'), Node(id=u'Nicola Tesla'), Node(id=u'Alfred Hitchcock'), Node(id=u'John Baskerville'), Node(id=u'Thomas More'), Node(id=u'Neil Armstrong'), Node(id=u'problem solver'), Node(id=u'Adrian Frutiger'), Node(id=u'Giambattista Bodoni'), Node(id=u'Mark Zuckerberg'), Node(id=u'Leonardo da Vinci'), Node(id=u'referee'), Node(id=u'Martin Luther King'), Node(id=u'Matthew Carter'), Node(id=u'Albert Einstein'), Node(id=u'Osama Bin Laden'), Node(id=u'Karl Marx'), Node(id=u'Leonard Nimoy'), Node(id=u'loner'), Node(id=u'Christophe Plantin'), Node(id=u'Jonathan Barnbrook'), Node(id=u'Donald Knuth'), Node(id=u'Pierre B\xe9zier'

In [82]:
# Export the graph under the name 'test' as a directed graph.
# NOTE(review): `frames` and `ipf` look like layout/animation settings of the
# pattern exporter -- confirm their meaning in the pattern.graph docs.
g.export(
    'test',
    directed=True,
    frames=1,
    ipf=0,
)