In [1]:
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

from graph_tool.all import *

import heapq
import requests
import json
import time
import csv
import itertools

In [2]:
def hex_to_int(id):
    '''Convert a patent id (string) into the integer id used in the graph.

    The string is interpreted as a base-16 number.
    '''
    graph_id = int(id, base=16)
    return graph_id

def int_to_hex(id):
    '''Map graph id (int) to patent id (string).

    The id is rendered as upper-case hexadecimal so that ids containing
    letters (e.g. 'D683268') come back unchanged after being parsed as a
    base-16 number. Ids made only of decimal digits are unaffected.
    '''
    return '{0:X}'.format(id)

In [3]:
# Directory that holds the input data; the citation graph is loaded from it.
DATA = './data/'
graph = load_graph('{}citations_graph.xml.gz'.format(DATA))
graph

<Graph object, directed, with 8274991 vertices and 86284396 edges at 0x7fb515969978>

In [4]:
def list_shortest_path(source, target, graph, verbose=False):
    '''Compute and (optionally) print the shortest path between two patents.
    When printing, the patent titles are pulled from the patentsview.org api.
    A title that cannot be retrieved (network error, malformed or empty
    response) is shown as '-No title available-' instead of aborting.

    Keyword arguments:
    source -- Patent id of source node (string)
    target -- Patent id of target node (string)
    graph -- Patent graph
    verbose -- Set to true to print out shortest path (default=False)

    Returns the result of ``shortest_path`` (its first element is the vertex
    sequence that gets printed), or None when either patent is missing from
    the graph or no path exists.
    '''
    api_url = 'http://www.patentsview.org/api/patents/query'
    no_title = '-No title available-'
    request_timeout = 30  # seconds; keeps a stalled request from hanging the cell

    source_int_id = hex_to_int(source)
    target_int_id = hex_to_int(target)

    source_vertex = find_vertex(graph, graph.vertex_properties.id, source_int_id)
    if len(source_vertex) == 0:
        print('Source not found in graph.')
        return None

    target_vertex = find_vertex(graph, graph.vertex_properties.id, target_int_id)
    if len(target_vertex) == 0:
        print('Target not found in graph.')
        return None

    path = shortest_path(graph, source_vertex[0], target_vertex[0])
    path_vertices = path[0]

    if len(path_vertices) == 0:
        print('The graph contains no path from source to target.')
        return None

    if verbose:
        print('Shortest path:\n--------------')
        last_idx = len(path_vertices) - 1
        for idx, v in enumerate(path_vertices):
            patent_id = int_to_hex(graph.vp.id[int(v)])
            # Let requests encode the JSON query; compact separators keep the
            # query text identical to the hand-built one.
            query = json.dumps({'patent_number': patent_id}, separators=(',', ':'))
            try:
                response = requests.get(api_url, params={'q': query},
                                        timeout=request_timeout)
                patent_info = response.json()
            except (requests.RequestException, ValueError):
                patent_info = None

            patents = patent_info.get('patents') if isinstance(patent_info, dict) else None
            # `patents` may be missing, None or an empty list.
            if patents:
                patent_title = patents[0]['patent_title']
            else:
                patent_title = no_title

            print('ID: {}\t{}'.format(patent_id, patent_title))
            if idx < last_idx:
                print('↓')

    return path

In [9]:
# Shortest citation path between the two patents, printed with titles.
path = list_shortest_path(source='4558413', target='2981877',
                          graph=graph, verbose=True)

The graph contains no path from source to target.


## All shortest paths (manual BFS implementation)

In [None]:
class ShortestPathVisitor(BFSVisitor):
    '''BFS visitor that records hop distances and shortest-path predecessors.

    For every vertex reached, ``dist`` receives the number of edges from the
    search root and ``pred`` receives the indices of all vertices that
    precede it on some shortest path.

    Keyword arguments:
    name -- Label stored on the visitor (not used by the visitor itself)
    pred -- Vertex property map of integer vectors, filled with predecessors
    dist -- Vertex property map of integers, filled with distances

    A visitor instance remembers the root of the search it was used for, so
    use a fresh instance (and fresh property maps) for every search.
    '''
    def __init__(self, name, pred, dist):
        self.name = name
        self.pred = pred
        self.dist = dist
        # Index of the search root; set when the first vertex is discovered.
        self.root = None

    def discover_vertex(self, u):
        # The first vertex discovered by a BFS is its root.
        # NOTE(review): assumes a single-root search, as used in this notebook.
        if self.root is None:
            self.root = int(u)

    def examine_edge(self, e):
        target = e.target()

        # An edge leading back to the root (cycle or self-loop) must not give
        # the root a predecessor or overwrite its distance of 0. The root has
        # an empty predecessor list, so it would otherwise look undiscovered.
        if self.root is not None and int(target) == self.root:
            return

        # Distance the target would have when reached through this edge
        next_dist = self.dist[e.source()] + 1

        if self.pred[target]:
            # Already discovered: keep this edge only if it is equally short
            if next_dist == self.dist[target]:
                self.pred[target].append(int(e.source()))
        else:
            # First to discover is shortest
            self.pred[target].append(int(e.source()))
            self.dist[target] = next_dist

In [None]:
def all_shortest_paths(graph, pred, source, sink):
    '''Returns all shortest paths from a source to a sink.
    Uses graph indices.
    Returns vertex set, edge set.

    Keyword arguments:
    graph -- Patent graph (unused; kept for interface compatibility)
    pred -- Mapping from a vertex index to the indices of its predecessors
    source -- Index of the source vertex; predecessors are not followed past it
    sink -- Index of the sink vertex

    The predecessor lists are walked backwards from the sink with an explicit
    stack and each vertex is expanded only once, so the cost is linear in the
    size of the result even when many shortest paths share vertices. A sink
    without predecessors yields just ``{sink}`` and no edges.
    '''
    vertices = {sink}
    edges = set()
    pending = [sink]

    while pending:
        current = pending.pop()
        if current == source:
            # Paths start here; never walk past the source.
            continue
        for v in pred[current]:
            edges.add((v, current))
            if v not in vertices:
                vertices.add(v)
                pending.append(v)

    return vertices, edges

In [None]:
def all_shortest_paths_combined(graph, source, sink_list, dist, pred):
    '''Collect all shortest paths from one source patent to several sinks.

    Runs a single breadth-first search from the source, which fills ``dist``
    and ``pred``, then merges the shortest-path vertices and edges of every
    sink into one result.

    Keyword arguments:
    graph -- Patent graph
    source -- Patent id of the source node (string)
    sink_list -- Patent ids of the sink nodes (list of strings)
    dist -- Vertex property map that receives the BFS distances
    pred -- Vertex property map that receives the shortest-path predecessors

    Returns vertex set, edge set (graph indices). If the source is not in the
    graph a message is printed and both sets are empty; sinks that are not in
    the graph are reported and skipped.
    '''
    vertices = set()
    edges = set()

    print('Computing all shortest paths from ' + source)
    source_matches = find_vertex(graph, graph.vertex_properties.id, hex_to_int(source))
    if len(source_matches) == 0:
        print('Source not found in graph.')
        return vertices, edges

    source_index = int(source_matches[0])
    bfs_search(graph, source_index, ShortestPathVisitor('SPV_' + source, pred, dist))

    for sink in sink_list:
        print('\tto ' + sink)
        sink_matches = find_vertex(graph, graph.vertex_properties.id, hex_to_int(sink))
        if len(sink_matches) == 0:
            print('\t\tnot found in graph, skipping.')
            continue
        vs, es = all_shortest_paths(graph, pred, source_index, int(sink_matches[0]))
        vertices |= vs
        edges |= es

    return vertices, edges

Source Patents:

- ..'6285999' Google Page rank
- '9652896' Snapchat AR
- .'8825597' Dropbox
- '8225376' Facebook
- '7904382' Solar City
- '6955484' GoPro
- ..'8046721' Apple slide to unlock
- .'9063330' Oculus Rift
- 'D683268' Tesla car design
- '9134731' Autonomous driving
- ..'8930044' Autonomous medical drone
- '5194299' Post-it



Sink Patents:

- .'4136359' Apple II
- ..'2981877' Semiconductor / IC
- '416194' Electric motor - Nikola Tesla
- '174465' Telephone
- '1647' Telegraph
- '200521' Phonograph
- '223898' Light bulb
- X '430804' Electrical calculating system 
- ..'821393' Flying-machine
- '1867377' Sliced bread
- '1102653' Multistaged rocket
- '4022227' Method of concealing partial baldness 
- X '20050228218' Double anchor strapless dildo 
- '8540' Zipper
- '3009235' Velcro
- '1331952' Bouncy shoe
- .'1143542' Moving picture
- ..'676332' Apparatus for wireless telegraphy

In [None]:
# Patents selected for this run (labels taken from the lists in this notebook).
source = '6285999'  # Google Page rank
sink_list = [
    '4136359',  # Apple II
    '2981877',  # Semiconductor / IC
    '223898',   # Light bulb
    '821393',   # Flying-machine
    '3009235',  # Velcro
    '1143542',  # Moving picture
    '676332',   # Apparatus for wireless telegraphy
]

In [None]:
# Fresh per-vertex maps for this search: predecessor lists and hop distances.
pred = graph.new_vertex_property('vector<int32_t>')
dist = graph.new_vertex_property('int')

asp = all_shortest_paths_combined(graph, source, sink_list, dist=dist, pred=pred)

In [None]:
# Largest recorded distance among the sinks (0 when there are no sinks).
sink_dists = [
    dist[find_vertex(graph, graph.vertex_properties.id, hex_to_int(sink))[0]]
    for sink in sink_list
]
max_dist = max([0] + sink_dists)

In [None]:
# Display the largest sink distance.
max_dist

## Convert paths to CSVs

In [None]:
def paths_vertices_to_csv(paths, filename, dist, max_dist):
    '''Write the vertices of the given paths to a CSV file.

    Each row holds the patent id, an interval string
    '<[distance,max_dist]>', the distance itself and the patent title pulled
    from the patentsview.org api. A title that cannot be retrieved (network
    error, malformed or empty response) is written as '-No title available-'.

    Keyword arguments:
    paths -- Iterable of (vertex set, edge set) results using graph indices
    filename -- Path of the CSV file to write
    dist -- Distance of every vertex, indexable by graph index
    max_dist -- Upper bound written into every interval

    Reads patent ids from the notebook-level ``graph``.
    Returns the list of rows written (without the header).
    '''
    api_url = 'http://www.patentsview.org/api/patents/query'
    no_title = '-No title available-'
    request_timeout = 30  # seconds; keeps a stalled request from hanging the cell

    vertices = []

    for p in paths:  # For each path
        for v in p[0]:  # For all the vertices in that path
            patent_id = int_to_hex(graph.vp.id[int(v)])
            # Let requests encode the JSON query; compact separators keep the
            # query text identical to the hand-built one.
            query = json.dumps({'patent_number': patent_id}, separators=(',', ':'))
            try:
                response = requests.get(api_url, params={'q': query},
                                        timeout=request_timeout)
                patent_info = response.json()
            except (requests.RequestException, ValueError):
                patent_info = None

            patents = patent_info.get('patents') if isinstance(patent_info, dict) else None
            # `patents` may be missing, None or an empty list.
            if patents:
                patent_title = patents[0]['patent_title']
            else:
                patent_title = no_title

            z = dist[v]
            vertex_dist = '<[' + str(z) + ',' + str(max_dist) + ']>'
            vertices.append([patent_id, vertex_dist, z, patent_title])

    # newline='' is what the csv module expects (no doubled line endings on
    # Windows); utf-8 keeps non-ASCII titles from failing under other locales.
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        writer = csv.writer(output_file)
        writer.writerow(['Id', 'Interval', '[z]', 'Label'])
        writer.writerows(vertices)

    return vertices

In [None]:
# Export the vertices of the combined shortest paths.
vertices = paths_vertices_to_csv(paths=[asp], filename='./path_vertices.csv',
                                 dist=dist, max_dist=max_dist)

In [None]:
def paths_edges_to_csv(paths, filename):
    '''Write the edges of the given paths to a CSV file.

    Each row holds the patent ids of an edge's source and target.

    Keyword arguments:
    paths -- Iterable of (vertex set, edge set) results using graph indices
    filename -- Path of the CSV file to write

    Reads patent ids from the notebook-level ``graph``.
    Returns the list of [source id, target id] rows written (without the
    header).
    '''
    edges = []

    for p in paths:  # For each path
        for e in p[1]:  # For all the edges in that path
            source_id = int_to_hex(graph.vp.id[int(e[0])])
            target_id = int_to_hex(graph.vp.id[int(e[1])])
            edges.append([source_id, target_id])

    # newline='' is what the csv module expects (no doubled line endings on
    # Windows).
    with open(filename, 'w', newline='') as output_file:
        writer = csv.writer(output_file)
        writer.writerow(['Source', 'Target'])
        writer.writerows(edges)

    return edges

In [None]:
# Export the edges of the combined shortest paths.
edges = paths_edges_to_csv(paths=[asp], filename='./path_edges.csv')