In [1]:
import numpy as np

from graph_tool.all import *

import heapq
import requests
import json
import time
import csv

In [2]:
def hex_to_int(id):
    '''Parse a patent id (string) as a base-16 number, giving its graph id (int).'''
    hex_base = 16
    graph_id = int(id, hex_base)
    return graph_id

def int_to_hex(id):
    '''Render a graph id (int) as its patent id (string): lowercase hex, no prefix.'''
    patent_id = format(id, '0x')
    return patent_id

In [3]:
# Directory holding the input data, relative to the notebook's working directory.
DATA = './data/'
# Load the citations graph from its gzip-compressed XML file.
graph = load_graph(DATA + 'citations_graph.xml.gz')
# Bare expression so the notebook displays the graph's summary repr.
graph

<Graph object, directed, with 8274991 vertices and 86284396 edges at 0x7fc8d56a87f0>

In [None]:
def _fetch_patent_title(patent_id, timeout=10):
    '''Look up a patent title on the patentsview.org api (best effort).

    Keyword arguments:
    patent_id -- Patent id (string) to query
    timeout -- Seconds to wait for the api before giving up (default=10)

    Returns the title, or '-No title available-' when the api has no record
    for the patent or cannot be reached / returns an unusable response.
    '''
    no_title = '-No title available-'
    patent_URL = 'http://www.patentsview.org/api/patents/query?q={"patent_number":"' + patent_id + '"}'
    try:
        # Without a timeout a stalled connection would block the notebook forever.
        patent_info = requests.get(patent_URL, timeout=timeout).json()
    except (requests.RequestException, ValueError):
        # Network error, timeout, or a body that is not JSON. The title is only
        # decoration for the printed path, so degrade instead of raising.
        return no_title

    patents = patent_info.get('patents') if isinstance(patent_info, dict) else None
    if not patents:
        # Covers a null 'patents' field, a missing key and an empty result list.
        return no_title
    return patents[0].get('patent_title', no_title)


def list_shortest_path(source, target, graph, verbose=False):
    '''Compute and print the shortest path between two patents.
    This function will pull the patent titles from the patentsview.org api.
    A failed title lookup never prevents the path from being returned; the
    affected patent is printed with a placeholder title instead.

    Keyword arguments:
    source -- Patent id of source node (string)
    target -- Patent id of target node (string)
    graph -- Patent graph
    verbose -- Set to true to print out shortest path (default=False)

    Returns the result of graph_tool's shortest_path (element 0 is the
    sequence of vertices on the path), or None when the source or target is
    not in the graph or no path connects them; a message is printed in each
    of those cases.

    Raises ValueError if source or target is not a hexadecimal string.
    '''

    source_int_id = hex_to_int(source)
    target_int_id = hex_to_int(target)

    # Patent ids live in the 'id' vertex property, so they have to be resolved
    # to vertices before any path search.
    source_vertex = find_vertex(graph, graph.vertex_properties.id, source_int_id)
    if len(source_vertex) == 0:
        print('Source not found in graph.')
        return None

    target_vertex = find_vertex(graph, graph.vertex_properties.id, target_int_id)
    if len(target_vertex) == 0:
        print('Target not found in graph.')
        return None

    path = shortest_path(graph, source_vertex[0], target_vertex[0])
    path_vertices = path[0]

    if len(path_vertices) == 0:
        print('The graph contains no path from source to target.')
        return None

    if verbose:
        print('Shortest path:\n--------------')
        last_idx = len(path_vertices) - 1
        for idx, v in enumerate(path_vertices):
            patent_id = int_to_hex(graph.vertex_properties.id[int(v)])
            print('ID: {}\t{}'.format(patent_id, _fetch_patent_title(patent_id)))
            # Arrow between consecutive patents, but not after the last one.
            if idx < last_idx:
                print('↓')

    return path

In [None]:
# Shortest path between the Snapchat Augmented Reality and Apple II patents.
# verbose=True also prints every patent on the path together with its title,
# which queries the patentsview.org api once per vertex.
path = list_shortest_path('9652896', '4136359', graph, verbose=True)

In [None]:
# Display the value returned by list_shortest_path (None when a patent is
# missing from the graph or no path exists).
path

In [None]:
# all_shortest_paths takes vertices of the graph, not patent ids: the ids are
# stored in the 'id' vertex property, so resolve them with find_vertex first
# (the same lookup list_shortest_path performs). Indexing with [0] raises
# IndexError if a patent is not present in the graph.
asp_source = find_vertex(graph, graph.vp.id, hex_to_int('9652896'))[0]
asp_target = find_vertex(graph, graph.vp.id, hex_to_int('4136359'))[0]
asp = all_shortest_paths(graph, asp_source, asp_target)

In [None]:


# all_paths takes vertices of the graph, not patent ids: resolve the ids
# through the 'id' vertex property first (the same lookup list_shortest_path
# performs). Indexing with [0] raises IndexError if a patent is not in the graph.
ap_source = find_vertex(graph, graph.vp.id, hex_to_int('9652896'))[0]
ap_target = find_vertex(graph, graph.vp.id, hex_to_int('4136359'))[0]

# Preview only: print the first four paths found within the cutoff, then stop.
for n_printed, p in enumerate(all_paths(graph, ap_source, ap_target, cutoff=2), start=1):
    print(p)
    if n_printed > 3:
        break

## Convert paths to CSVs

In [None]:
def paths_vertices_to_csv(paths, filename):
    '''Write the vertices of the given paths to a CSV file of patent ids and titles.
    This function will pull the patent titles from the patentsview.org api,
    one request per vertex.

    Keyword arguments:
    paths -- Iterable of paths; element 0 of each path is its sequence of
             vertices (the shape returned by list_shortest_path)
    filename -- Path of the CSV file to write (overwritten if it exists)

    Returns the written rows as a list of [patent_id, patent_title] pairs, in
    path order. A vertex that occurs in several paths is written once per
    occurrence. Vertex ids are read from the module-level `graph`
    (presumably the graph the paths were computed on).
    '''
    vertices = []

    for p in paths: # For each path
        for v in p[0]: # For all the vertices in that path
            patent_id = int_to_hex(graph.vp.id[int(v)])
            patent_URL = 'http://www.patentsview.org/api/patents/query?q={"patent_number":"' + patent_id + '"}'
            # Without a timeout a stalled connection would block the export forever.
            patent_info = requests.get(patent_URL, timeout=10).json()
            if (patent_info['patents'] is None):
                patent_title = '-No title available-'
            else:
                patent_title = patent_info['patents'][0]['patent_title']
            vertices.append([patent_id, patent_title])

    # newline='' is required by the csv module: the writer emits its own '\r\n'
    # and text-mode newline translation would otherwise turn it into '\r\r\n'
    # on Windows. utf-8 keeps non-ASCII characters in titles writable
    # regardless of the platform's default encoding.
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        writer = csv.writer(output_file)
        writer.writerow(['Id', 'Label'])
        writer.writerows(vertices)

    return vertices

In [None]:
# Export the vertices of the shortest path found above; the function takes a
# list of paths, hence the single-element list.
vertices = paths_vertices_to_csv([path], './path_vertices.csv')

In [None]:
def paths_edges_to_csv(paths, filename):
    '''Write the edges of the given paths to a CSV edge list of patent ids.

    Keyword arguments:
    paths -- Iterable of paths; element 1 of each path is its sequence of
             edges (the shape returned by list_shortest_path)
    filename -- Path of the CSV file to write (overwritten if it exists)

    Returns the written rows as a list of [source_id, target_id] pairs, in
    path order. An edge that occurs in several paths is written once per
    occurrence. Vertex ids are read from the module-level `graph`
    (presumably the graph the paths were computed on).
    '''
    edges = []

    for p in paths: # For each path
        for e in p[1]: # For all the edges in that path
            # Translate both endpoints from vertex index to patent id.
            source_id = int_to_hex(graph.vp.id[int(e.source())])
            target_id = int_to_hex(graph.vp.id[int(e.target())])
            edges.append([source_id, target_id])

    # newline='' is required by the csv module: the writer emits its own '\r\n'
    # and text-mode newline translation would otherwise turn it into '\r\r\n'
    # on Windows.
    with open(filename, 'w', newline='') as output_file:
        writer = csv.writer(output_file)
        writer.writerow(['Source', 'Target'])
        writer.writerows(edges)

    return edges

In [None]:
# Export the edges of the shortest path found above; the function takes a
# list of paths, hence the single-element list.
edges = paths_edges_to_csv([path], './path_edges.csv')