In [1]:
from read_processing_files import read_into_list_of_tuples
from graph_algorithms import Graph

In [2]:
def pass_fail (logic, message = ''):
    """Print `message` prefixed with '[PASS]: ' or '[FAIL]: '.

    logic   -- any value; its truthiness selects the prefix
    message -- object printed right after the prefix (empty by default)
    """
    tag = '[PASS]: ' if logic else '[FAIL]: '
    print (tag, message, sep = '')

# Read files

In [3]:
%%time

# read files
batch  = '../paymo_input/batch_payment.txt'
stream = '../paymo_input/stream_payment.txt'

batch0 = read_into_list_of_tuples (batch)
stream = read_into_list_of_tuples (stream)

# build graphs
batch    = Graph (batch0)
features = batch.copy ()

CPU times: user 21.5 s, sys: 683 ms, total: 22.2 s
Wall time: 22.8 s


# Dynamic programming
### Idea
The adjacency list 'adj_list' is that of the graph of 1 degree of separation. Using just this to calculate the distance between 2 arbitrary nodes turns out to be too computationally expensive (for my taste at least) when dealing with the stream_payment.txt file.

So, the idea is to build adjacency list of 2 degrees of separation as well. That should eliminate a good deal of redundant computations and hopefully the result will be noticeably faster. Building adjacency list of 4th order turned out to be **way too** expensive computationally, and so I decided not to bother with it.

### Test Problem
<img src="image/example_graph.jpg" width="50%">
Let the graph G be defined by the following list of edges:
$$
\text{edges} (G) =
[(1, 13), (1, 3), (1, 4),
 (2, 12),
 (4, 13),
 (3, 5),
 (5, 6), (5, 11),
 (6, 8), (6, 9), (6, 10),
 (7, 8), (7, 12),
 (10, 11)]
$$

In [4]:
# Hand-written reference adjacency data for the test graph G.

# order 1, exclusive: direct neighbours only (a node is never listed under its own key)
adj_1_exclusive = {
    1 : {3, 4, 13},
    2 : {12},
    3 : {1, 5},
    4 : {1, 13},
    5 : {3, 6, 11},
    6 : {5, 8, 9, 10},
    7 : {8, 12},
    8 : {6, 7},
    9 : {6},
    10: {6, 11},
    11: {5, 10},
    12: {2, 7},
    13: {1, 4},
}

# order 1, inclusive: every node together with its direct neighbours
adj_1 = {node: {node} | neighbours
         for node, neighbours in adj_1_exclusive.items ()}

# order 2, exclusive: nodes at exactly two degrees of separation
adj_2_exclusive = {
    1 : {5},
    2 : {7},
    3 : {4, 6, 11, 13},
    4 : {3},
    5 : {1, 8, 9, 10},
    6 : {3, 7, 11},
    7 : {2, 6},
    8 : {5, 9, 10, 12},
    9 : {5, 8, 10},
    10: {5, 8, 9},
    11: {3, 6},
    12: {8},
    13: {3},
}

# order 2, inclusive: everything within two degrees, the node itself included
adj_2 = {node: adj_1 [node] | adj_2_exclusive [node] for node in adj_1}

In [5]:
# sanity check: each inclusive dictionary must differ in content from the
# exclusive one it was derived from, which also rules out the two names
# pointing at the same object
(adj_1 != adj_1_exclusive, adj_2 != adj_2_exclusive)

(True, True)

In [6]:
# edge list of the test graph G
edges = [(1, 13), (1, 3), (1, 4), (2, 12), (4, 13), (3, 5), (5, 6),
         (5, 11), (6, 8), (6, 9), (6, 10), (7, 8), (7, 12), (10, 11)]

# start from an empty graph and insert every edge in one call
G = Graph ({})
G.add_edges (edges)

# order 1: the inclusive adjacency list must equal the hand-written reference
pass_fail (G.adj  == adj_1, "G.adj after adding edges")

# order 2: one FAIL line per node whose friends-of-friends set is wrong,
# a single PASS line when every node agrees with the reference
passed = True
for key in G.adj.keys ():
    if G.friends_of_friends (key) == adj_2 [key]:
        continue
    pass_fail (False, "G.adj2 after adding edges")
    passed = False
if passed:
    pass_fail (True, "method friends_of_friends check")

# finally, let the graph run its own consistency check
pass_fail (G.is_self_consistent (), "Self-consistency of G")

[PASS]: G.adj after adding edges
[PASS]: method friends_of_friends check
[PASS]: Self-consistency of G


### Repeat same test as before, but w/ the ordered pairs reversed

In [7]:
# Reverse every ordered pair. The result gets its own name so that `edges`
# stays untouched: rebinding `edges` here would flip the pairs back to their
# original order whenever this cell is run a second time.
edges_reversed = [(y, x) for x, y in edges]
G = Graph ({})

# create graph G from the reversed list of edges
G.add_edges (edges_reversed)

# compare inclusive adjacency lists of order 1 & 2
pass_fail (G.adj  == adj_1, "G.adj after adding edges")
passed = True
for key in G.adj.keys ():
    if G.friends_of_friends (key) != adj_2 [key]:
        pass_fail (False, "G.adj2 after adding edges")
        passed = False
if passed: pass_fail (True, "method friends_of_friends check")

# Check for self-consistency
pass_fail (G.is_self_consistent (), "Self-consistency of G")

[PASS]: G.adj after adding edges
[PASS]: method friends_of_friends check
[PASS]: Self-consistency of G


# Tests and examples

List of tests and examples:
1. test case for graph creation using adjacency list
2. Confirm algo can handle being given node not in graph
3. Check how long it takes to confirm 2 nodes are separated by degree 4 (**24 micro seconds**!)

### test case for function adjacency_dict
```
a
| \
b--c--d--e
```
connections: (a, b), (a, c), (b, c), (c, d), (d, e)

or, as 2 lists: (a, a, b, c, d) and (b, c, c, d, e)

In [11]:
# Build the example graph directly from its list of edges
ex_edges = [('a', 'b'), ('a', 'c'), ('b', 'c'), ('c', 'd'), ('d', 'e')]
ex_graph = Graph (ex_edges)

# a and e are three edges apart: a - c - d - e
pass_fail (ex_graph.distance (('a', 'e')) == 3, 'method distance')
pass_fail (ex_graph.distance_lte (('a', 'e'), 3), 'method distance_lte')

# Add edge w/ new node 'f'
ex_graph.add_edge (('f', 'e'))

pass_fail (ex_graph.num_nodes == 6,
           'num_nodes update after adding new edge')
# inclusive adjacency list
pass_fail (ex_graph.adj == {'a': {'a', 'b', 'c'},
                            'b': {'a', 'b', 'c'},
                            'c': {'a', 'b', 'c', 'd'},
                            'd': {'c', 'd', 'e'},
                            'e': {'d', 'e', 'f'},
                            'f': {'e', 'f'}},
           'adj update after adding new edge')
# inclusive adjacency list of order 2
ex_adj_2 = {'a': {'a', 'b', 'c', 'd'},
            'b': {'a', 'b', 'c', 'd'},
            'c': {'a', 'b', 'c', 'd', 'e'},
            'd': {'a', 'b', 'c', 'd', 'e', 'f'},
            'e': {'c', 'd', 'e', 'f'},
            'f': {'d', 'e', 'f'}}

# Initialise the flag in this cell: relying on a value left behind by an
# earlier cell fails on a fresh kernel and can hide a PASS after an
# unrelated earlier failure.
passed = True
for key in ex_graph.adj.keys ():
    if ex_graph.friends_of_friends (key) != ex_adj_2 [key]:
        pass_fail (False, "checking method friends_of_friends after adding edges")
        passed = False
if passed: pass_fail (True, "checking method friends_of_friends after adding new edges")


[PASS]: method distance
[PASS]: method distance
[PASS]: num_nodes update after adding new edge
[PASS]: adj update after adding new edge
[PASS]: checking method friends_of_friends after adding new edges


### Confirm algo can handle being given node not in graph

In [25]:
# test pair for the "node not in graph" checks below; 49466 is reused later in
# a pair with a finite distance, so 27060006 is presumably the absent id
pair = (49466, 27060006)

In [26]:
# distance is expected to report -1 for a pair containing an unknown node
%time batch.distance (pair) == -1

CPU times: user 9 µs, sys: 0 ns, total: 9 µs
Wall time: 11.9 µs


True

In [27]:
# even a generous bound (n = 100) must not be satisfied for an unknown node
%time batch.distance_lte (pair, n = 100) == False

CPU times: user 8 µs, sys: 0 ns, total: 8 µs
Wall time: 11.4 µs


True

In [28]:
# the degree-4 check must likewise reject a pair containing an unknown node
%time batch.if_lte_deg4 (pair) == False

CPU times: user 8 µs, sys: 0 ns, total: 8 µs
Wall time: 11.2 µs


True

### Check how long it takes to confirm 2 nodes are separated by degree 3

In [29]:
# Pairs among the first 1000 stream entries whose distance in the batch graph
# is exactly 3 (the second argument of `distance` is presumably a depth limit
# -- confirm in graph_algorithms).
# A comprehension keeps its loop variable local, so the notebook-level `pair`
# used as a named input by the neighbouring cells is not overwritten.
dist3 = [candidate for candidate in stream [:1000]
         if batch.distance (candidate, 3) == 3]

In [30]:
from numpy.random import randint
k = randint (0, len (dist3))
%timeit batch.if_lte_deg4 (dist3 [k])

775 µs ± 28.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Check how long it takes to confirm 2 nodes are separated by degree 4

In [31]:
# pair used for the degree-4 timings; its distance is verified in the next cell
pair = (49466, 2706)

In [32]:
# sanity check: the chosen pair really is 4 degrees apart
batch.distance (pair) == 4

True

In [33]:
# baseline timing: the general distance computation
%timeit batch.distance (pair)

36.6 ms ± 2.81 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [34]:
# timing of the dedicated "within 4 degrees" check on the same pair
%timeit batch.if_lte_deg4 (pair)

23.7 µs ± 596 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [36]:
# timing of the bounded check with an explicit limit of 4
# NOTE(review): earlier cells call `distance_lte`; confirm `degree_lte` is the intended method name
%timeit batch.degree_lte (pair, 4)

24.2 µs ± 1.34 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [37]:
# speed-up factor from the timings above: distance took 36.6 ms (= 36.6e3 µs),
# degree_lte took 24.2 µs
36.6e3 / 24.2

1512.396694214876