In [1]:
from antifraud import read_adj_dict_from_file, read_into_list_of_tuples, adjacency_dict
from graph_algorithms import Graph

In [2]:
def pass_fail (logic, message = ''):
    """Print `message` on one line, prefixed by '[PASS]: ' or '[FAIL]: '.

    logic   -- any value; its truthiness selects the prefix
    message -- label printed after the prefix (defaults to an empty string)
    """
    tag = '[PASS]: ' if logic else '[FAIL]: '
    # The tag is written without a newline so the message lands on the same line.
    print (tag, end = '')
    print (message)

# Read files

In [3]:
%%time

# read files
batch  = '../paymo_input/batch_payment.txt'
stream = '../paymo_input/stream_payment.txt'

batch0 = read_adj_dict_from_file (batch)
#stream = read_into_list_of_tuples (stream)

# build graphs
batch    = Graph (batch0)
features = batch.copy ()

CPU times: user 24.8 s, sys: 1.19 s, total: 25.9 s
Wall time: 26 s


# Dynamic programming
### Idea
The adjacency list 'adj_list' describes the graph at 1 degree of separation. Using just this to calculate the distance between 2 arbitrary nodes turns out to be too computationally expensive (for my taste at least) when dealing with the stream_payment.txt file.

So, the idea is to build an adjacency list of 2 degrees of separation as well. That should eliminate a good deal of redundant computation, and hopefully the result will be noticeably faster. Building an adjacency list of 4th order turned out to be **way too** expensive computationally, so I decided not to bother with it.

### Test Problem
<img src="image/example_graph.jpg" width="50%">
Let the graph G be defined by the following list of edges:
$$
\text{edges} (G) =
[(1, 13), (1, 3), (1, 4),
 (2, 12),
 (4, 13),
 (3, 5),
 (5, 6), (5, 11),
 (6, 8), (6, 9), (6, 10),
 (7, 8), (7, 12),
 (10, 11)]
$$

In [4]:
# Hand-written reference adjacency data for the test graph G.

# 1st order, exclusive: direct neighbours only (the node itself, i.e. a
# separation of degree zero, is left out)
adj_1_exclusive = {
    1 : {3, 4, 13},
    2 : {12},
    3 : {1, 5},
    4 : {1, 13},
    5 : {3, 6, 11},
    6 : {5, 8, 9, 10},
    7 : {8, 12},
    8 : {6, 7},
    9 : {6},
    10: {6, 11},
    11: {5, 10},
    12: {2, 7},
    13: {1, 4},
}

# 1st order, inclusive: each node is added to its own neighbour set
adj_1 = {node: {node} | neighbours
         for node, neighbours in adj_1_exclusive.items ()}

# 2nd order, exclusive: nodes exactly two steps away
adj_2_exclusive = {
    1 : {5},
    2 : {7},
    3 : {4, 6, 11, 13},
    4 : {3},
    5 : {1, 8, 9, 10},
    6 : {3, 7, 11},
    7 : {2, 6},
    8 : {5, 9, 10, 12},
    9 : {5, 8, 10},
    10: {5, 8, 9},
    11: {3, 6},
    12: {8},
    13: {3},
}

# 2nd order, inclusive: everything within two steps, the node itself included
adj_2 = {node: within_one | adj_2_exclusive [node]
         for node, within_one in adj_1.items ()}

In [5]:
# Sanity check: each inclusive dict must differ from its exclusive counterpart.
# Had an inclusive name been bound to the same object as the exclusive dict,
# the two would compare equal and the matching entry below would be False.
adj_1_exclusive != adj_1, adj_2_exclusive != adj_2

(True, True)

In [6]:
# Edge list of the test graph G; each tuple is one edge between two node ids.
edges = [(1, 13), (1, 3), (1, 4), (2, 12), (4, 13), (3, 5), (5, 6),
         (5, 11), (6, 8), (6, 9), (6, 10), (7, 8), (7, 12), (10, 11)]
# Start from a Graph built on an empty dict, then populate it via add_edges.
G = Graph ({})

# create graph G from list of edges
G.add_edges (edges)

# compare inclusive adjacency lists of order 1 & 2
# (adj_1 / adj_2 are the hand-written reference dicts from the earlier cell)
pass_fail (G.adj  == adj_1, "G.adj after adding edges")
pass_fail (G.adj2 == adj_2, "G.adj2 after adding edges")

# Check for self-consistency
# NOTE(review): what is_self_consistent verifies is defined in graph_algorithms -- not visible here
pass_fail (G.is_self_consistent (), "Self-consistency of G")

[PASS]: G.adj after adding edges
[PASS]: G.adj2 after adding edges
[PASS]: Self-consistency of G


# Tests and examples

List of tests and examples:
1. test case for function adjacency_dict
2. Confirm algo can handle being given node not in graph
3. Check how long it takes to confirm 2 nodes are separated by degree 4 (**67ms**!)
4. Check how long it takes to completely traverse the graph given in batch_payment.txt    (**< 200ms!**)

### test case for function adjacency_dict
```
a
| \
b--c--d--e
```
connections: (a, b), (a, c), (b, c), (c, d), (d, e)

or, as 2 lists: (a, a, b, c, d) and (b, c, c, d, e)

In [8]:
# Alternative construction via adjacency_dict, kept for reference:
#ex_edges = adjacency_dict (['a', 'a', 'b', 'c', 'd'],
#                           ['b', 'c', 'c', 'd', 'e'])
#ex_graph = Graph (ex_edges)

# Use edges to test out add_edges method
ex_edges = [('a', 'b'), ('a', 'c'), ('b', 'c'), ('c', 'd'), ('d', 'e')]
ex_graph = Graph ()
ex_graph.add_edges (ex_edges)

# Shortest path a -> e is a-c-d-e, i.e. 3 edges.
pass_fail (ex_graph.distance (('a', 'e')) == 3, 'method distance')
# Label names the method actually exercised (it used to repeat 'method distance').
# Presumably distance_lte (pair, n) is truthy when the pair is within n steps -- confirm in graph_algorithms.
pass_fail (ex_graph.distance_lte (('a', 'e'), 3), 'method distance_lte')

# Add edge w/ new node 'f'
ex_graph.add_edge (('f', 'e'))

pass_fail (ex_graph.num_nodes == 6,
           'num_nodes update after adding new edge')
# inclusive adjacency list
pass_fail (ex_graph.adj == {'a': {'a', 'b', 'c'},
                            'b': {'a', 'b', 'c'},
                            'c': {'a', 'b', 'c', 'd'},
                            'd': {'c', 'd', 'e'},
                            'e': {'d', 'e', 'f'},
                            'f': {'e', 'f'}},
           'adj update after adding new edge')
# inclusive adjacency list of order 2
pass_fail (ex_graph.adj2 == {'a': {'a', 'b', 'c', 'd'},
                             'b': {'a', 'b', 'c', 'd'},
                             'c': {'a', 'b', 'c', 'd', 'e'},
                             'd': {'a', 'b', 'c', 'd', 'e', 'f'},
                             'e': {'c', 'd', 'e', 'f'},
                             'f': {'d', 'e', 'f'}},
           'adj2 update after adding new edge ')

[PASS]: method distance
[PASS]: method distance
[PASS]: num_nodes update after adding new edge
[PASS]: adj update after adding new edge
[PASS]: adj2 update after adding new edge 


In [9]:
# adjacency_dict receives the edge endpoints as two parallel lists
# (i-th edge = first[i], second[i]); the result is compared against the
# expected exclusive adjacency dict of the a..e example graph.
adjacency_dict (['a', 'a', 'b', 'c', 'd'],
                ['b', 'c', 'c', 'd', 'e']) == {'a': {'b', 'c'},
                                               'b': {'a', 'c'},
                                               'c': {'a', 'b', 'd'},
                                               'd': {'c', 'e'},
                                               'e': {'d'}}

True

### Confirm algo can handle being given node not in graph

In [10]:
# Pair used for the "node not in graph" checks below; presumably 27060006 is the id absent from `batch`
pair = (49466, 27060006)

In [11]:
# Time the distance lookup for the pair above; the stored output shows -1 for this pair
%time batch.distance (pair)

CPU times: user 6 µs, sys: 0 ns, total: 6 µs
Wall time: 8.82 µs


-1

In [12]:
# Same pair through distance_lte with n = 100; the stored output shows False
%time batch.distance_lte (pair, n = 100)

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 7.39 µs


False

In [13]:
# Same pair through if_lte_deg4; the stored output shows False
%time batch.if_lte_deg4 (pair)

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 7.39 µs


False

### Check how long it takes to confirm 2 nodes are separated by degree 4

In [14]:
# Pair for the timing checks below; the stored output of batch.distance for it is 4
pair = (49466, 2706)

In [15]:
# Time the distance computation for the pair defined in the previous cell (stored result: 4)
%time batch.distance (pair)

CPU times: user 49.4 ms, sys: 7.99 ms, total: 57.4 ms
Wall time: 56.2 ms


4

In [16]:
%time batch.if_lte_deg4 ((49466, 2706))

CPU times: user 20 µs, sys: 1e+03 ns, total: 21 µs
Wall time: 21.9 µs


True