# Read files

In [1]:
%%time

from read_files import read_into_list_of_tuples
from graph_algorithms import Graph

# Input file locations. They get their own names so that `batch` and `stream`
# only ever refer to loaded data, never to a path string.
batch_path  = '../paymo_input/batch_payment.txt'
stream_path = '../paymo_input/stream_payment.txt'

# Read files
# Turn on verbosity to see the lines that did not conform to the csv format (Note:
# it's the same 5 or so lines over and over again)
batch0 = read_into_list_of_tuples (batch_path, verbose = False)
stream = read_into_list_of_tuples (stream_path, verbose = False)

# Build the Graph from the batch payments; later cells take copies of it
batch = Graph (batch0)

CPU times: user 14.1 s, sys: 419 ms, total: 14.6 s
Wall time: 14.6 s


In [2]:
%%time

# Reference pass over the stream, used by later cells as ground truth.
#   reference : stream pairs for which graph.if_lte_deg1 was False at the
#               moment they were seen
#   ans_ref   : one 0/1 flag per stream pair (1 when if_lte_deg1 was True)
graph = batch.copy ()
reference, ans_ref = [], []

for pair in stream:
    already_linked = graph.if_lte_deg1 (pair)
    ans_ref.append (1 if already_linked else 0)
    if not already_linked:
        reference.append (pair)
    # every pair is handed to add_edge, whether or not it was already linked
    graph.add_edge (pair)

print (len (reference))
print (graph.is_self_consistent ())

1389471
True
CPU times: user 9.85 s, sys: 72 ms, total: 9.92 s
Wall time: 9.93 s


# Features

In [3]:
# Line terminator appended to every verdict written to the output files
end = '\n'
# Verdict lines: msg_true is written when the degree check passes,
# msg_false otherwise
msg_true, msg_false = ("{}{}".format (word, end) for word in ("trusted", "unverified"))

In [4]:
# Per-feature state, indexed 0..2 for Feature 1..3.

# each feature works on its own copy of the batch graph
features = [batch.copy () for _ in range (3)]
# output file written by each feature
output_files = ['output{}.txt'.format (k) for k in (1, 2, 3)]
# transactions that were new when seen, kept for later comparisons
new_pairs = [[] for _ in range (3)]
# per-transaction answers, kept for comparison
answers = [[] for _ in range (3)]

## Feature 1
When anyone makes a payment to another user, they'll be notified if they've never made a transaction with that user before.

"unverified: You've never had a transaction with this user before. Are you sure you would like to proceed with this payment?"

In [5]:
%%time
n = 0

# `with` guarantees the output file is flushed and closed even if the loop
# raises part-way through (same pattern as the Feature 2 / Feature 3 cells).
with open (output_files [n], 'w') as out:
    for pair in stream:
        # First, check if prior transaction exists. If so, no need to add edge to graph
        if features [n].degree_lte (pair, degree = 1): # transaction existed
            answers [n].append (1)
            out.write (msg_true)
        else:
            answers [n].append (0)
            out.write (msg_false)

            # remember the new pair in new_pairs [n] for later comparison
            new_pairs [n].append (pair)

            # add edge only after the verdict has been written
            features [n].add_edge (pair)

print ('number of new nodes: {}\n'.format (features [n].num_nodes - batch.num_nodes))

number of new nodes: 3252

CPU times: user 6.63 s, sys: 100 ms, total: 6.73 s
Wall time: 6.74 s


In [6]:
# Sanity checks on the Feature 1 run (index 0)
n = 0
(
    features [n].is_self_consistent (),   # graph's own consistency check
    len (new_pairs [n]) == 1389471,       # count printed by the reference cell
)

(True, True)

## Feature 2

The PayMo team is concerned that these warnings could be annoying because there are many users who haven't had transactions, but are still in similar social networks. To account for this, PayMo would like you to also implement this feature. When users make a payment, they'll be notified when the other user is outside of their "2nd-degree network".

"unverified: This user is not a friend or a "friend of a friend". Are you sure you would like to proceed with this payment?"

In [9]:
%%time
n = 1

with open (output_files [n], 'w') as output:
    for pair in stream:
        # First, check if prior transaction exists. If so, no need to add edge to graph
        if features [n].degree_lte (pair, degree = 1):
            output.write (msg_true)
        else:
            # Determine if <= 2 degrees of separation BEFORE recording the
            # payment. Adding the edge first would connect the pair directly,
            # so the 2nd-degree test could never fail and every payment
            # would be reported as trusted.
            lte = features [n].degree_lte (pair, degree = 2)
            output.write (msg_true if lte else msg_false)

            # remember the new pair in new_pairs [n] for later comparison
            new_pairs [n].append (pair)

            # add edge after determining degree of separation
            features [n].add_edge (pair)

print ('number of new nodes: {}\n'.format (features [n].num_nodes - batch.num_nodes))

number of new nodes: 3252

CPU times: user 8.64 s, sys: 108 ms, total: 8.75 s
Wall time: 8.77 s


In [10]:
# Sanity checks on the Feature 2 run (index 1)
n = 1
(
    features [n].is_self_consistent (),   # graph's own consistency check
    len (new_pairs [n]) == 1389471,       # count printed by the reference cell
    new_pairs [n] == reference,           # same new pairs, in the same order
)

(True, True, True)

## Streaming version

In [11]:
%%time

# Streaming variant of Feature 2: read the stream file line by line instead of
# using the pre-parsed `stream` list, writing one verdict per valid line.
graph = batch.copy ()

verbose = False
# Pairs that had no direct edge when seen. NOTE(review): the name `np` shadows
# the conventional numpy alias; it is kept because the next cell reads `np`.
np = []

# Both files are context-managed so they are closed even if the loop raises.
with open ('../paymo_input/stream_payment.txt', 'r') as file, \
     open ('tmp.file', 'w') as output:

    line = file.readline () #header

    for line in file:

        # get the pair of IDs from line (fields 1 and 2 of the csv record)
        try:
            pair = tuple (map (int, line.split (',') [1:3]))
        except ValueError:
            pair = ()

        # Only parsing problems are treated as "malformed line"; errors raised
        # by the graph itself are no longer silently swallowed.
        if len (pair) != 2:
            if verbose: print ('Error: not a csv formatted line: ', line)
            continue

        if graph.degree_lte (pair, degree = 1):
            output.write (msg_true)
        else:
            # determine if <= 2 degrees of separation
            lte = graph.degree_lte (pair, degree = 2)
            output.write (msg_true if lte else msg_false)
            np.append (pair) # remember the new pair for later comparison

            # add edge after determining degree of separation
            graph.add_edge (pair)

CPU times: user 15.8 s, sys: 164 ms, total: 16 s
Wall time: 16 s


In [12]:
# Sanity checks on the streaming run
(
    graph.is_self_consistent (),   # graph's own consistency check
    len (np) == 1389471,           # count printed by the reference cell
    np == reference,               # same new pairs, in the same order
)

(True, True, True)

## Feature 3

More generally, PayMo would like to extend this feature to larger social networks. Implement a feature to warn users only when they're outside the "4th degree friends network."

In [14]:
%%time
n = 2

with open (output_files [n], 'w') as output:
    for pair in stream:
        # A prior direct transaction is trusted and leaves the graph untouched
        if features [n].degree_lte (pair, degree = 1):
            output.write (msg_true)
            continue

        # No direct edge yet: the verdict is decided on the graph as it
        # stands before this payment is recorded
        if features [n].degree_lte (pair, degree = 4):
            output.write (msg_true)
        else:
            output.write (msg_false)

        # remember the new pair in new_pairs [n], then record the payment
        new_pairs [n].append (pair)
        features [n].add_edge (pair)

print ('number of new nodes: {}\n'.format (features [n].num_nodes - batch.num_nodes))

number of new nodes: 3252

CPU times: user 6min 42s, sys: 324 ms, total: 6min 42s
Wall time: 6min 43s


In [15]:
# Sanity checks on the Feature 3 run (index 2). `n` is set here explicitly,
# like the Feature 1 / Feature 2 check cells, instead of relying on the value
# left behind by the previous cell.
n = 2
features [n].is_self_consistent (), len (new_pairs [n]) == 1389471, new_pairs [n] == reference

(True, True, True)

# Check if the graphs are the same

In [16]:
# Compare the graph updated by the Feature 1 cell (index 0) with the one
# updated by the Feature 2 cell (index 1)
features [0].is_equal_to (features [1])

True

In [17]:
# Compare the graph updated by the Feature 3 cell (index 2) with the one
# updated by the Feature 2 cell (index 1)
features [2].is_equal_to (features [1])

True