In [126]:
! pip install gensim
! pip install networkx



In [127]:
import numpy as np
import pandas as pd

In [128]:
dataset = pd.read_csv('Iron_dealers_data.csv')
dataset['Value'] = dataset['Value'].astype(int)
dataset

Unnamed: 0,Seller ID,Buyer ID,Value
0,1309,1011,1225513
1,1309,1011,1179061
2,1309,1011,1119561
3,1309,1011,1200934
4,1309,1011,1658957
...,...,...,...
130530,1344,1390,212390
130531,1914,1390,28739
130532,1914,1390,46861
130533,1914,1390,10585


In [129]:
sell_buy = {}
transactions = {}

for row in dataset.iterrows():
    
    row = row[1]
    
    if row['Seller ID'] not in sell_buy:
        sell_buy[row['Seller ID']] = []

    sell_buy[row['Seller ID']].append(row['Buyer ID'])

    if (row['Seller ID'], row['Buyer ID']) not in transactions:
        transactions[(row['Seller ID'], row['Buyer ID'])] = []
    
    transactions[(row['Seller ID'], row['Buyer ID'])].append(row['Value'])

for key in sell_buy.keys():
    sell_buy[key] = list(set(sell_buy[key]))

In [130]:
# replacing multiple edges with the sum of values of all edges

for key in transactions.keys():
    transactions[key] = sum(transactions[key])

In [131]:
two_cycles = []

for key in sell_buy.keys():
    js = sell_buy[key]
    for j in js:
        if j in sell_buy.keys() and key in sell_buy[j] and (j,key) not in two_cycles:
            two_cycles.append((key, j))

one_directed_edge = []
for v in transactions.keys():
    if v not in two_cycles:
        one_directed_edge.append(v)

In [132]:
len(two_cycles)

318

In [133]:
three_rep_cycles = []
two_rep_cycles = []

for vertices in two_cycles:
    a = vertices[0]
    b = vertices[1]

    for v in two_cycles:
        if b in v and v!=vertices:
            m1 , m2 = v
            c = m1 if m1!=b else m2
            if (a,c) in two_cycles or (c,a) in two_cycles:
                three_rep_cycles.append([(a,b) , (b,a) , (a,c) , (c,a) , (b,c) , (c,b)])
            elif (a,c) in one_directed_edge:
                two_rep_cycles.append([(a,b) , (b,a) , (a,c) , (b,c) , (c,b)])
            elif (c,a) in one_directed_edge:
                two_rep_cycles.append([(a,b) , (b,a) , (c,a) , (b,c) , (c,b)])

In [134]:
cycles = []
one_rep_cycles = []

for vertices in one_directed_edge:
    a = vertices[0]
    b = vertices[1]
    
    for v in one_directed_edge:
        if v[0] == b and (v[1] , a) in one_directed_edge:
            cycles.append([(a,b) , (b,v[1]) , (v[1],a)])
        elif v[0] == b and ((v[1] , a) in two_cycles or (a , v[1]) in two_cycles):
            one_rep_cycles.append([(a,b) , (b,v[1]) , (v[1],a) , (a,v[1])])

In [135]:
len(cycles)

2040

In [136]:
len(one_rep_cycles)

728

In [137]:
weights_per_edge = {}

for k in transactions.keys():
    weights_per_edge[k] = 1
    
for cycle in cycles:
    for v in cycle:
        weights_per_edge[v] += 1

for cycle in one_rep_cycles:
    for v in cycle:
        weights_per_edge[v] += 2

for cycle in two_rep_cycles:
    for v in cycle:
        weights_per_edge[v] += 3
    
for cycle in three_rep_cycles:
    for v in cycle:
        weights_per_edge[v] += 4


In [138]:
def within_range(x , y , r):

    minim = min(x,y)
    maxim = max(x,y)

    if maxim <= (1+r) * minim:
        return True
    return False

In [139]:
for v in two_cycles:
    a = v[0]
    b = v[1]
    weights_per_edge[(a,b)] += 5
    weights_per_edge[(b,a)] += 5

    if within_range(transactions[(a,b)] , transactions[(b,a)] , 0.05):
        weights_per_edge[(a,b)] += 8
        weights_per_edge[(b,a)] += 8

In [140]:
for cycle in cycles:
    comp1 = within_range(transactions[cycle[0]] , transactions[cycle[1]] , 0.05)
    comp2 = within_range(transactions[cycle[1]] , transactions[cycle[2]] , 0.05)
    comp3 = within_range(transactions[cycle[2]] , transactions[cycle[0]] , 0.05)

    c = comp1 + comp2 + comp3

    weights_per_edge[v] += c

for cycle in one_rep_cycles:
    comp1 = within_range(transactions[cycle[0]] , transactions[cycle[1]] , 0.05)
    comp2 = within_range(transactions[cycle[1]] , transactions[cycle[2]] , 0.05)
    comp3 = within_range(transactions[cycle[2]] , transactions[cycle[0]] , 0.05)

    c = comp1 + comp2 + comp3

    weights_per_edge[v] += c

for cycle in two_rep_cycles:
    comp1 = within_range(transactions[cycle[0]] , transactions[cycle[2]] , 0.05)
    comp2 = within_range(transactions[cycle[2]] , transactions[cycle[4]] , 0.05)
    comp3 = within_range(transactions[cycle[4]] , transactions[cycle[0]] , 0.05)

    c = comp1 + comp2 + comp3

    weights_per_edge[v] += c
    

for cycle in three_rep_cycles:
    comp1 = within_range(transactions[cycle[0]] , transactions[cycle[2]] , 0.05)
    comp2 = within_range(transactions[cycle[2]] , transactions[cycle[4]] , 0.05)
    comp3 = within_range(transactions[cycle[4]] , transactions[cycle[0]] , 0.05)

    c = comp1 + comp2 + comp3

    weights_per_edge[v] += c

In [141]:
for vertices in two_cycles:
    a = vertices[0]
    b = vertices[1]

    w1 = weights_per_edge[(a,b)]
    w2 = weights_per_edge[(b,a)]

    if w1 > w2:
        del weights_per_edge[(b,a)]
    else:
        del weights_per_edge[(a,b)]

In [142]:
weights_per_edge

{(1309, 1011): 1,
 (1259, 1011): 1,
 (1568, 1011): 1,
 (1147, 1011): 10,
 (1393, 1011): 1,
 (1039, 1011): 13,
 (1042, 1005): 9,
 (1045, 1018): 1,
 (1256, 1074): 1,
 (1668, 1074): 1,
 (1163, 1074): 10,
 (1007, 1074): 15,
 (1832, 1074): 1,
 (1488, 1074): 6,
 (1801, 1074): 1,
 (1944, 1074): 1,
 (1659, 1048): 1,
 (1210, 1048): 4,
 (1205, 1057): 4,
 (1220, 1032): 1,
 (1837, 1061): 1,
 (1017, 1075): 1,
 (1023, 1075): 7,
 (2085, 1075): 1,
 (1503, 1075): 1,
 (2088, 1075): 1,
 (1350, 1075): 1,
 (1327, 1075): 7,
 (1666, 1075): 1,
 (1056, 1075): 7,
 (1748, 1075): 1,
 (1658, 1075): 1,
 (1327, 1003): 3,
 (2085, 1003): 1,
 (1228, 1003): 1,
 (1748, 1003): 1,
 (1309, 1003): 7,
 (1074, 1003): 3,
 (1138, 1003): 1,
 (1089, 1003): 3,
 (1002, 1003): 3,
 (1623, 1003): 1,
 (1076, 1003): 7,
 (1264, 1003): 1,
 (1875, 1003): 1,
 (1004, 1003): 4,
 (1595, 1003): 1,
 (1039, 1004): 8,
 (1944, 1004): 1,
 (1259, 1004): 1,
 (1090, 1004): 1,
 (1138, 1004): 23,
 (1051, 1004): 1,
 (1101, 1004): 20,
 (1488, 1004): 7,
 (10

In [143]:
to_write = []

for v in weights_per_edge.keys():
    to_write.append(str(v[0]) + " " + str(v[1]) + " " + str(weights_per_edge[v]) + "\n")

In [144]:
with open('graph.txt' , 'w') as f:
    f.writelines(to_write)

In [2]:
!python main.py --input karate.edgelist --output embedding.emd

Walk iteration:
1 / 10
2 / 10
3 / 10
4 / 10
5 / 10
6 / 10
7 / 10
8 / 10
9 / 10
10 / 10
Traceback (most recent call last):
  File "/Users/tanmaygoyal/Desktop/Assignments and Events/Fraud Analytics/Assignment1/main.py", line 104, in <module>
    main(args)
  File "/Users/tanmaygoyal/Desktop/Assignments and Events/Fraud Analytics/Assignment1/main.py", line 100, in main
    learn_embeddings(walks)
  File "/Users/tanmaygoyal/Desktop/Assignments and Events/Fraud Analytics/Assignment1/main.py", line 87, in learn_embeddings
    model = Word2Vec(walks, vector_size=args.dimensions, window=args.window_size, min_count=0, sg=1, workers=args.workers, epochs=args.iter)
  File "/Users/tanmaygoyal/opt/anaconda3/envs/test_env/lib/python3.9/site-packages/gensim/models/word2vec.py", line 425, in __init__
    self.build_vocab(corpus_iterable=corpus_iterable, corpus_file=corpus_file, trim_rule=trim_rule)
  File "/Users/tanmaygoyal/opt/anaconda3/envs/test_env/lib/python3.9/site-packages/gensim/models/word2ve