# Synthetic data generator for asset networks
 First we create a list of assets; then we randomly distribute their (fractional) ownership.

In [260]:
import random
import string

# N is the number of asset names requested from gen_random_asset_list below.
N = 10 # number of nodes
# c is the upper bound of U(0,c); for now it is used directly as the fixed
# edge probability assigned to every node.
c = 0.2 # control parameter for the connectivity U(0,c)


def get_random_string(length):
    """Return a random string made of ``length`` upper-case ASCII letters.

    Every character is drawn independently with ``random.choice``, so the
    result depends on the state of the ``random`` module generator.
    A ``length`` of zero (or less) yields the empty string.
    """
    alphabet = string.ascii_uppercase
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

def gen_random_asset_list(length, name_length):
    """Return ``length`` distinct random asset names, in generation order.

    Names are produced by ``get_random_string(name_length)``. A name that was
    already drawn is skipped and drawing continues until enough distinct names
    exist, so the caller gets the number of assets it asked for instead of
    "approximately" that many.

    Only ``26 ** name_length`` distinct upper-case names exist, so the result
    is capped at that size when ``length`` asks for more. A non-positive
    ``length`` returns an empty list.
    """
    # Without this cap the loop below could never finish when more names are
    # requested than can exist for the given name length.
    max_distinct = len(string.ascii_uppercase) ** max(name_length, 0)
    target = max(0, min(length, max_distinct))

    # A dict is used as an insertion-ordered set: unlike list(set(...)), the
    # resulting order is reproducible for a given random seed.
    unique_names = {}
    while len(unique_names) < target:
        unique_names[get_random_string(name_length)] = None
    return list(unique_names)


In [261]:
# random edge probability per node

import numpy as np

assetlist = gen_random_asset_list(N, 2)

# Numeric index of every asset, in list order.
num_assetlist = list(range(len(assetlist)))

# Probability that an edge leaves each node. It is fixed to c for the moment;
# the random alternative would be np.random.uniform(0, c) per node.
node_edge_prob = [c for _ in assetlist]

# Show name, index and edge probability of every asset.
for asset_name, asset_index, edge_prob in zip(assetlist, num_assetlist, node_edge_prob):
    print(asset_name, " ", asset_index, " ", edge_prob)


    

SF   0   0.2
OM   1   0.2
VT   2   0.2
MP   3   0.2
NZ   4   0.2
YR   5   0.2
ZQ   6   0.2
WS   7   0.2
UX   8   0.2
ZN   9   0.2


In [262]:
# Generate the directed edges: every ordered pair (source, target) gets one
# uniform draw, and the edge is kept when the draw falls below the source
# node's probability and the pair is not a self-loop. The draw is evaluated
# first, so one random number is consumed for every pair, self-pairs included.
edges = [
    (source, target)
    for source, source_prob in zip(assetlist, node_edge_prob)
    for target in assetlist
    if np.random.uniform(0, 1) < source_prob and source != target
]

print(edges)

[('SF', 'WS'), ('OM', 'VT'), ('OM', 'NZ'), ('OM', 'YR'), ('VT', 'SF'), ('VT', 'WS'), ('MP', 'YR'), ('MP', 'WS'), ('YR', 'NZ'), ('YR', 'ZN'), ('ZQ', 'VT'), ('ZQ', 'NZ'), ('UX', 'MP'), ('UX', 'ZQ'), ('ZN', 'UX')]


In [263]:
#generate edge weights: percentage over ownership

# One weight slot per edge, index-aligned with `edges`; the slots are
# overwritten in place by gen_edge_weights further below.
weights = [0]*len(edges)
print(weights)
# Value of each asset; filled in `assetlist` order by the loop further below.
assets_value = []

def split_num_random(value, num_parts):
    """Split ``value`` into ``num_parts`` random pieces that add up to it.

    ``num_parts - 1`` cut points are drawn uniformly from ``[0, value)`` and
    sorted; the pieces are the gaps between consecutive cut points, including
    the gap from 0 to the first cut and from the last cut to ``value``.

    Returns ``[value]`` when ``num_parts`` is 1 and an empty list when it is
    smaller than 1.
    """
    if num_parts < 1:
        return []
    if num_parts == 1:
        return [value]

    # One uniform draw per cut point, taken one at a time.
    cut_points = sorted(
        np.random.uniform(0, value) for _ in range(num_parts - 1)
    )
    boundaries = [0, *cut_points, value]
    return [upper - lower for lower, upper in zip(boundaries, boundaries[1:])]


def gen_edge_weights(node):
    """Assign random ownership fractions to all edges pointing into ``node``.

    The module-level ``edges`` and ``weights`` lists must already exist.
    A total of 1 is split randomly into as many parts as ``node`` has incoming
    edges, and each part is written into ``weights`` at the position of the
    corresponding edge. ``weights`` is modified in place.

    Always returns True.
    """
    # Positions in `edges` whose target (second element) is this node.
    incoming = [idx for idx, edge in enumerate(edges) if edge[1] == node]

    shares = split_num_random(1, len(incoming))

    # The weights are written directly into the external list.
    for idx, share in zip(incoming, shares):
        weights[idx] = share

    return True

# Weights and node values.
# First generate the weights of the connections entering every asset.
for asset_name in assetlist:
    health = gen_edge_weights(asset_name)

# Then give every asset its value. For testing all assets are worth 100;
# a random value between 1 and 100 would be np.random.uniform(1, 100).
assets_value.extend(100 for _ in assetlist)

print(weights)
print(assets_value)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0.25851643679856684, 0.3406406555390943, 0.0003040960850833052, 0.16672700222845704, 1, 0.14893028536439823, 0.833272997771543, 0.5925532778370349, 0.0080044370245963, 1, 0.6593593444609057, 0.9916914668903204, 1, 1, 1]
[100, 100, 100, 100, 100, 100, 100, 100, 100, 100]


In [264]:
# plot the graph

import networkx as nx
#import matplotlib.pyplot as plt
from pyvis.network import Network

count = 0

# Directed graph holding only the generated edges (no attributes).
G = nx.DiGraph()
G.add_edges_from(edges)
print(G)

# Render the graph as an interactive pyvis page inside the notebook.
net = Network(
    notebook=True,
    directed=True,
    cdn_resources='remote',
)
net.from_nx(G)
net.repulsion()
net.show("test.html")



DiGraph with 10 nodes and 15 edges
test.html


In [274]:
# Write an output file that can be loaded into Gremlin as a dataset.
# The output is formatted in the GraphML standard.
import os
import time

# Graph to export: every asset is a node carrying its `value`, every edge
# carries the ownership `fraction` stored at the same index in `weights`.
G2 = nx.DiGraph()
for asset_name, asset_value in zip(assetlist, assets_value):
    G2.add_node(asset_name, value=asset_value)

for (source, target), fraction in zip(edges, weights):
    G2.add_edge(source, target, fraction=fraction)

# Name the file with the .xml extension instead of .graphml; the g.io()
# loading function does not seem to recognize it otherwise.
time_stamp = time.strftime("%d-%H%M%S")
# Every backslash is escaped: the previous literal contained "\d", which is
# an invalid escape sequence and only worked by accident.
file_name = "D:\\gitrepos\\assetnet\\datagen\\data\\graph_" + time_stamp + ".xml"
# write_graphml_lxml opens and closes the path itself, so no separate
# open()/close() on the same file is needed (it only left a handle that
# leaked if the write raised).
nx.write_graphml_lxml(G2, file_name)

In [275]:
#import jugri

# setup the connect to the Gremlin server
from gremlin_python import statics
from gremlin_python.structure.graph import Graph
from gremlin_python.process.graph_traversal import __
from gremlin_python.process.strategies import *

# python and Gremlin commands overlap in their naming. This library will provide the mapping to resolve that
from gremlin_python.process.traversal import Barrier, Bindings, Cardinality, Column, Direction, Operator, Order, P, Pop, Scope, T, WithOptions

# Create a GraphTraversalSource which is the basis for all Gremlin traversals
from gremlin_python.process.anonymous_traversal import traversal
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection

#directly from python (not a notebook like Jupyter) do this
#g = traversal().withRemote(DriverRemoteConnection('ws://localhost:8182/gremlin','g'))

#only for notebooks, to avoid the "loop already running" error, do this:
from gremlin_python.driver.aiohttp.transport import AiohttpTransport
# Remote connection to the local Gremlin server, using the aiohttp transport
# configured to be called from an already running event loop.
connection = DriverRemoteConnection(
    'ws://localhost:8182/gremlin',
    'g',
    transport_factory=lambda: AiohttpTransport(call_from_event_loop=True),
)

# Traversal source bound to that connection.
g = traversal().withRemote(connection)

# Drop every vertex currently in the remote graph before loading new data.
g.V().drop().iterate()

In [276]:
# using a test graph to compare results with the gremlin console: later use the path to the latest graph
#g.io("D:\\gitrepos\\assetnet\\gremlinTest\\graph_test_renamed.xml").read().iterate()
# Load the graph file whose path is stored in file_name into the graph behind g.
g.io(file_name).read().iterate()

[['io', 'D:\\gitrepos\\assetnet\\datagen\\data\\graph_31-214541.xml'], ['read'], ['none'], ['values', '_ipython_canary_method_should_not_exist_'], ['values', '_ipython_canary_method_should_not_exist_']]

In [277]:
# Ids of the vertices reached by following one outgoing edge from vertex "UX".
lista= g.V("UX").out().id_().toList()
# Bare expression so the notebook displays the list.
lista

['ZQ', 'MP']

In [280]:
# `fraction` property of every outgoing edge of vertex "UX".
lista_init_values= g.V("UX").outE().values("fraction").toList()
# Bare expression so the notebook displays the list.
lista_init_values

[1, 1]

In [281]:
# Map every direct successor of "UX" to the `fraction` stored on the edge
# leading to it. Target id and fraction are read from each edge in a single
# traversal, so the pairing does not depend on two separate queries (out()
# and outE()) returning their results in the same order.
mydic = {
    row["target"]: row["fraction"]
    for row in g.V("UX").outE()
    .project("target", "fraction")
    .by(__.inV().id_())
    .by("fraction")
    .toList()
}
print(mydic)

{'ZQ': 1, 'MP': 1}


In [283]:
# Look up the fraction recorded for successor "ZQ" (displayed as cell output).
mydic["ZQ"]

1

In [285]:
from gremlin_python.process.traversal import Barrier, Bindings, Cardinality, Column, Direction, Operator, Order, P, Pop, Scope, T, WithOptions

# Start from the vertices whose ids are in lista and keep following outgoing
# edges until a vertex with id "UX" is reached; simplePath() discards
# traversers that would revisit a vertex. Because until() comes before
# repeat(), the stop condition is tested before each hop (see the Gremlin
# repeat/until documentation). At most 5 of the resulting paths are returned.
lista3 = g.V(lista).until(__.has(T.id,"UX")).repeat(__.out().simplePath()).path().limit(5).toList()
# Bare expression so the notebook displays the paths.
lista3

[path[v[MP], v[YR], v[ZN], v[UX]]]

In [287]:
# multiply the weights of connections through the path
# Every traverser starts with a sack of 1 at one of the vertices in lista and
# multiplies the sack by the `fraction` of each edge it crosses, until a
# vertex with id "UX" is reached. The edge from "UX" to the start vertex is
# not part of this traversal, so its fraction is not included here.
# At most 5 sack values are returned.
lista5 = g.withSack(1).V(lista)\
        .until(__.has(T.id,"UX")).repeat(__.outE().sack(Operator.mult).by("fraction").inV().simplePath())\
        .sack().limit(5).toList()
# Bare expression so the notebook displays the products.
lista5

[0.833272997771543]

In [288]:
# For every path found above, record its first vertex and the fraction that
# mydic holds for that vertex (the edge from "UX" to it).
lista_first_element = []
lista_first_value = []
for found_path in lista3:
    first_vertex = found_path[0]
    lista_first_element.append(first_vertex)
    # The Vertex object returned inside the path already carries its id, so
    # no extra query to the server is needed for each path.
    lista_first_value.append(mydic[first_vertex.id])
print(lista_first_value)
    

[1]


In [289]:
# Combine, position by position, the product accumulated along each path
# (lista5) with the fraction of the first hop out of "UX" (lista_first_value).
finalValues = [
    path_product * lista_first_value[position]
    for position, path_product in enumerate(lista5)
]
print(finalValues)

# Bare expression so the notebook displays the total.
sum(finalValues)

[0.833272997771543]


0.833272997771543

In [40]:
# Close the remote connection on shutdown so that its open connections to the
# Gremlin server are released.
connection.close()