# Synthetic data generator for asset networks
 Here we first create a list of assets; second we distribute randomly their ownership (fractional).

In [32]:
import random
import string

N = 30 # number of nodes
c = 0.2 # controll parameter for the connectivity U(0,c)


# code to generate a single random string
def get_random_string(length):
    # Only upper case
    letters = string.ascii_uppercase
    result_str = ''.join(random.choice(letters) for i in range(length))
    # print random string
    #print(result_str)
    return result_str

# code to generate a list of random strings. Return approx "N=length" elements because identical ones are dropped
def gen_random_asset_list(length,name_length):
    my_asset_list = []
    for i in range(length):
        asset_name = get_random_string(name_length)
        my_asset_list.append(asset_name)
    #print(my_asset_list)
    
    #remove identical entries by passing the generated list into a set and back to a list.
    new_asset_list = list(set(my_asset_list))
    return new_asset_list


In [33]:
# random edge probability per node

import numpy as np

assetlist = gen_random_asset_list(N,2)
# generate a list of probabilities to a edge to exists from each node:
node_edge_prob = []
count = 0
for i in assetlist:
    node_edge_prob.append(np.random.uniform(0,c))
    print(i," ",node_edge_prob[count])
    count=count+1

IP   0.09671308275040216
SV   0.01636687900087619
QY   0.023757731392409068
EE   0.012599713706235539
VP   0.19018248292803172
WQ   0.13681158967623105
LT   0.09238206052507532
OK   0.05611158693292087
YA   0.13643769732182537
GY   0.003335990014873214
RO   0.12578209528940687
GJ   0.14896826584680334
SN   0.129752356899211
FM   0.06973461864094188
DL   0.006885513136553567
WS   0.008104944036050976
FC   0.14794620386905397
LI   0.16619831445178246
FV   0.16683252899802503
RM   0.16341740360701398
GL   0.050466868710478036
SA   0.0740379331352409
KY   0.1378707927442332
YW   0.1353994995358157
GX   0.15055476276544705
KX   0.18296719442025025
WN   0.1684973460219613
WP   0.004121253982151796


In [34]:
#generating the edges
edges = []

count = 0
for i in assetlist:
    for j in assetlist:
        if np.random.uniform(0,1) < node_edge_prob[count] and i!=j:
            edges.append((i,j))
    count=count+1

print(edges)

[('IP', 'LT'), ('IP', 'OK'), ('IP', 'GY'), ('IP', 'DL'), ('VP', 'WQ'), ('VP', 'SN'), ('VP', 'KY'), ('WQ', 'VP'), ('WQ', 'GJ'), ('WQ', 'FM'), ('WQ', 'WP'), ('LT', 'GJ'), ('LT', 'FM'), ('LT', 'FC'), ('LT', 'YW'), ('OK', 'KX'), ('YA', 'SV'), ('YA', 'LT'), ('YA', 'DL'), ('YA', 'YW'), ('YA', 'WP'), ('RO', 'GL'), ('GJ', 'WQ'), ('GJ', 'OK'), ('GJ', 'RO'), ('GJ', 'SN'), ('GJ', 'DL'), ('GJ', 'LI'), ('GJ', 'SA'), ('SN', 'VP'), ('SN', 'LI'), ('SN', 'FV'), ('SN', 'KX'), ('SN', 'WN'), ('FM', 'QY'), ('FM', 'RM'), ('FM', 'GL'), ('FC', 'VP'), ('FC', 'YA'), ('FC', 'SN'), ('FC', 'YW'), ('LI', 'IP'), ('LI', 'YA'), ('LI', 'RO'), ('LI', 'FM'), ('LI', 'GL'), ('LI', 'SA'), ('LI', 'WP'), ('FV', 'OK'), ('RM', 'IP'), ('RM', 'EE'), ('RM', 'FC'), ('GL', 'FC'), ('SA', 'IP'), ('SA', 'QY'), ('SA', 'LT'), ('SA', 'WP'), ('KY', 'WQ'), ('KY', 'LI'), ('KY', 'YW'), ('KY', 'WN'), ('YW', 'SV'), ('YW', 'YA'), ('YW', 'RM'), ('GX', 'SA'), ('GX', 'YW'), ('KX', 'RO'), ('KX', 'GJ'), ('KX', 'SN'), ('KX', 'WS'), ('KX', 'GL'), ('KX'

In [35]:
#generate edges weigths: percentage over ownership

weights = [0]*len(edges)
print(weights)
assets_value = []

#this will split the value in a given number of parts
def split_num_random(value,num_parts):
    dividing_points = []
    parts = []
    if num_parts == 1:
        parts.append(value)
        
    elif num_parts > 1:
        for i in range(0, num_parts-1):
            dividing_points.append(np.random.uniform(0,value))
        dividing_points.sort()
        #print(dividing_points)
        old_value = 0
        for i in range(len(dividing_points)):
            parts.append(dividing_points[i]-old_value)
            old_value = dividing_points[i]
        parts.append(value-old_value)
        #print(parts)
    return parts


#here edges and weights vectors must already be define
#this code associate divide the ownership into many parts, each corresponding to an in-vector to a node
def gen_edge_weights(node):
    # count number of connections for a node
    count = 0
    edges_index=[]
    
    for i in range(len(edges)):
        if edges[i][1] == node:
            edges_index.append(i)
            count=count+1
    
    #print(count)
    parts_local = split_num_random(1,count)
    #print(parts_local)
    
    count = 0
    for i in edges_index:
        #print(i)
        # weights are changed directly over the external variable
        weights[i]=parts_local[count]
        count=count+1

    return True

#weights and node values
for i in assetlist:
    #generate the weights of connections
    health=gen_edge_weights(i)
    #generate the node value (between 1 and 100)
    #assets_value.append(np.random.uniform(1,100))
    
    #for testing, all worth 100
    assets_value.append(100)
    
print(weights)
print(assets_value)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0.596998531357922, 0.10483816584038752, 1, 0.5655071432579051, 0.16041247441394602, 0.283604478265729, 0.019125734102951264, 0.2635864246301869, 0.21358974199103464, 0.8009666358136861, 0.0686982560565268, 0.44016551976526075, 0.05895762809512728, 0.9285715271345808, 0.23581724177511176, 0.3687396178814931, 0.17334161807480342, 0.019185251946500226, 0.3711127462687642, 0.22861212647880924, 0.35757086264854665, 0.1468923357803701, 0.3992285275821551, 0.17104815658069017, 0.0917131157584018, 0.4031029954349462, 0.0633801104733307, 0.07088431388001026, 0.7263661641542007, 0.38161498813343575, 0.22352727629846636, 1, 0.6312603821185069, 0.669849832708996, 0.3304761335523846, 0.31484119713770586, 0.06807426058140675, 0.35479858723637736, 0.13365443592998705, 0.0

In [36]:
# plot the graph

import networkx as nx
#import matplotlib.pyplot as plt
from pyvis.network import Network

G=nx.DiGraph()
G.add_edges_from(edges)
count=0
print(G)
net = Network(directed =True,notebook=True, cdn_resources='remote')
net.from_nx(G)
net.repulsion()
net.show("test.html")



DiGraph with 28 nodes and 77 edges
test.html


In [37]:
# setting an output to be loaded in Gremling as a dataset
# here I will format the output in the graphML standard
import os
import time

G2=nx.DiGraph()
for i in range(len(assetlist)):
    G2.add_node(assetlist[i],value=assets_value[i])
    
for i in range(len(edges)):
    G2.add_edge(edges[i][0],edges[i][1],fraction=weights[i])
    
#print(nx.get_node_attributes(G2,"value"))
#print("\n",nx.get_edge_attributes(G2,"fraction"))

time_stamp = time.strftime("%d-%H%M%S") 
file_name = "./data/graph_"+time_stamp +".graphml"
output_file = open(file_name,"w")
nx.write_graphml_lxml(G2,file_name)
output_file.close()