# Synthetic data generator for asset networks
 Here we first create a list of assets; second we distribute randomly their ownership (fractional).

In [12]:
import random
import string

N = 20 # number of nodes
c = 0.15 # controll parameter for the connectivity U(0,c)


# code to generate a single random string
def get_random_string(length):
    # Only upper case
    letters = string.ascii_uppercase
    result_str = ''.join(random.choice(letters) for i in range(length))
    # print random string
    #print(result_str)
    return result_str

# code to generate a list of random strings. Return approx "N=length" elements because identical ones are dropped
def gen_random_asset_list(length,name_length):
    my_asset_list = []
    for i in range(length):
        asset_name = get_random_string(name_length)
        my_asset_list.append(asset_name)
    #print(my_asset_list)
    
    #remove identical entries by passing the generated list into a set and back to a list.
    new_asset_list = list(set(my_asset_list))
    return new_asset_list


In [17]:
# random edge probability per node

import numpy as np

assetlist = gen_random_asset_list(20,2)
# generate a list of probabilities to a edge to exists from each node:
node_edge_prob = []
count = 0
for i in assetlist:
    node_edge_prob.append(np.random.uniform(0,0.1))
    print(i," ",node_edge_prob[count])
    count=count+1

DD   0.010742548787250296
FO   0.07153250324614253
OG   0.07448869373921624
QP   0.05499844149071658
MS   0.022387396435097375
TD   0.018939384505535273
NF   0.008558095194142202
ER   0.019177953510640878
MZ   0.09489015049803988
JT   0.07031492853127452
UN   0.07673728456957048
ZB   0.05147910021825372
PM   0.056536154844515546
RG   0.01816434714434263
SS   0.042256665805464104
ME   0.027759317594308375
RY   0.014841662756681218
CT   0.020250936484100625
QB   0.03200259319424453
FP   0.006563556197840137


In [19]:
#generating the edges
edges = []

count = 0
for i in assetlist:
    for j in assetlist:
        if np.random.uniform(0,1) < node_edge_prob[count] and i!=j:
            edges.append((i,j))
    count=count+1

print(edges)

[('FO', 'QP'), ('FO', 'ME'), ('OG', 'UN'), ('OG', 'ME'), ('OG', 'QB'), ('OG', 'FP'), ('MS', 'CT'), ('MZ', 'MS'), ('MZ', 'RG'), ('JT', 'NF'), ('JT', 'ER'), ('JT', 'ME'), ('UN', 'JT'), ('PM', 'DD'), ('RG', 'NF'), ('SS', 'ME'), ('RY', 'SS')]


In [20]:
#generate edges weigths: percentage over ownership

weights = [0]*len(edges)
print(weights)
assets_value = []

#this will split the value in a given number of parts
def split_num_random(value,num_parts):
    dividing_points = []
    parts = []
    if num_parts == 1:
        parts.append(value)
        
    elif num_parts > 1:
        for i in range(0, num_parts-1):
            dividing_points.append(np.random.uniform(0,value))
        dividing_points.sort()
        #print(dividing_points)
        old_value = 0
        for i in range(len(dividing_points)):
            parts.append(dividing_points[i]-old_value)
            old_value = dividing_points[i]
        parts.append(value-old_value)
        #print(parts)
    return parts


#here edges and weights vectors must already be define
#this code associate divide the ownership into many parts, each corresponding to an in-vector to a node
def gen_edge_weights(node):
    # count number of connections for a node
    count = 0
    edges_index=[]
    
    for i in range(len(edges)):
        if edges[i][1] == node:
            edges_index.append(i)
            count=count+1
    
    #print(count)
    parts_local = split_num_random(1,count)
    #print(parts_local)
    
    count = 0
    for i in edges_index:
        #print(i)
        # weights are changed directly over the external variable
        weights[i]=parts_local[count]
        count=count+1

    return True

#weights and node values
for i in assetlist:
    #generate the weights of connections
    health=gen_edge_weights(i)
    #generate the node value (between 1 and 100)
    #assets_value.append(np.random.uniform(1,100))
    
    #for testing, all worth 100
    assets_value.append(100)
    
print(weights)
print(assets_value)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 0.009334687375733286, 1, 0.7681354811550553, 1, 1, 1, 1, 1, 0.8825092623574553, 1, 0.07133475570285741, 1, 1, 0.1174907376425447, 0.15119507576635405, 1]
[100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100]


In [21]:
# plot the graph

import networkx as nx
#import matplotlib.pyplot as plt
from pyvis.network import Network

G=nx.DiGraph()
G.add_edges_from(edges)
count=0
print(G)
net = Network(directed =True,notebook=True, cdn_resources='remote')
net.from_nx(G)
net.repulsion()
net.show("test.html")



DiGraph with 18 nodes and 17 edges
test.html


In [11]:
# setting an output to be loaded in Griemling as a dataset
# here I will format the output in the graphML standard
import os
import time

G2=nx.DiGraph()
for i in range(len(assetlist)):
    G2.add_node(assetlist[i],value=assets_value[i])
    
for i in range(len(edges)):
    G2.add_edge(edges[i][0],edges[i][1],fraction=weights[i])
    
#print(nx.get_node_attributes(G2,"value"))
#print("\n",nx.get_edge_attributes(G2,"fraction"))

time_stamp = time.strftime("%d-%H%M%S") 
file_name = "./data/graph_"+time_stamp +".graphml"
output_file = open(file_name,"w")
nx.write_graphml_lxml(G2,file_name)
output_file.close()