# Synthetic data generator for asset networks
Here we first create a list of assets; then we randomly distribute their (fractional) ownership.

In [1]:
import random
import string

# code to generate a single random string
def get_random_string(length):
    """Build a random name made only of upper-case ASCII letters.

    Each character is drawn independently with ``random.choice``, so the
    result depends on the current state of the ``random`` module.

    length: number of characters to generate.
    Returns the generated string (empty when ``length`` is 0).
    """
    alphabet = string.ascii_uppercase
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

# code to generate a list of random strings. Return approx "N=length" elements because identical ones are dropped
def gen_random_asset_list(length,name_length):
    """Generate a list of unique random asset names.

    ``length`` names of ``name_length`` characters each are drawn with
    ``get_random_string``. Duplicates are dropped, so the returned list can
    contain fewer than ``length`` entries.

    The surviving names keep the order in which they were first drawn.

    length: number of names to draw.
    name_length: number of characters per name.
    Returns a list of distinct names.
    """
    drawn_names = [get_random_string(name_length) for _ in range(length)]

    # dict.fromkeys removes duplicates while keeping first-seen order.
    # Going through a set would make the order depend on string hashing,
    # which can change from one interpreter run to the next.
    return list(dict.fromkeys(drawn_names))


In [2]:
# random edge probability per node

import numpy as np

assetlist = gen_random_asset_list(10,2)
# one random edge probability per asset, drawn uniformly between 0 and 0.5
node_edge_prob = [np.random.uniform(0,0.5) for _ in assetlist]
# show each asset next to the probability it was given
for asset_name, edge_prob in zip(assetlist, node_edge_prob):
    print(asset_name," ",edge_prob)

VN   0.07727609736323748
XO   0.0072445223443101114
CJ   0.4441718863991855
UO   0.4876682407575543
RX   0.3171874605963677
CH   0.14878082554179495
WM   0.3165648890710265
LZ   0.40614687014244966
ZS   0.1229900794546806
JJ   0.49026221151420807


In [3]:
# generate the directed edges: each ordered pair of distinct assets is linked
# when a uniform draw falls below the probability assigned to the source asset
edges = [
    (source, target)
    for source, edge_prob in zip(assetlist, node_edge_prob)
    for target in assetlist
    # the random draw is evaluated first, so it is consumed for self-pairs too
    if np.random.uniform(0,1) < edge_prob and source != target
]

print(edges)

[('CJ', 'UO'), ('CJ', 'CH'), ('CJ', 'JJ'), ('UO', 'XO'), ('UO', 'RX'), ('UO', 'CH'), ('UO', 'ZS'), ('RX', 'UO'), ('RX', 'CH'), ('RX', 'LZ'), ('RX', 'JJ'), ('WM', 'LZ'), ('WM', 'JJ'), ('LZ', 'VN'), ('LZ', 'XO'), ('LZ', 'WM'), ('LZ', 'JJ'), ('JJ', 'VN'), ('JJ', 'CJ'), ('JJ', 'WM'), ('JJ', 'ZS')]


In [4]:
#generate edge weights: percentage of ownership

# one weight slot per edge, all starting at zero
weights = [0 for _ in edges]
print(weights)
# per-asset values, filled in further down in this cell
assets_value = list()

#this will split the value in a given number of parts
def split_num_random(value,num_parts):
    """Split ``value`` into ``num_parts`` random pieces that add up to it.

    ``num_parts - 1`` cut points are drawn uniformly between 0 and ``value``
    and sorted; the pieces are the gaps between consecutive cut points, with
    0 and ``value`` as the outer boundaries.

    value: the amount to split.
    num_parts: how many pieces to produce.
    Returns a list of pieces: ``[value]`` for a single part, and an empty
    list when ``num_parts`` is not at least 1.
    """
    if num_parts == 1:
        return [value]
    if not num_parts > 1:
        return []

    cut_points = sorted(np.random.uniform(0,value) for _ in range(num_parts - 1))
    # consecutive boundaries delimit each piece
    boundaries = [0] + cut_points + [value]
    return [upper - lower for lower, upper in zip(boundaries, boundaries[1:])]


#here the edges and weights lists must already be defined
#this code divides the ownership into many parts, each corresponding to an incoming edge of a node
def gen_edge_weights(node):
    """Assign random weights summing to 1 to the edges that end at ``node``.

    Reads the module-level ``edges`` list and writes the result straight into
    the module-level ``weights`` list, at the same positions as the matching
    edges. A node with no incoming edge leaves ``weights`` untouched.

    node: name of the target node.
    Returns True.
    """
    # positions in `edges` of every edge whose target is this node
    incoming = [position for position, edge in enumerate(edges) if edge[1] == node]

    # one random share per incoming edge; together they add up to 1
    shares = split_num_random(1,len(incoming))

    for position, share in zip(incoming, shares):
        # the external weights list is modified in place
        weights[position] = share

    return True

# weights and node values, produced asset by asset; the two random steps stay
# interleaved so the sequence of random draws is the same as before
for asset_name in assetlist:
    # fill in the weights of the edges pointing at this asset
    gen_edge_weights(asset_name)
    # draw the value of the asset itself (between 1 and 100)
    assets_value.append(np.random.uniform(1,100))

print(weights)
print(assets_value)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0.18294082436623982, 0.5270461755539644, 0.32221368243755877, 0.3660418628716223, 1, 0.18043023760170718, 0.04526949780921219, 0.8170591756337602, 0.29252358684432844, 0.18030796851242747, 0.1786311848258123, 0.8196920314875725, 0.004420388134103503, 0.2440035249542467, 0.6339581371283777, 0.6991848045633375, 0.4947347446025254, 0.7559964750457533, 1, 0.30081519543666246, 0.9547305021907878]
[65.73142146868169, 97.74581512630425, 31.241694626483024, 33.358550893124345, 95.9976560529197, 63.08817583623501, 58.63427824516458, 40.966957244386506, 48.68201325380122, 51.717548706499386]


In [5]:
# plot the graph

import networkx as nx
#import matplotlib.pyplot as plt
from pyvis.network import Network

# Build the directed graph. Assets are added as nodes first, so that an asset
# that happens to have no edge at all still appears in the plot.
G=nx.DiGraph()
G.add_nodes_from(assetlist)
G.add_edges_from(edges)
print(G)

# render the graph as an interactive HTML page with pyvis
net = Network(directed =True,notebook=True, cdn_resources='remote')
net.from_nx(G)
net.repulsion()
net.show("test.html")



DiGraph with 10 nodes and 21 edges
test.html


In [9]:
# setting an output to be loaded in Griemling as a dataset
# here I will format the output in the graphML standard
import os
import time

# Build the graph to export: every asset is a node carrying its value, and
# every edge carries the ownership fraction stored at the same index in weights.
G2=nx.DiGraph()
for asset_name, asset_value in zip(assetlist, assets_value):
    G2.add_node(asset_name,value=float(asset_value))

for (source, target), fraction in zip(edges, weights):
    # weights mixes ints (0 or 1) and floats; casting gives the attribute a
    # single numeric type across all edges in the GraphML output
    G2.add_edge(source,target,fraction=float(fraction))

time_stamp = time.strftime("%d-%H%M%S")
file_name = "./data/graph_"+time_stamp +".graphml"

# make sure the output folder exists, then actually write the graph
# (previously the file was opened and closed without any content)
os.makedirs(os.path.dirname(file_name), exist_ok=True)
nx.write_graphml(G2, file_name)