In [1]:
import os
import numpy as np
import networkx as nx
import pandas as pd
import h5py

In [2]:
def write_weightless_flag_file(g, output_file, data=False):
    """Dump graph ``g`` to ``output_file`` as a two-section text file.

    Layout of the written file:

    * a ``dim 0`` line followed by one line holding a ``0`` for every node,
      separated by single spaces;
    * a ``dim 1`` line followed by one ``"<i> <j> <w>"`` line per edge, where
      ``i`` and ``j`` are the positions of the edge's endpoints in the
      iteration order of ``g.nodes()``.

    Parameters
    ----------
    g : graph-like
        Must provide ``nodes()`` and ``edges(data=...)`` the way the networkx
        graphs used in this notebook do.
    output_file : str
        Destination path; opened in text mode ``'w'`` (existing content is
        replaced).
    data : bool, optional
        When true, ``w`` is the edge's ``'weight'`` attribute; otherwise ``w``
        is the literal ``0``.

    Raises
    ------
    KeyError
        If ``data`` is true and an edge has no ``'weight'`` attribute.
    """
    node_list = list(g.nodes())
    # Map each node label to its position in the node ordering.
    index_of = dict(zip(node_list, range(len(node_list))))
    vertex_line = ' '.join(str(z) for z in np.zeros(len(node_list), dtype=int))

    def edge_lines():
        # Lazily yields one finished text line per edge, so lines are produced
        # (and written) in edge order, one at a time.
        if data:
            for src, dst, attrs in g.edges(data=True):
                yield '%s %s %s\n' % (index_of[src], index_of[dst], attrs['weight'])
        else:
            for src, dst in g.edges(data=False):
                yield '%s %s %s\n' % (index_of[src], index_of[dst], '0')

    with open(output_file, 'w') as out:
        out.write('dim 0\n')
        out.write(vertex_line + '\n')
        out.write('dim 1\n')
        out.writelines(edge_lines())
                
def write_hdf5(g, output_file):
    """Store the adjacency matrix of ``g`` in an HDF5 file.

    The file gets a single 2-D dataset named ``'adjacency'`` with the same
    shape as the graph's adjacency matrix. Entry ``[i, j]`` holds the value
    the sparse adjacency matrix has at ``(i, j)``; absent edges stay at the
    dataset's fill value.

    Parameters
    ----------
    g : networkx graph
        Graph to export.
    output_file : str
        Destination path; opened with mode ``'w'`` (an existing file is
        replaced).

    Notes
    -----
    The earlier version assigned the values to the ``File`` object instead of
    the dataset and swallowed the resulting exception, so the dataset was
    never populated. Errors are now allowed to propagate, and the file is
    always closed by the ``with`` block.
    """
    # NetworkX 3.0 dropped to_scipy_sparse_matrix in favour of
    # to_scipy_sparse_array; use whichever the installed version provides.
    to_sparse = getattr(nx, 'to_scipy_sparse_array', None) or nx.to_scipy_sparse_matrix

    # Build the matrix before touching the output file so a failure here
    # cannot leave a truncated file behind.
    x = to_sparse(g).tocsr()
    n_rows, n_cols = x.shape

    # Write in horizontal stripes: only one stripe is ever densified, which
    # keeps memory bounded (~4M elements) regardless of graph size.
    rows_per_block = max(1, 4000000 // max(1, n_cols))

    with h5py.File(output_file, 'w') as h5f:
        # dtype is left at h5py's default, exactly as before.
        dset = h5f.create_dataset('adjacency', shape=x.shape)
        for start in range(0, n_rows, rows_per_block):
            block = x[start:start + rows_per_block]
            if block.nnz:  # all-zero stripes need no write
                dset[start:start + block.shape[0], :] = block.toarray()

In [3]:
# Root folder of the temporal data set. The default is the original
# machine-specific location; set the RFUNKLAB_DATA_ROOT environment variable
# to run the notebook against a copy stored elsewhere.
data_root = os.environ.get('RFUNKLAB_DATA_ROOT',
                           '/home/gebhart/projects/rfunklab/data/temporal/1_1')
collab_data_loc = os.path.join(data_root, 'collaboration')
knowl_data_loc = os.path.join(data_root, 'knowledge')

In [4]:
# Working directory for the cells below; switch to knowl_data_loc to process
# the other data set instead.
p = collab_data_loc

In [None]:

# Convert every CSV edge list found directly in `p` into a flag file stored
# under `p`/flag, one output per input file.
flag_dir = os.path.join(p, 'flag')
os.makedirs(flag_dir, exist_ok=True)  # the writer opens the file directly and needs the folder

# Only *.csv files are inputs; any other regular file in the folder is ignored.
fs = sorted(f for f in os.listdir(p)
            if f.endswith('.csv') and os.path.isfile(os.path.join(p, f)))

for f in fs:
    print(f)
    df = pd.read_csv(os.path.join(p, f), header=0)
    g = nx.Graph()
    # Feed the three columns straight into the graph instead of looping over
    # df.iterrows(): same edges and 'weight' attribute, far less per-row overhead.
    # NOTE(review): the knowledge data presumably uses 'subgroup_id_a' /
    # 'subgroup_id_b' as endpoint columns — confirm before pointing `p` at it.
    g.add_weighted_edges_from(zip(df['inventor_id_a'], df['inventor_id_b'], df['patents']))
    # splitext only strips the trailing extension, unlike str.replace('.csv', '').
    stem = os.path.splitext(f)[0]
    write_weightless_flag_file(g, os.path.join(flag_dir, '{}.flag'.format(stem)), data=True)

In [5]:
# Load a single edge list ('1995.csv' in `p`) for the HDF5 export experiment
# below; the first line of the file is used as the header.
df = pd.read_csv(os.path.join(p, '1995.csv'), header=0)

In [6]:
# Undirected graph for the edge list currently held in `df`: endpoints come
# from 'inventor_id_a' / 'inventor_id_b', and 'patents' becomes the 'weight'
# attribute of each edge.
g = nx.Graph()
# Column-wise insertion avoids the per-row Series that df.iterrows() builds.
g.add_weighted_edges_from(zip(df['inventor_id_a'], df['inventor_id_b'], df['patents']))

In [7]:
# Export the adjacency matrix of `g` to <p>/h5/1995.h5.
# NOTE(review): assumes the 'h5' sub-folder already exists — confirm.
write_hdf5(g,os.path.join(p, 'h5', '{}.h5'.format('1995')))

In [8]:
# Reopen the exported file read-only to inspect what was stored.
# The handle is kept in `f` for the next cell and is not closed here.
f = h5py.File(os.path.join(p, 'h5', '{}.h5'.format('1995')),'r')

In [17]:
# Spot-check one stored value: row 0, column 6 of the 'adjacency' dataset.
f['adjacency'][0,6]

0.0

In [12]:
# In-memory sparse adjacency matrix of `g`, for comparison with the file.
# NOTE(review): to_scipy_sparse_matrix is no longer available in NetworkX 3.x
# (to_scipy_sparse_array replaces it) — confirm the installed version.
x = nx.to_scipy_sparse_matrix(g)

In [18]:
# Row and column indices of the non-zero entries of `x`.
rows,cols = x.nonzero()

In [25]:
# Look up a single cell of `x`.
# NOTE(review): the row index comes from non-zero entry 1 and the column index
# from non-zero entry 2, so this cell is not necessarily a stored edge —
# x[rows[1], cols[1]] may have been intended; confirm.
x[rows[1],cols[2]]

array(0)