In [1]:
import numpy as np
from scipy.sparse import csr_matrix
from scipy.io import mmwrite

In [2]:
def as_dense_matrix(edges, vocab2idx, directed=True):
    """Build a dense weighted adjacency matrix from an edge list.

    Parameters
    ----------
    edges : iterable of (node1, node2, weight)
        Each triple names the two endpoint labels and the edge weight.
        If the same (node1, node2) pair appears more than once, the
        last weight wins because cells are assigned, not accumulated.
    vocab2idx : dict
        Maps a node label to its row/column index. The matrix is
        ``len(vocab2idx)`` x ``len(vocab2idx)``; indices are assumed to
        lie in ``0 .. len(vocab2idx) - 1`` (verify against the caller).
    directed : bool, default True
        When False, every edge is also written in the reverse direction,
        which yields a symmetric matrix.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n, n); cells with no edge stay 0.

    Notes
    -----
    Edges that cannot be placed (unknown label, out-of-range index,
    non-numeric weight) are reported on stdout and skipped so that one
    bad record does not abort the whole build.
    """
    n = len(vocab2idx)
    g = np.zeros((n, n))
    for node1, node2, weight in edges:
        try:
            # Resolve both endpoints before writing so a bad label
            # never leaves a half-written (one-direction) edge behind.
            idx1 = vocab2idx[node1]
            idx2 = vocab2idx[node2]
            g[idx1, idx2] = weight
            if not directed:
                g[idx2, idx1] = weight
        except (KeyError, IndexError, TypeError, ValueError):
            # Only data errors are handled here; KeyboardInterrupt and
            # other non-data failures propagate to the caller.
            print('Exception : {} -> {} : {}'.format(
                    node1, node2, weight))

    return g

def save_sparse_graph(g, idx2vocab, header):
    """Write a graph matrix and its node labels to two sibling files.

    ``<header>.mtx`` receives ``g`` via ``scipy.io.mmwrite``.
    ``<header>.vocab`` receives the entries of ``idx2vocab``, one per
    line in iteration order, encoded as UTF-8.

    Parameters
    ----------
    g : matrix accepted by ``scipy.io.mmwrite``
    idx2vocab : iterable
        Node labels; each is formatted with ``str.format`` before writing.
    header : path prefix shared by both output files (no extension).
    """
    matrix_file = '{}.mtx'.format(header)
    vocab_file = '{}.vocab'.format(header)

    # The matrix is written first; a failure here leaves no vocab file.
    mmwrite(matrix_file, g)

    with open(vocab_file, 'w', encoding='utf-8') as out:
        out.writelines('{}\n'.format(label) for label in idx2vocab)

## subway line 2

홍성필 교수님의 "경영과학"(율곡출판사)에 실린 최단경로문제 예시 데이터를 sparse matrix 형식으로 변환하는 코드입니다.

In [3]:
# Station labels; a label's position in this list is its matrix index.
idx2vocab = '신도림 시청 을지로3가 동대문운동장 충무로 서울역 사당 교대'.split()
# Reverse lookup: station label -> matrix index.
vocab2idx = dict(zip(idx2vocab, range(len(idx2vocab))))

# (station, station, weight) triples from the textbook example.
edges = [
    ('신도림', '시청', 23),
    ('시청', '을지로3가', 4),
    ('을지로3가', '동대문운동장', 3),
    ('동대문운동장', '충무로', 2),
    ('을지로3가', '충무로', 1),
    ('서울역', '충무로', 5),
    ('시청', '서울역', 2),
    ('신도림', '서울역', 17),
    ('신도림', '사당', 17),
    ('서울역', '사당', 16),
    ('사당', '교대', 7),
    ('충무로', '교대', 18),
    ('교대', '동대문운동장', 34),
]

# directed=False mirrors every edge, so the printed matrix is symmetric.
g = as_dense_matrix(edges, vocab2idx, directed=False)
print(g)

[[ 0. 23.  0.  0.  0. 17. 17.  0.]
 [23.  0.  4.  0.  0.  2.  0.  0.]
 [ 0.  4.  0.  3.  1.  0.  0.  0.]
 [ 0.  0.  3.  0.  2.  0.  0. 34.]
 [ 0.  0.  1.  2.  0.  5.  0. 18.]
 [17.  2.  0.  0.  5.  0. 16.  0.]
 [17.  0.  0.  0.  0. 16.  0.  7.]
 [ 0.  0.  0. 34. 18.  0.  7.  0.]]


In [4]:
# Convert the dense adjacency matrix to SciPy's CSR sparse format;
# printing it lists each stored entry as "(row, col)  value".
g_sparse = csr_matrix(g)
print(g_sparse)

  (0, 1)	23.0
  (0, 5)	17.0
  (0, 6)	17.0
  (1, 0)	23.0
  (1, 2)	4.0
  (1, 5)	2.0
  (2, 1)	4.0
  (2, 3)	3.0
  (2, 4)	1.0
  (3, 2)	3.0
  (3, 4)	2.0
  (3, 7)	34.0
  (4, 2)	1.0
  (4, 3)	2.0
  (4, 5)	5.0
  (4, 7)	18.0
  (5, 0)	17.0
  (5, 1)	2.0
  (5, 4)	5.0
  (5, 6)	16.0
  (6, 0)	17.0
  (6, 5)	16.0
  (6, 7)	7.0
  (7, 3)	34.0
  (7, 4)	18.0
  (7, 6)	7.0


In [5]:
# Path prefix for the output files; save_sparse_graph appends ".mtx"
# (matrix) and ".vocab" (station labels) to it.
path_header = '../data/subway_line2'

save_sparse_graph(g_sparse, idx2vocab, path_header)

## toy data

홍성필 교수님의 "경영과학"(율곡출판사)에 실린 최단경로문제 예시 데이터를 sparse matrix 형식으로 변환하는 코드입니다.

In [9]:
# Node labels; a label's position in this list is its matrix index.
idx2vocab = 's 2 3 4 5 6 t'.split()
# Reverse lookup: node label -> matrix index.
vocab2idx = dict(zip(idx2vocab, range(len(idx2vocab))))

# (source, target, weight) triples from the textbook example.
edges = [
    ('s', '2', 2),
    ('s', '3', 7),
    ('2', '4', 3),
    ('2', '3', 1),
    ('3', '5', 4),
    ('3', '6', 5),
    ('4', '3', 2),
    ('4', '5', 3),
    ('5', '6', 4),
    ('5', 't', 1),
    ('6', 't', 5),
]

# directed=True writes each edge one way only (row = source, column = target).
g = as_dense_matrix(edges, vocab2idx, directed=True)
print(g)

[[0. 2. 7. 0. 0. 0. 0.]
 [0. 0. 1. 3. 0. 0. 0.]
 [0. 0. 0. 0. 4. 5. 0.]
 [0. 0. 2. 0. 3. 0. 0.]
 [0. 0. 0. 0. 0. 4. 1.]
 [0. 0. 0. 0. 0. 0. 5.]
 [0. 0. 0. 0. 0. 0. 0.]]


In [10]:
# Convert the toy graph to CSR and save it; save_sparse_graph appends
# ".mtx" and ".vocab" to the path prefix below.
g_sparse = csr_matrix(g)
path_header = '../data/prof_hong_toy'

save_sparse_graph(g_sparse, idx2vocab, path_header)