In [1]:
import numpy as np
import scipy.sparse as sp
import torch
import pandas as pd

In [2]:
df = pd.read_csv('./data/gcn.csv', encoding='cp949')
df

Unnamed: 0,정보,1,2,3,4,5,label
0,555,1,0,0,0,0,강아지
1,777,0,1,0,1,0,고양이
2,333,0,0,1,0,0,얼룩말
3,999,0,0,0,1,0,고양이
4,111,1,0,0,0,1,강아지


# 정보1

In [3]:
# 정보 1 
idx_features_labels = np.array(df)
idx_features_labels

array([[555, 1, 0, 0, 0, 0, '강아지'],
       [777, 0, 1, 0, 1, 0, '고양이'],
       [333, 0, 0, 1, 0, 0, '얼룩말'],
       [999, 0, 0, 0, 1, 0, '고양이'],
       [111, 1, 0, 0, 0, 1, '강아지']], dtype=object)

In [4]:
# 노드의 features
features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype = np.float32)
features.toarray() # [1:-1] 의 행렬만 가져온다.

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 1., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 1.]], dtype=float32)

In [5]:
# label값 원핫인코딩
classes = set(idx_features_labels[:, -1])
print('label 종류 : ' , classes)

classes_dict = {c: np.identity(len(classes))[i, :] for i, c in enumerate(classes)}
labels_onehot = np.array(list(map(classes_dict.get,idx_features_labels[:, -1])), dtype=np.int32) # dict.get 은 Key에 해당하는 value들을 돌려준다. 이를 array로 !
labels = labels_onehot
labels

label 종류 :  {'고양이', '얼룩말', '강아지'}


array([[0, 0, 1],
       [1, 0, 0],
       [0, 1, 0],
       [1, 0, 0],
       [0, 0, 1]])

# 정보 2

In [6]:
# tensor에 올리기 전이기 때문에 양방향 x
edges_unordered = np.array([[555, 111],
                            [777, 999]])
edges_unordered

array([[555, 111],
       [777, 999]])

In [7]:
# 논문번호 -> 인덱스 로 변환
idx = np.array(idx_features_labels[:,0], 
               dtype=np.int32)
idx_map = {j : i for i, j in enumerate(idx)}
idx_map

{555: 0, 777: 1, 333: 2, 999: 3, 111: 4}

In [8]:
# 논문 번호의 해당 인덱스를 가져와서 -> 노드, 노드 로 변경
edges = np.array(list(map(idx_map.get, edges_unordered.flatten())), dtype=np.int32).reshape(edges_unordered.shape)
edges 

array([[0, 4],
       [1, 3]])

# 정보 1 + 정보 2

In [9]:
adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])), # (data(i, j))
                    shape = (labels.shape[0], labels.shape[0]), # [shape=(M, N)]
                    dtype=np.float32)

print('{}'.format(adj))

  (0, 4)	1.0
  (1, 3)	1.0


## adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

In [10]:
#########얘랑
print('{}'.format(adj))  # 원래 데이터

  (0, 4)	1.0
  (1, 3)	1.0


In [11]:
print('{}'.format(adj.T)) # 양방향인거 그래프에 넣어야하기 때문에

  (4, 0)	1.0
  (3, 1)	1.0


In [12]:
#######얘랑
print('{}'.format(adj.T.multiply(adj.T > adj))) # 오름차순 정렬 어떻게되느거지..?

  (3, 1)	1.0
  (4, 0)	1.0


In [13]:
####### 얘랑
print('{}'.format(adj.multiply(adj.T > adj))) # 결과값 없음




In [14]:
print('{}'.format(adj + adj.T.multiply(adj.T > adj))) # 행렬합해준다 

  (0, 4)	1.0
  (1, 3)	1.0
  (3, 1)	1.0
  (4, 0)	1.0


In [15]:
# build symmetric adjacency matrix
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
print('{}'.format(adj))

  (0, 4)	1.0
  (1, 3)	1.0
  (3, 1)	1.0
  (4, 0)	1.0


# normalize

In [16]:
# normalize
def normalize(mx) : 
    rowsum = np.array(mx.sum(1)) # 각 노드 정보 개수
    print('rowsum')
    print(rowsum)

    # r_inv
    r_inv = np.power(rowsum, -1).flatten() # 0, 1, # power : 0, 1, 8, 27, ,,, / 0, 1, 4, 9, ,,, # 역행렬을 취해준다..왜 ?
    print('r_inv')
    print(r_inv)
    r_inv[np.isinf(r_inv)] = 0
    print(r_inv)


    # r_mat_inv
    r_mat_inv = sp.diags(r_inv) # 뭔가 늘려서 행렬로 만드는거같음
    mx = r_mat_inv.dot(mx)

    return mx

In [17]:
print('{}'.format(adj + sp.eye(adj.shape[0]))) # 대각행렬 
# adj + sp.eye(5)

  (0, 0)	1.0
  (0, 4)	1.0
  (1, 1)	1.0
  (1, 3)	1.0
  (2, 2)	1.0
  (3, 1)	1.0
  (3, 3)	1.0
  (4, 0)	1.0
  (4, 4)	1.0


In [18]:
print('{}'.format(features))

  (0, 0)	1.0
  (1, 1)	1.0
  (1, 3)	1.0
  (2, 2)	1.0
  (3, 3)	1.0
  (4, 0)	1.0
  (4, 4)	1.0


In [19]:
# features : 각 노드정보
features = normalize(features)


# adj : 노드-노드 연결정보
adj = normalize(adj + sp.eye(adj.shape[0])) # 노드연결정보 + self 노드

rowsum
[[1.]
 [2.]
 [1.]
 [1.]
 [2.]]
r_inv
[1.  0.5 1.  1.  0.5]
[1.  0.5 1.  1.  0.5]
rowsum
[[2.]
 [2.]
 [1.]
 [2.]
 [2.]]
r_inv
[0.5 0.5 1.  0.5 0.5]
[0.5 0.5 1.  0.5 0.5]


In [20]:
print(adj) # 뭔가 나눠갖는 느낌

  (0, 4)	0.5
  (0, 0)	0.5
  (1, 3)	0.5
  (1, 1)	0.5
  (2, 2)	1.0
  (3, 3)	0.5
  (3, 1)	0.5
  (4, 4)	0.5
  (4, 0)	0.5


In [21]:
# 원핫인코딩 된 label 중 해당하는 label이 몇 번 째인지
labels = torch.LongTensor(np.where(labels)[1]) 
print(labels)

tensor([2, 0, 1, 0, 2])


In [22]:
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    
    sparse_mx = sparse_mx.tocoo().astype(np.float32)

    # 노드
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) # vstack : 행 추가

    # 노드 간 edge의 정보
    values = torch.from_numpy(sparse_mx.data) # numpy.ndarray를 tensor로 올려줌

    # 노드 개수, 특성 개수
    shape = torch.Size(sparse_mx.shape)

    return torch.sparse.FloatTensor(indices, values, shape) # sparse : 크기에 맞게 값을 뿌려주는 것 같은데 규칙 잘 모르겠다.

In [23]:
# matrix -> tensor
adj = sparse_mx_to_torch_sparse_tensor(adj)
adj

tensor(indices=tensor([[0, 4, 1, 3, 2, 1, 3, 0, 4],
                       [0, 0, 1, 1, 2, 3, 3, 4, 4]]),
       values=tensor([0.5000, 0.5000, 0.5000, 0.5000, 1.0000, 0.5000, 0.5000,
                      0.5000, 0.5000]),
       size=(5, 5), nnz=9, layout=torch.sparse_coo)

In [24]:
A = adj

In [25]:
print(A)
print(features) # 노드 feature 정보, 노드간 edge정보를 scaling 한 행렬
print(labels) # label

tensor(indices=tensor([[0, 4, 1, 3, 2, 1, 3, 0, 4],
                       [0, 0, 1, 1, 2, 3, 3, 4, 4]]),
       values=tensor([0.5000, 0.5000, 0.5000, 0.5000, 1.0000, 0.5000, 0.5000,
                      0.5000, 0.5000]),
       size=(5, 5), nnz=9, layout=torch.sparse_coo)
  (0, 0)	1.0
  (1, 3)	0.5
  (1, 1)	0.5
  (2, 2)	1.0
  (3, 3)	1.0
  (4, 4)	0.5
  (4, 0)	0.5
tensor([2, 0, 1, 0, 2])


In [45]:
import torch.optim as optim

import torch.nn as nn

class GCN_layer(nn.Module) :
    def __init__(self, in_features, out_features, A) :
        super(GCN_layer, self).__init__() 
        self.in_features = in_features
        self.out_features = out_features
        self.A = A
        self.fc = nn.Linear(in_features, out_features)

    def foward(self, X) :
        return self.fc(torch.spmm(self.A, X)) # 이웃 정보 종합


class GCN(nn.Module) :
    def __init__(self, num_feature, num_class, A) :
        super(GCN, self).__init__()
        self.feature_extractor = nn.Sequential( GCN_layer(num_feature, 16, A),
                                            nn.ReLU(),
                                            GCN_layer(16, num_class, A))

    def forward(self, X) :
        return self.feature_extractor(X)


model = GCN(features.size, labels.unique().size(0), A) # 

In [48]:
_, h = model(features, labels)

TypeError: forward() takes 2 positional arguments but 3 were given

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0001)
train(model, criterion, optimizer, 1000)