In [1]:
import os.path as osp

import torch
from torch_geometric.nn import MetaPath2Vec

In [2]:
from scipy import io
from torch_geometric.utils import from_scipy_sparse_matrix
from torch_geometric.data import Data

# graph 1

In [3]:
g = io.loadmat('/datasets/dsc180a-wi20-public/Malware/group_data/group_02/sensitive_data/interim/graph/graph_1.mat')

In [4]:
user_user = from_scipy_sparse_matrix(g['U'])
author_post = from_scipy_sparse_matrix(g['A'])
post_user = from_scipy_sparse_matrix(g['P'])

In [5]:
g['post_indx'].shape[1]

103413

In [6]:
data = Data(
    edge_index_dict = {
        ('user', 'replied by', 'user') : user_user[0],
        ('user', 'wrote', 'post') : author_post[0],
        ('post', 'commented by', 'user') : post_user[0],
    },
    num_nodes_dict = {
        'post': g['post_indx'].shape[1],
        'user' : g['user_indx'].shape[1]
    },
    y_dict = {
        'post': torch.from_numpy(g['post_label'].reshape(-1,)).long()
    },
    x_dict = {
        'post': torch.from_numpy(g['post_cate']).float()
    }
)

In [7]:
metapath = [
    ('post', 'commented by', 'user'),
    ('user', 'replied by', 'user'),
    ('user', 'wrote', 'post')
]

In [8]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [9]:
model = MetaPath2Vec(data.edge_index_dict, embedding_dim=128,
                     metapath=metapath, walk_length=50, context_size=7,
                     walks_per_node=5, num_negative_samples=5,
                     sparse=True).to(device)

In [26]:
loader = model.loader(batch_size=16, shuffle=False, num_workers=8)
optimizer = torch.optim.SparseAdam(model.parameters(), lr=0.01)

In [29]:
loading = iter(loader)
counter = 0
while tmp != None:
    prev = tmp
    try:
        tmp = next(loading)
    except IndexError:
        continue
    except StopIteration:
        tmp = None
    counter += 1

In [30]:
counter

6465

In [31]:
prev

(tensor([[103408, 214320, 107165,  ..., 539621, 446463,  43523],
         [103409, 199599, 571074,  ..., 474440, 288318,  75412],
         [103410, 355714, 486876,  ..., 234713, 560888,  89601],
         ...,
         [278157,  15491, 536311,  ...,  78628, 142000, 154703],
         [576989,   5994, 209205,  ...,   3186, 140329, 226872],
         [256241,   2591, 411321,  ...,   4737, 411321, 214266]]),
 tensor([[103408, 146529, 402927,  ..., 153844, 251162,  49938],
         [103409, 551784, 381513,  ..., 537641, 485434,  45425],
         [103410, 307983, 246372,  ..., 200748, 451944,  77453],
         ...,
         [257165,    595, 212398,  ...,  73577, 564333, 464584],
         [320349,  34090, 199319,  ...,  26163, 218989, 373654],
         [517312,  57033, 575295,  ...,  10769, 449020, 438912]]))

In [16]:
def train(epoch, log_steps=100, eval_steps=2000):
    model.train()

    total_loss = 0
    for i, (pos_rw, neg_rw) in enumerate(loader):
        optimizer.zero_grad()
        loss = model.loss(pos_rw.to(device), neg_rw.to(device))
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        if (i + 1) % log_steps == 0:
            print((f'Epoch: {epoch}, Step: {i + 1:05d}/{len(loader)}, '
                   f'Loss: {total_loss / log_steps:.4f}'))
            total_loss = 0

In [17]:
for epoch in range(1, 5):
    train(epoch)
    print(f'Epoch: {epoch}')

Epoch: 1, Step: 00100/6464, Loss: 9.7041
Epoch: 1, Step: 00200/6464, Loss: 9.0075
Epoch: 1, Step: 00300/6464, Loss: 8.3603
Epoch: 1, Step: 00400/6464, Loss: 7.7983
Epoch: 1, Step: 00500/6464, Loss: 7.3316
Epoch: 1, Step: 00600/6464, Loss: 6.9026
Epoch: 1, Step: 00700/6464, Loss: 6.4879
Epoch: 1, Step: 00800/6464, Loss: 6.1329
Epoch: 1, Step: 00900/6464, Loss: 5.8208
Epoch: 1, Step: 01000/6464, Loss: 5.4998
Epoch: 1, Step: 01100/6464, Loss: 5.2193
Epoch: 1, Step: 01200/6464, Loss: 4.9503
Epoch: 1, Step: 01300/6464, Loss: 4.6946
Epoch: 1, Step: 01400/6464, Loss: 4.4528
Epoch: 1, Step: 01500/6464, Loss: 4.2330
Epoch: 1, Step: 01600/6464, Loss: 4.0273
Epoch: 1, Step: 01700/6464, Loss: 3.8290
Epoch: 1, Step: 01800/6464, Loss: 3.6388
Epoch: 1, Step: 01900/6464, Loss: 3.4611
Epoch: 1, Step: 02000/6464, Loss: 3.3034
Epoch: 1, Step: 02100/6464, Loss: 3.1654
Epoch: 1, Step: 02200/6464, Loss: 3.0243
Epoch: 1, Step: 02300/6464, Loss: 2.9031
Epoch: 1, Step: 02400/6464, Loss: 2.7898
Epoch: 1, Step: 

KeyboardInterrupt: 

In [13]:
model('post')

tensor([[-0.2263, -0.2811, -0.1195,  ..., -0.0130, -0.0995,  0.4083],
        [-0.0524, -0.2548,  0.0232,  ..., -0.0941, -0.1302,  0.0568],
        [ 0.1766,  0.2105,  0.4731,  ...,  0.2764, -0.1551, -0.0673],
        ...,
        [ 0.0261, -0.0148, -0.0330,  ..., -0.1629,  0.3715, -0.0166],
        [-0.1730, -0.0090,  0.1922,  ..., -0.3166,  0.2244, -0.2501],
        [-0.3542, -0.3214, -0.3340,  ..., -0.4198,  0.2730,  0.1638]],
       device='cuda:0', grad_fn=<SliceBackward>)