## 0. Mengimpor Modul dan Membaca Data

In [None]:
# Machine Learning Model
from sklearn.metrics import accuracy_score, average_precision_score, roc_auc_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
import xgboost as xgb

# Graph processing and embedding
from torch_geometric.loader import NeighborLoader, ImbalancedSampler
from torch_geometric.utils import train_test_split_edges
from torch_geometric.nn.models.autoencoder import ARGVA
from torch_geometric.nn import GCNConv, GATConv
import torch_geometric.transforms as T
from torch_geometric.data import Data
import torch.nn.functional as F
from torch.nn import Linear
import torch_geometric
import torch

# Others
import time
import pandas as pd
import numpy as np

In [None]:
# Select the compute device: CUDA when it is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Preprocessing pipeline; T.Compose applies the steps in the order listed.
transform = T.Compose([
    T.ToDevice(device),
    # Hold out 5% of the edges for validation and 10% for testing.
    # split_labels=True is what provides the separate
    # pos_edge_label_index / neg_edge_label_index attributes read by the
    # training and evaluation code.
    T.RandomLinkSplit(
        num_val=0.05,
        num_test=0.1,
        is_undirected=True,
        split_labels=True,
        add_negative_train_samples=False,
    ),
    # With no validation or test nodes requested, the node split puts every
    # node into the training mask.
    T.RandomNodeSplit(split='train_rest', num_val=0, num_test=0),
    T.NormalizeFeatures(),
])

# Load the stored graph and cast the node features to float32.
# NOTE(review): torch.load unpickles the file, so only load trusted files.
data = torch.load('BankSim 8000')
data.x = data.x.to(torch.float32)
train_data, val_data, test_data = transform(data)

## 1. Mendefinisikan Model AAVGA dan fungsi training

In [None]:
# Encoder class with k=1 attention heads (n_heads)
class Encoder(torch.nn.Module):
    """Graph encoder producing the two outputs of a variational encoder.

    A single-head ``GATConv`` layer followed by ReLU feeds two parallel
    ``GCNConv`` layers; their outputs are returned as a pair (named ``mu``
    and ``logstd`` after the attributes that produce them).

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Output size of the attention layer.
        out_channels: Output size of each of the two GCN layers.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GATConv(in_channels, hidden_channels, heads=1)
        # `heads` is a GATConv option; GCNConv does not define it, so it is
        # not passed to the two GCN layers below.
        self.conv_mu = GCNConv(hidden_channels, out_channels)
        self.conv_logstd = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Return ``(conv_mu(h), conv_logstd(h))`` for ``h = relu(conv1(x))``."""
        x = self.conv1(x, edge_index).relu()
        return self.conv_mu(x, edge_index), self.conv_logstd(x, edge_index)

# Discriminator class (MLP)
class Discriminator(torch.nn.Module):
    """Three-layer perceptron with ReLU after the first two layers.

    Args:
        in_channels: Size of each input vector.
        hidden_channels: Width of the two hidden layers.
        out_channels: Size of the output vector.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        # Layers are created in forward order.
        self.lin1 = Linear(in_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, hidden_channels)
        self.lin3 = Linear(hidden_channels, out_channels)

    def forward(self, x):
        """Apply lin1 -> ReLU -> lin2 -> ReLU -> lin3 and return the result."""
        hidden = x
        for layer in (self.lin1, self.lin2):
            hidden = layer(hidden).relu()
        # The last layer has no activation; raw outputs are returned.
        return self.lin3(hidden)

# Build the model: encoder and discriminator wrapped in ARGVA, moved to the
# selected device, with one Adam optimizer per sub-network.
encoder = Encoder(
    in_channels=train_data.num_features,
    hidden_channels=32,
    out_channels=32,
)
# The discriminator input width (32) equals the encoder output width.
discriminator = Discriminator(
    in_channels=32,
    hidden_channels=64,
    out_channels=32,
)
model = ARGVA(encoder, discriminator).to(device)

# The encoder uses a larger learning rate (0.005) than the discriminator
# (0.001).
encoder_optimizer = torch.optim.Adam(params=encoder.parameters(), lr=0.005)
discriminator_optimizer = torch.optim.Adam(
    params=discriminator.parameters(),
    lr=0.001,
)

# Training function
def train():
    """Run one training step on ``train_data`` and return the encoder loss.

    The discriminator is stepped five times on the current embedding, then
    the encoder is stepped once on the sum of the reconstruction loss, the
    regularisation loss and the KL loss scaled by ``1 / num_nodes``.

    Returns:
        The encoder loss of this step as a Python float.
    """
    model.train()
    encoder_optimizer.zero_grad()

    z = model.encode(train_data.x, train_data.edge_index)

    # The discriminator is optimized more often than the encoder.
    for _ in range(5):
        discriminator_optimizer.zero_grad()
        model.discriminator_loss(z).backward()
        discriminator_optimizer.step()

    recon_term = model.recon_loss(z, train_data.pos_edge_label_index)
    reg_term = model.reg_loss(z)
    kl_weight = 1 / train_data.num_nodes
    loss = recon_term + reg_term + kl_weight * model.kl_loss()

    loss.backward()
    encoder_optimizer.step()
    return float(loss)

# Evaluation function
@torch.no_grad()
def test(data):
    """Evaluate the current embedding on node classification and link prediction.

    The nodes of ``data`` are encoded, an XGBoost classifier is fitted on the
    embedding against ``data.y``, and classification metrics are computed.
    Link-prediction AUC/AP come from ``model.test`` on the positive and
    negative label edges of ``data``.

    Args:
        data: Graph split providing ``x``, ``edge_index``, ``y``,
            ``pos_edge_label_index`` and ``neg_edge_label_index``.

    Returns:
        Tuple ``(acc, cm, auc_clf, ap_clf, z, label, auc, ap)``: classifier
        accuracy, confusion matrix, ROC-AUC and average precision; the
        embedding tensor; the label array; link-prediction AUC and AP.
    """
    model.eval()
    z = model.encode(data.x, data.edge_index)
    label = data.y.cpu().numpy()
    clf_input = z.cpu().numpy()

    # NOTE(review): the classifier is fitted and scored on the same nodes, so
    # the classification metrics below are in-sample figures, not held-out
    # performance. Confirm whether a separate evaluation split is intended.
    classifier = xgb.XGBClassifier(random_state=42)
    classifier.fit(X=clf_input, y=label)
    pred = classifier.predict(clf_input)
    # ROC-AUC and average precision are ranking metrics and need continuous
    # scores; hard 0/1 predictions reduce them to a single operating point.
    # Assumes binary labels with class 1 as the positive class.
    score = classifier.predict_proba(clf_input)[:, 1]

    auc, ap = model.test(z, data.pos_edge_label_index, data.neg_edge_label_index)
    acc = accuracy_score(label, pred)
    cm = confusion_matrix(label, pred)
    auc_clf = roc_auc_score(label, score)
    ap_clf = average_precision_score(label, score)
    return acc, cm, auc_clf, ap_clf, z, label, auc, ap

## 2. Melakukan Training model AAVGA

In [8]:
# Time the whole run: 10 epochs of training, each followed by an evaluation
# on the test split.
start_time = time.time()

# Per-epoch histories of the training loss and the classification metrics.
losses, acc_list, cm_list, auc_clf_list, ap_clf_list = [], [], [], [], []

for epoch in range(10):
    loss = train()
    losses.append(loss)

    # `z` and `label` are rebound every epoch, so after the loop they hold
    # the values from the last epoch.
    acc, cm, auc_clf, ap_clf, z, label, auc, ap = test(test_data)
    acc_list.append(acc)
    cm_list.append(cm)
    auc_clf_list.append(auc_clf)
    ap_clf_list.append(ap_clf)

    print(f'Epoch: {epoch:03d}, Loss: {loss:.3f}, ACC: {acc:.3f}, AUC_CLF: {auc_clf:.3f}, AP_CLF: {ap_clf:.3f}')
    print('AUC link pred:', auc, 'AP link pred:', ap)

# Despite its name, `end_time` holds the elapsed wall-clock seconds.
end_time = time.time() - start_time
print('Training time for 10 epochs:', end_time)

Epoch: 000, Loss: 5.330, ACC: 1.000, AUC_CLF: 1.000, AP_CLF: 1.000
AUC link pred: 0.40731652510459027 AP link pred: 0.42581670331867727
Epoch: 001, Loss: 5.188, ACC: 1.000, AUC_CLF: 1.000, AP_CLF: 1.000
AUC link pred: 0.4289632221833005 AP link pred: 0.43772589985809585
Epoch: 002, Loss: 5.055, ACC: 1.000, AUC_CLF: 1.000, AP_CLF: 1.000
AUC link pred: 0.4346350743401321 AP link pred: 0.4409532231563128
Epoch: 003, Loss: 4.907, ACC: 1.000, AUC_CLF: 1.000, AP_CLF: 1.000
AUC link pred: 0.4427415861685526 AP link pred: 0.44535725343110855
Epoch: 004, Loss: 4.782, ACC: 1.000, AUC_CLF: 1.000, AP_CLF: 1.000
AUC link pred: 0.46343029994083446 AP link pred: 0.45722401295948145
Epoch: 005, Loss: 4.677, ACC: 1.000, AUC_CLF: 1.000, AP_CLF: 1.000
AUC link pred: 0.49934643443270194 AP link pred: 0.4775976007769435
Epoch: 006, Loss: 4.535, ACC: 1.000, AUC_CLF: 1.000, AP_CLF: 1.000
AUC link pred: 0.543303120724039 AP link pred: 0.5000313457089266
Epoch: 007, Loss: 4.380, ACC: 1.000, AUC_CLF: 1.000, AP_

## 3. Menyimpan hasil penyematan (Z)

In [None]:
# Write the embedding `z` from the last evaluation to CSV.
# The tensor is moved to host memory first: `device` may be CUDA, and
# `Tensor.numpy()` only works on CPU tensors (test() also uses
# `.cpu().numpy()`).
embedding = pd.DataFrame(z.detach().cpu().numpy())
embedding.to_csv('Embedding AAVGA without ImbalanceSampler.csv')