# AML Data Preprocessing
- Using the IBM dataset for AML: https://www.kaggle.com/datasets/ealtman2019/ibm-transactions-for-anti-money-laundering-aml
- Dataset is generated by IBM Box Generator, models transactions and illicit activities.
- The original dataset is too large for the initial stages of the thesis, so a smaller dataset of 500,000 transactions is used for now.
- In the following we will:
1. explore the data
2. determine nodes and edges
3. determine node and edge attributes
4. create visualization using NetworkX, PyVis, or Graph-tool

* Attributes on ACCOUNT
    * Bank account
    * Account balance
    * BIN number
    * Number of transactions (calculated)
    * Receiving Currency
* Attributes on TRANSACTIONS
    * Payment amount
    * Payment Type
    * Payment Currency (based on “receiving currency” of outgoing bank account)
    * Time


## BASICS

### --- IMPORT LIBRARIES --- 

In [None]:
! pip3 install torch numpy pandas matplotlib torch-geometric

In [None]:
import torch
import time
import random
import hashlib
import datetime
import itertools
import numpy as np
import pandas as pd
import networkx as nx
from pandas import Timestamp
import matplotlib.pyplot as plt
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.utils import to_networkx

### --- LOAD DATASET --- 

In [None]:
filename = "/Users/owhy/Documents/Datasets/HI-Small_Trans.csv"

In [None]:
data = pd.read_csv(filename)
data.head()

In [None]:
print(f"---- shape ----\n - {data.shape}")

In [None]:
print("---- info ----")
data.info()

In [None]:
print("---- basic calculations ----")
data.describe()

## NODE MATRIX

Nodes = Bank Accounts -- bank account number
* BIN Number
* Receiving Currency

* Number of transactions (degree matrix --> calculated based on incoming and outgoing flows)

Edges = transactions -- payment amount
* Payment Type
* Payment Currency
* Date and Time

* Account Balance (before transactions)

In [None]:
data

In [None]:
### merging all accounts and getting unique values

# Every transaction row contributes two account records: the sender
# ("Account" / "From Bank") and the receiver ("Account.1" / "To Bank").
# All three concatenations must list the sender-side column first so that
# row k of each merged Series describes the same account.

# Merge "Account" and "Account.1" columns
merged_accounts = pd.concat([data['Account'], data['Account.1']])

# Merge "From Bank" and "To Bank" columns
merged_banks = pd.concat([data['From Bank'], data['To Bank']])

# Merge "Payment Currency" and "Receiving Currency" columns.
# The sender pays in "Payment Currency" and the receiver is credited in
# "Receiving Currency"; the previous order (receiving first) attached each
# currency to the opposite side of the transaction.
# NOTE(review): column semantics taken from the dataset description — confirm.
merged_currencies = pd.concat([data['Payment Currency'], data['Receiving Currency']])

# Create a new DataFrame with merged columns
merged_df = pd.DataFrame({
    'Accounts': merged_accounts,
    'Bank': merged_banks,
    'Currency': merged_currencies
})

merged_df


In [None]:
# Drop duplicates based on the "Accounts" column to ensure uniqueness
unique_accounts = merged_df.drop_duplicates(subset=['Accounts']).reset_index(drop=True)
unique_accounts

#### --- One-hot encoding: currency ---

In [None]:
# Convert non-numeric columns
positions = unique_accounts["Currency"].str.split(",", expand=True) # creating new columns by splitting receiving currency --> all are added
unique_accounts["first_position"] = positions[0] # first currency in each row is extracted --> actual currency used and that we want as TRUE
# One-hot encoding 
node_features = pd.concat([unique_accounts, pd.get_dummies(unique_accounts["first_position"],dtype='int')], axis=1, join='inner') # effectively adds actual currency to dummy variables/columns
node_features.drop(["Currency", "first_position"], axis=1, inplace=True) # drop the axiliary columns
node_features.head()

# TODO-DONE! concatenate a unique random number to the end --> maintain uniqueness
# TODO-DONE! node --> feature --> feature2 --> node2 | problem with uniqueness of node embeddings --> add unique value to identify the node feature vector
# TODO-DONE! create random identity vector for each ACCOUNT + add Account and From Bank as separate properties of the node
# TODO-DONE! ultimately normalize From bank
# TODO-DONE! feature matrix -- > receiving currency n-hot encoding (0 and 1) + encoding of From Bank and Account (word2vec)
# TODO add edge features --> look into EDGE LABELED GRAPHS where all nodes and edges have labels
# TODO create init for loading graph --> initial step


#### --- Normalization and Vectorization ---

In [None]:
# Normalize 'From Bank' & 'Account'
def normalize(table, new_min=0, new_max=10):
    """Min-max rescale every column of ``table`` into ``[new_min, new_max]``.

    Args:
        table: DataFrame with numeric columns.
        new_min: Lower bound of the target range.
        new_max: Upper bound of the target range.

    Returns:
        A new DataFrame. A single-column input keeps its column name; a
        multi-column input gets columns renamed to ``col_0``, ``col_1``, ...
        in the original column order. A column whose values are all equal
        maps to ``new_min`` instead of NaN.
    """
    def _rescale(values):
        # Works for a one-column DataFrame (min/max are Series) and for a
        # Series (min/max are scalars).
        lowest = values.min()
        span = values.max() - lowest
        # A constant column has span 0, which would give 0/0 = NaN for every
        # row. Dividing by 1 instead leaves the numerator (all zeros) intact,
        # so the column lands on new_min.
        if isinstance(span, pd.Series):
            span = span.replace(0, 1)
        elif span == 0:
            span = 1
        return ((values - lowest) / span) * (new_max - new_min) + new_min

    if len(table.columns) == 1:
        return _rescale(table)

    normalized_df = pd.DataFrame()
    for position, column in enumerate(table.columns):
        normalized_df[f'col_{position}'] = _rescale(table[column])
    return normalized_df

def hashing_vectorization(strings, vector_size=9):
    """Map each value to a fixed-length list of decimal digits via hashing.

    The digits come from a SHA-256 digest reduced modulo ``10 ** vector_size``.
    Python's built-in ``hash()`` is salted per interpreter process for
    strings, so it would yield different vectors after every kernel restart;
    a cryptographic digest keeps the encoding reproducible.

    Args:
        strings: Iterable of values (converted with ``str`` before hashing).
        vector_size: Number of digits in each output vector.

    Returns:
        A list with one ``vector_size``-long list of ints (0-9) per input, in
        input order. Equal inputs always map to equal vectors; distinct
        inputs may collide, so the vectors are not guaranteed to be unique.
    """
    modulus = 10 ** vector_size
    vectors = []
    for string in strings:
        digest = hashlib.sha256(str(string).encode("utf-8")).hexdigest()
        hashed_value = int(digest, 16) % modulus

        # Left-pad with zeros to the requested width; the slice only matters
        # for vector_size == 0, where str(0) is still one character long.
        digits = str(hashed_value).zfill(vector_size)[:vector_size]
        vectors.append([int(digit) for digit in digits])

    return vectors

In [None]:
from_bank_col = node_features.pop('Bank')
account_col = node_features.pop('Accounts')

In [None]:
# node labels for later use

node_labels = pd.DataFrame(account_col)
node_labels

In [None]:
# TODO add vectors as individuals columns in new dataframe

df = pd.DataFrame(account_col, columns=['Accounts'])
df.reset_index(drop=True, inplace=True) # Ensure the DataFrame has the same number of rows as the original series
vectors = hashing_vectorization(df['Accounts'], vector_size=9) 

# Convert vectors into DataFrame
vectors_df = pd.DataFrame(vectors, columns=[f'col_{i}' for i in range(len(vectors[0]))])
result_df = pd.concat([df, vectors_df], axis=1)

In [None]:
accounts_df = result_df.drop(columns=["Accounts"])

In [None]:
from_bank_col

In [None]:
from_bank_binary = [bin(x).split("b")[1] for x in from_bank_col]
# vectors_df = pd.DataFrame(vectors, columns=[f'col_{i}' for i in range(len(vectors[0]))])

res = max(from_bank_binary, key=len) 
print("Longest String is  : ", res)
len(res)

from_bank_binary

In [None]:
def make_binary_fixed_length(binary_lists, res):
    """Left-pad each bit list with zeros up to the length of ``res``.

    Args:
        binary_lists: Iterable of lists of bits.
        res: Any sized object whose length is the target width (the caller
            passes the longest binary string).

    Returns:
        A list with one entry per input list. Shorter lists are replaced by
        a new zero-padded list; lists already at least as long as ``res``
        are passed through unchanged (same object).
    """
    def _pad(bits):
        missing = len(res) - len(bits)
        if missing > 0:
            return [0] * missing + bits
        return bits

    return [_pad(bits) for bits in binary_lists]

In [None]:
binary_lists = [[int(bit) for bit in binary] for binary in from_bank_binary]
binary_lists = make_binary_fixed_length(binary_lists, res)
binary_lists

In [None]:
# Convert vectors into DataFrame
bin_vectors_df = pd.DataFrame(binary_lists, columns=[f'bin_{i}' for i in range(len(binary_lists[0]))])
bin_vectors_df

In [None]:
# TODO normalize vector values to avoid big numbers

from_bank_df = pd.DataFrame(from_bank_col)
accounts_df = pd.DataFrame(accounts_df)

# from_bank_df_norm = normalize(from_bank_df,0,1) # TODO do not normalize at this point --> create BINARY representation
accounts_df_norm = normalize(accounts_df,0,1)

accounts_df_norm

In [None]:
node_features.reset_index(drop=True, inplace=True) # Ensure the DataFrame has the same number of rows as the original series
accounts_df_norm.reset_index(drop=True, inplace=True) # Ensure the DataFrame has the same number of rows as the original series

node_features = pd.concat([node_features, accounts_df_norm], axis=1)
node_features = pd.concat([node_features, bin_vectors_df], axis=1)
node_features

#### --- Account: Unique Identifier ---

In [None]:
# TODO add unique random identifier

unique_ids_set = set()

while len(unique_ids_set) < len(node_features): # uniqueness kept
    unique_ids_set.add(random.random())

unique_ids = list(unique_ids_set)

node_features.insert(0, "Unique ID", unique_ids)

node_features

#### --- X = Node Feature Matrix ---

In [None]:
# TODO nodes should be bank accounts and not transactions. Bank accounts have unique receiving currencies and "bank BINs"
x = node_features.to_numpy()
x.shape # [num_nodes x num_features]


## EDGE MATRIX

In [None]:
# TODO add edge features --> look into EDGE LABELED GRAPHS where all nodes and edges have labels
# TODO create init for loading graph --> initial step
# TODO add 2 levels of depth --> will be interconnected, no need to do this step

### --- Finding Links: Mapping ---

In [None]:
links = [{'source': source, 'destination': destination} for source, destination in zip(data['Account'], data['Account.1'])]
links

### --- Edge Features ---

In [None]:
# TODO add edge features --> create matrix like those for nodes

edges_df = data[["Timestamp", "Amount Paid", "Payment Currency", "Payment Format"]]
edges_df

#### --- Payment Encoding ---

In [None]:
edges_amount = edges_df["Amount Paid"].astype(str)
edges_amount = list(edges_amount)

In [None]:
def count_unused_decimals(number):
    """Strip trailing ``'0'`` characters from ``str(number)`` and count them.

    Args:
        number: Any value; it is converted with ``str`` first.

    Returns:
        A ``(stripped_text, removed_count)`` tuple: the text without its
        trailing zeros and how many zeros were removed.
    """
    text = str(number)
    trimmed = text.rstrip('0')
    # The length difference is exactly the number of trailing zeros dropped.
    return trimmed, len(text) - len(trimmed)

In [None]:
# Width of the integer part, taken from the largest amount. Fixed-point
# formatting is used (as for the minimum below) because str() switches to
# scientific notation for very large floats, which would miscount digits.
maximum = format(edges_df["Amount Paid"].max(), 'f')
max_len = len(maximum.split(".")[0])

# Number of significant decimals, taken from the smallest amount.
# NOTE(review): this uses the decimals of the minimum value only; another
# amount may carry more decimals — confirm that is acceptable.
minimum = edges_df["Amount Paid"].min()
minimum = format(minimum, 'f')
min_len = len(str(minimum.split('.')[1]))

new_min, count = count_unused_decimals(minimum)
min_len = min_len - count

number_columns = max_len + min_len
number_columns

In [None]:
def split_into_vectors(table):
    """Turn each amount into the list of characters of its fixed-point form.

    Every entry is converted with ``float`` and rendered with
    ``format(value, 'f')`` (six decimals), then split into single-character
    strings with the decimal point kept in place.

    Args:
        table: Iterable of numbers or numeric strings.

    Returns:
        A list of lists of one-character strings, one inner list per entry.

    Raises:
        ValueError: If an entry is not a valid float, or its rendering
            contains a character that is neither a digit nor ``'.'``
            (for example a minus sign).
    """
    def _characters(value):
        fixed_point = format(float(value), 'f')
        # int() doubles as a check that every non-dot character is a digit.
        return [symbol if symbol == '.' else str(int(symbol)) for symbol in fixed_point]

    return [_characters(entry) for entry in table]

In [None]:
def encode_payment_amount(df_col, max_len, min_len):
    """Zero-pad digit lists to a common layout and drop the decimal point.

    Each row is a list of single-character strings such as
    ``['1', '2', '.', '5']``. The integer part is left-padded with ``'0'``
    up to ``max_len`` digits and the fractional part is right-padded with
    ``'0'`` up to ``min_len`` digits, so equal positions carry equal place
    values across rows. Parts that are already longer are kept unchanged.

    The previous implementation padded the fraction only when the integer
    part needed no padding, used ``max_len`` for the fraction width, put
    those zeros in front of the number, and removed the ``'.'`` from the
    caller's own list whenever no padding happened.

    Args:
        df_col: Iterable of rows, each a sequence of characters containing
            a ``'.'``.
        max_len: Minimum number of integer digits in the output.
        min_len: Minimum number of fractional digits in the output.

    Returns:
        A list of new lists of characters without the first decimal point.
        The input rows are not modified.

    Raises:
        ValueError: If a row contains no ``'.'``.
    """
    new_payment_list = []
    for x in df_col:
        index_of_decimal = x.index('.')
        integer_digits = list(x[:index_of_decimal])
        fraction_digits = list(x[index_of_decimal + 1:])

        # Zeros in front of the integer part and behind the fraction do not
        # change the encoded value.
        leading = ['0'] * max(0, max_len - len(integer_digits))
        trailing = ['0'] * max(0, min_len - len(fraction_digits))

        new_payment_list.append(leading + integer_digits + fraction_digits + trailing)
    return new_payment_list

In [None]:
a = split_into_vectors(edges_amount) # INEFFICIENT !!!! # INEFFICIENT !!!!# INEFFICIENT !!!!# INEFFICIENT !!!!# INEFFICIENT !!!!

In [None]:
new_payment_list = encode_payment_amount(a, max_len, min_len) # INEFFICIENT !!!! # INEFFICIENT !!!!# INEFFICIENT !!!!# INEFFICIENT !!!!# INEFFICIENT !!!!
new_payment_list = nested_list_int = [[int(item) for item in sublist] for sublist in new_payment_list]
new_payment_list

In [None]:
# Convert vectors into DataFrame
payment_vectors_df = pd.DataFrame(new_payment_list, columns=[f'payment_{i}' for i in range(len(new_payment_list[0]))])

In [None]:
edges_features = pd.concat([edges_df, payment_vectors_df], axis=1)
edges_features.drop("Amount Paid", axis='columns')

#### --- One-hot encoding: currency ---

In [None]:
# TODO convert Currency into one-hot encoding

positions = edges_features["Payment Currency"].str.split(",", expand=True) # creating new columns by splitting receiving currency --> all are added
edges_features["first_position"] = positions[0] # first currency in each row is extracted --> actual currency used and that we want as TRUE
# One-hot encoding 
edges_features = pd.concat([edges_features, pd.get_dummies(edges_features["first_position"],dtype='int')], axis=1, join='inner') # effectively adds actual currency to dummy variables/columns
edges_features.drop(["Amount Paid","Payment Currency", "first_position"], axis=1, inplace=True) # drop the axiliary columns
edges_features.head()

In [None]:
# TODO convert Payment Format

positions_2 = edges_features["Payment Format"].str.split(",", expand=True) 
edges_features["second_position"] = positions_2[0]
edges_features = pd.concat([edges_features, pd.get_dummies(edges_features["second_position"],dtype='int')], axis=1, join='inner') # effectively adds actual currency to dummy variables/columns
edges_features.drop(["Payment Format", "second_position"], axis=1, inplace=True) # drop the axiliary columns
edges_features.head()

#### --- One-hot encoding: time ---

In [None]:
# TODO convert timestamps

# Whole seconds since the Unix epoch, computed with Timedelta arithmetic so
# the result does not depend on the platform's default int width or on the
# datetime resolution pandas picked while parsing.
# NOTE: does not interpret time well... circular definition for months --> sine calculations
edges_features["Timestamp"] = (
    pd.to_datetime(edges_features['Timestamp']) - pd.Timestamp("1970-01-01")
) // pd.Timedelta(seconds=1)

In [None]:
edges_features.head()

### --- Y - Edge Feature Matrix

In [None]:
y = edges_features.to_numpy()
print(y[0:10])

# GRAPHICAL - nx

In [None]:
# NOTE(review): nx.Graph is undirected and keeps one edge per account pair,
# so repeated transfers between the same two accounts overwrite each
# other's label — confirm this is acceptable for the preview.
graph = nx.Graph()

transaction_limit = 800 # set a limit for graph visualization

# Slicing plus zip stops at the shortest input, so a dataset with fewer than
# transaction_limit rows no longer raises IndexError.
for link, amount in zip(links[:transaction_limit], edges_amount):
    u = link.get("source")
    v = link.get("destination")
    graph.add_edge(u, v, label=amount) # use edge labels for edge features?

# # get all links
# for i in range(len(links)):
#     u = links[i].get("source")
#     v = links[i].get("destination")
#     graph.add_edge(u,v,label=edge_attr[i][1]) # use edge labels for edge features?

print(graph.edges(data=True))

### --- Visualization ---

In [None]:
pos = nx.random_layout(graph) # shell, circular, spectral, spring, random, 

plt.figure(figsize=(25, 15))  # Increase figure size

nx.draw(
    graph, 
    pos, 
    node_size=300,  # Reduce node size for better visibility
    with_labels=True, 
    font_size=7, 
    font_weight='bold', 
    node_color='lightblue',  # Specify node color
    edge_color='gray',  # Specify edge color
    width=1,  # Adjust edge width
    arrows=True,  # Show arrows for directed edges
    arrowstyle='->',  # Specify arrow style
    arrowsize=20,  # Adjust arrow size
)

edge_labels = nx.get_edge_attributes(graph, 'label')
nx.draw_networkx_edge_labels(
    graph, 
    pos, 
    edge_labels=edge_labels, 
    label_pos=0.5,  # Adjust label position along edges
    font_size=7,  # Adjust font size
    font_color='green',  # Specify font color
)

plt.title(f'Graph Visualization of first {transaction_limit} transactions')  # Add title to the plot
plt.axis('off')  # Hide axis
plt.show()

In [None]:
# dictionaries 

degree_of_centrality = nx.degree_centrality(graph) # closeness_centrality, eigenvector_centrality, betweeness_centrality
betweenness_centrality = nx.betweenness_centrality(graph)

# TODO add statistics to a new dataframe

node_stat_features = pd.DataFrame()
node_stat_features['account'] = degree_of_centrality.keys()
node_stat_features['degree_of_centrality'] = degree_of_centrality.values()
node_stat_features['betweenness_centrality'] = betweenness_centrality.values()

node_stat_features

### --- Statistics as Features - Skip if not needed ---

In [None]:
# Add statistics df to the original df if necessary to include

# TODO format decimal point system to exclude power values.

# node_stat_features.drop('account')

# edges_features_w_stats = pd.concat([node_features, node_stat_features], axis=1)
# edges_features_w_stats

# ADJACENCY MATRIX - nx

### --- Loading full graph ---

In [None]:
# GETTING FULL GRAPH

# Undirected graph over every transaction; the edge label is the amount
# string of the last transaction seen for that account pair.
graph_full = nx.Graph()

# get all links
for link, amount in zip(links, edges_amount):
    u = link.get("source")
    v = link.get("destination")
    graph_full.add_edge(u, v, label=amount) # use edge labels for edge features?

In [None]:
print(len(edges_features))
print(len(links)) # number of transactions

In [None]:
print(len(unique_accounts))
print(graph_full.__len__()) # number of nodes in the graph

In [None]:
adjacency_matrix = nx.adjacency_matrix(graph_full)
adjacency_matrix = adjacency_matrix.todense()

In [None]:
print(adjacency_matrix) # index of nodes
print(len(adjacency_matrix))

# -------- !!! TODO !!! ----------

In [None]:
# What is node2vec?

# TODO BINs should also be vectorized to avoid ordering
# TODO binary encoding for banks --> replace 300 features with 10 features that can represent a number in binary 
# TODO take random subset from transactions

# TODO calculating time --> circular definition for months (goes back to 0) --> sinus calculations --> same time different year cannot be distinguished in unix format
# TODO --> don't do yet.

# TODO payment amount --> separate based on power of 10s --> separate columns for thousands, hundreds etc.

# TODO no need to add nodes in order --> mapping through dictionary is an option

# TODO potentially add statistics to feature matrix X


In [None]:
"""
NEXT 2 WEEKS:

1. Vectorize "BANKS" - BINs using binary encoding --> DONE!!!
2. Vectorize "Amount Paid" --> DONE!!!
3. Mapping through dictionary of all links --> No need to find individual links between accounts --> we are taking all accounts and the graph in general  --> DONE!!!
4. Graph Visualization should include labels that are actual accounts  --> DONE!!!
5. Add statistics to feature matrix X? --> if necessary --> DONE!!!
6. Apply graph on subset of 1000 transactions  --> DONE!!! --> not a problem at all, efficiency is better.
7. Create Adjacency Matrix --> can be done through networkx or through existing links --> DONE!!!
7a. Get unique nodes from both "Account" and "Account.1"  --> DONE!!!

7b. If laundering --> make node red

8. Create GNN model
8a. Create smaller graph
8b. Simple GNN, testing on dataset


Kernel keeps dying due to low RAM memory, I have to store the graph and variables somewhere else. All I need is the full_graph, x, y, and adjacency matrix.
- Save graph and model into Memory
- Use cuda

9. Write introduction
10. Write literature review

"""

# GNN

In [None]:
# node embedding: ADJACENCY MATRIX * NODE FEATURES

In [None]:
import numpy as np
import networkx as nx
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Define the GNN architecture
class GNN(nn.Module):
    """Two graph-convolution layers followed by a linear read-out.

    Args:
        input_dim: Number of input features per node.
        hidden_dim: Width of both graph-convolution layers.
        output_dim: Number of outputs per node.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Sub-modules are registered in the same order as before so that
        # parameter ordering and state_dict keys stay unchanged.
        self.gc1 = GraphConvolution(input_dim, hidden_dim)
        self.gc2 = GraphConvolution(hidden_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x, adj):
        """Return one raw (un-activated) output row per node.

        Args:
            x: Node feature tensor.
            adj: Adjacency tensor that is matrix-multiplied with the features
                inside each graph-convolution layer.
        """
        hidden = self.gc1(x, adj)
        hidden = self.relu(hidden)
        hidden = self.gc2(hidden, adj)
        hidden = self.relu(hidden)
        return self.fc(hidden)

class GraphConvolution(nn.Module):
    """Aggregate neighbour features with ``adj @ x``, then apply a linear map.

    Args:
        input_dim: Number of input features per node.
        output_dim: Number of output features per node.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x, adj):
        """Return ``linear(adj @ x)``; no activation is applied here."""
        aggregated = torch.matmul(adj, x)  # sum of neighbour features per node
        return self.linear(aggregated)

# Prepare data
# Assuming x is node features, y is edge features, and adjacency_matrix is the adjacency matrix
# NOTE(review): x has one row per unique account while y was built from
# edges_features (one row per transaction, many columns). The code below
# indexes y with node indices and feeds it to a 1-output BCE loss, so y does
# not look like a usable per-node label yet — confirm the intended target.
# NOTE(review): adjacency_matrix rows follow the node insertion order of
# graph_full, whereas x rows follow unique_accounts; these orders are
# presumably different — verify the alignment before training.
# NOTE(review): DataLoader in this cell is torch.utils.data.DataLoader and
# replaces the torch_geometric DataLoader imported at the top of the notebook.

# Define some constants
num_nodes = x.shape[0]
input_dim = x.shape[1]
output_dim = 1  # Binary classification: fraud or not
hidden_dim = 64

# Convert numpy arrays to PyTorch tensors
x = torch.FloatTensor(x)
y = torch.FloatTensor(y)
adjacency_matrix = torch.FloatTensor(adjacency_matrix)

# Split data into train and test sets
# np.random.choice with replace=False raises ValueError when num_nodes is
# smaller than num_train_samples.
num_train_samples = 50000
train_indices = np.random.choice(num_nodes, num_train_samples, replace=False)
test_indices = np.setdiff1d(np.arange(num_nodes), train_indices)

# Each split keeps only the edges whose two endpoints are both in that split.
x_train, x_test = x[train_indices], x[test_indices]
adj_train, adj_test = adjacency_matrix[train_indices][:, train_indices], adjacency_matrix[test_indices][:, test_indices]
y_train, y_test = y[train_indices], y[test_indices]

# Create DataLoader
# NOTE(review): batching slices rows only, so a batch carries x of shape
# (batch, features) and adj of shape (batch, num_train_samples); the
# torch.matmul(adj, x) inside GraphConvolution then looks shape-incompatible
# unless the batch covers the whole training set — confirm.
train_dataset = TensorDataset(x_train, adj_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Initialize the model
model = GNN(input_dim, hidden_dim, output_dim)

# Define loss function and optimizer
# BCEWithLogitsLoss applies the sigmoid itself, so the model outputs raw logits.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch in train_loader:
        optimizer.zero_grad()
        x_batch, adj_batch, y_batch = batch
        output = model(x_batch, adj_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # total_loss is the sum of per-batch losses, not an average.
    print(f"Epoch {epoch+1}, Loss: {total_loss}")

# Evaluate the model
model.eval()
with torch.no_grad():
    output = model(x_test, adj_test)
    # Sigmoid turns logits into probabilities; round() thresholds them at 0.5.
    predictions = torch.sigmoid(output).round().squeeze().numpy()

# Assuming y_test contains the true labels
accuracy = np.mean(predictions == y_test.numpy())
print(f"Accuracy: {accuracy}")


## Creating Learnable Node Embeddings

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

class GCNLayer(nn.Module):
    """Single graph-convolution layer: scale adjacency, aggregate, linear, ReLU.

    Args:
        in_features: Number of input features per node.
        out_features: Number of output features per node.
    """

    def __init__(self, in_features, out_features):
        super(GCNLayer, self).__init__()
        self.linear = nn.Linear(in_features, out_features)

    @staticmethod
    def _to_float_tensor(values):
        """Return ``values`` as a float32 tensor (accepts tensors and array-likes)."""
        if isinstance(values, torch.Tensor):
            return values.float()
        # np.asarray also flattens np.matrix (e.g. the result of .todense())
        # into a plain 2-D ndarray.
        return torch.as_tensor(np.asarray(values), dtype=torch.float32)

    def forward(self, adj_matrix, node_features):
        """Return ``relu(linear((A * d^-1/2) @ X))``.

        Both arguments may be NumPy arrays or torch tensors. Accepting
        tensors is what allows layers to be stacked, because the output of
        one layer (a tensor) is the ``node_features`` of the next.

        Args:
            adj_matrix: Square adjacency matrix ``A``.
            node_features: Node feature matrix ``X`` with one row per node.

        Returns:
            Tensor with one row per node and ``out_features`` columns.
        """
        adj_matrix_tensor = self._to_float_tensor(adj_matrix)
        node_features_tensor = self._to_float_tensor(node_features)

        # Row sums act as node degrees. A zero degree would give
        # 0 ** -0.5 = inf and then 0 * inf = NaN, so isolated nodes get a
        # scaling factor of 0 instead.
        degree = torch.sum(adj_matrix_tensor, dim=1)
        inv_sqrt_degree = torch.pow(degree, -0.5)
        inv_sqrt_degree = torch.where(
            torch.isinf(inv_sqrt_degree),
            torch.zeros_like(inv_sqrt_degree),
            inv_sqrt_degree,
        )

        # Scaling column j by d_j^-1/2 equals A @ diag(d^-1/2) without
        # building the N x N diagonal matrix.
        # NOTE(review): only the column scaling is applied; the symmetric
        # form D^-1/2 A D^-1/2 would also scale rows — confirm the intent.
        adj_matrix_normalized = adj_matrix_tensor * inv_sqrt_degree.unsqueeze(0)

        node_embeddings = torch.matmul(adj_matrix_normalized, node_features_tensor)
        output = F.relu(self.linear(node_embeddings))
        return output

class GCN(nn.Module):
    """Stack of two ``GCNLayer`` modules.

    Args:
        num_features: Number of input features per node.
        hidden_size: Output width of the first layer.
        num_classes: Output width of the second layer.
    """

    def __init__(self, num_features, hidden_size, num_classes):
        super().__init__()
        self.layer1 = GCNLayer(num_features, hidden_size)
        self.layer2 = GCNLayer(hidden_size, num_classes)

    def forward(self, adj_matrix, node_features):
        """Run both layers with the same adjacency matrix.

        The output of ``layer1`` is passed to ``layer2`` as its node features.
        """
        hidden = self.layer1(adj_matrix, node_features)
        return self.layer2(adj_matrix, hidden)


In [None]:

# Example usage:
num_features = 10  # Number of node features
hidden_size = 16   # Hidden layer size
num_classes = 2    # Number of output classes
# torch.rand keeps every weight in [0, 1), so the row sums used as degrees
# are non-negative; randn would give negative degrees and NaN after the
# inverse square root.
adj_matrix = torch.rand(10, 10)  # Example adjacency matrix
node_features = torch.randn(10, num_features)  # Example node features

# Create GCN model
model = GCN(num_features, hidden_size, num_classes)

# Forward pass on the example inputs built above. The previous call used the
# notebook-wide adjacency_matrix / x, whose feature width does not match
# num_features.
output = model(adj_matrix, node_features)


## SIMPLE GNN MODEL EXAMPLE

In [None]:
import torch
import torch.nn as nn  # alias was misspelled "nnd1"; the cells below use nn.Module / nn.Linear
import torch.nn.functional as F

In [None]:
# NOTE(review): template cell — every assignment below is missing its
# right-hand side, so the cell is a SyntaxError until values are filled in.
# How the names are used further down in this notebook:
#   num_features, hidden_dim, num_classes -> arguments of GCN(...)
#   num_epochs                            -> number of training iterations
#   features, adj                         -> inputs of model(features, adj)
#   labels                                -> targets passed to CrossEntropyLoss
num_features =
hidden_dim =
num_classes =
num_epochs =
features =
adj =
labels =

In [None]:
class GCNBlock(nn.Module):
    """Linear projection, neighbour aggregation (``adj @ x``), then ReLU.

    Args:
        in_features: Number of input features per node.
        out_features: Number of output features per node.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x, adj):
        """Return ``relu(adj @ linear(x))``."""
        projected = self.linear(x)
        aggregated = torch.matmul(adj, projected)
        return F.relu(aggregated)
class GCN(nn.Module):
    """Two ``GCNBlock`` layers with dropout (p=0.5) in between.

    Args:
        input_dim: Number of input features per node.
        hidden_dim: Output width of the first block.
        output_dim: Output width of the second block.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.gcn1 = GCNBlock(input_dim, hidden_dim)
        self.gcn2 = GCNBlock(hidden_dim, output_dim)

    def forward(self, x, adj):
        """Return the second block's output for node features ``x``."""
        hidden = self.gcn1(x, adj)
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        return self.gcn2(hidden, adj)
# Define the model
model = GCN(num_features, hidden_dim, num_classes)
# Define the loss function and optimizer
# NOTE(review): the last GCNBlock ends with a ReLU, so the scores handed to
# CrossEntropyLoss are never negative — confirm this is intended.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# Train the model
# Full-batch training: each epoch is a single forward/backward pass over all
# of features/adj, and the loss value is not printed or stored.
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(features, adj)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()