## 0. Mengimport Modul dan Membaca Data

In [1]:
import pandas as pd
import numpy as np
import time
import torch
import torch_geometric
import scipy

In [None]:
# Loading xlsx file of real BankSim dataset
log_df = pd.read_excel('BankSim for Thesis.xlsx', sheet_name='BankSim for Thesis')

## 1. Membangun Matriks Ketetanggaan dan Objek Graf

In [4]:
start_time = time.time()

# Mengambil sampel data: 100 dari kelas fraud dan 7900 dari kelas bukan fraud
fraud_df = log_df.loc[log_df['fraud'] == 1].sample(100)
nofraud_df = log_df.loc[log_df['fraud'] == 0].sample(7900)
new_log_df = pd.concat([fraud_df, nofraud_df]).reset_index().drop(['index'], axis=1)
n = len(new_log_df)
new_log_df.to_csv('BankSim 8000.csv')

# Matriks ketetanggaan A: 1 jika terdapat 'customer' atau 'merchant' yang sama
A = np.zeros((n,n), dtype=np.int8)
for i in range(n):
    cus, mer = new_log_df.loc[i, 'customer'], new_log_df.loc[i, 'merchant']
    for j in range(i+1, n):
        if cus == new_log_df.loc[j, 'customer'] or mer == new_log_df.loc[j, 'merchant']:
            A[i,j] = 1
            A[j,i] = 1
    if i%100 == 0:
        print(i, time.time()-start_time)

# Membangun objek data graf berdasarkan A
sparse_A = scipy.sparse.csr_matrix(A)
G = torch_geometric.utils.from_scipy_sparse_matrix(sparse_A)
G_data = torch_geometric.data.Data(edge_index=G[0])

end_time = time.time() - start_time

print('Time to construct data object based on A:', end_time)

0 0.23177099227905273
100 15.52385950088501
200 29.751585245132446
300 43.87968897819519
400 57.88134145736694
500 72.18541121482849
600 86.0971565246582
700 99.68208718299866
800 113.44995164871216
900 127.20380687713623
1000 140.97418093681335
1100 156.10906529426575
1200 171.27634525299072
1300 183.64107966423035
1400 195.57264399528503
1500 207.4504954814911
1600 219.06817507743835
1700 230.8986246585846
1800 242.28113675117493
1900 253.54528999328613
2000 264.4571764469147
2100 275.20553708076477
2200 285.7404053211212
2300 296.13619470596313
2400 306.43857741355896
2500 316.63161063194275
2600 326.7220025062561
2700 336.61636209487915
2800 346.3754494190216
2900 356.0019338130951
3000 365.505464553833
3100 374.8241512775421
3200 384.0267868041992
3300 393.1357321739197
3400 402.1530177593231
3500 410.977014541626
3600 419.68188095092773
3700 428.2264053821564
3800 436.6850039958954
3900 445.56656527519226
4000 453.8471429347992
4100 462.15810203552246
4200 470.19810914993286
4300

## 3. Membangun matriks fitur X dan label titik y

In [None]:
start_time = time.time()

# Fitur titik: 'category', 'amount', 'customer age', 'customer gender'
nodes_feature = pd.get_dummies(new_log_df['category'])
nodes_feature['weight'] = new_log_df['amount']
nodes_feature['cus_age'] = new_log_df['age']
nodes_feature['cus_gender'] = new_log_df['gender']
nodes_feature = torch.tensor(nodes_feature.values)

# Label titik: fraud atau bukan fraud
nodes_label = torch.tensor(new_log_df['fraud'].values)
G_data.x = nodes_feature
G_data.y = nodes_label
G_data.num_nodes = n

print('Time to construct feature matrix X and label vector y: ', time.time()-start_time)

## 4. Menyimpan dataset graf

In [None]:
torch.save(G_data, 'BankSim 8000')