In [1]:
import time
import json

import pandas as pd
import numpy as np
from sklearn import preprocessing, feature_extraction, model_selection
from sklearn.metrics import mean_squared_error, accuracy_score, plot_confusion_matrix, f1_score
from sklearn.manifold import TSNE
from sklearn.linear_model import LogisticRegressionCV, LogisticRegression

import stellargraph as sg
from stellargraph import datasets
from stellargraph.mapper import (
    CorruptedGenerator,
    FullBatchNodeGenerator,
    GraphSAGENodeGenerator,
    HinSAGENodeGenerator,
    Node2VecNodeGenerator,
    ClusterNodeGenerator,
)
from stellargraph.layer import GCN, DeepGraphInfomax, GraphSAGE, GAT, APPNP, HinSAGE, Dense

import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import Model, optimizers, losses, metrics

import multiprocessing
from IPython.display import display, HTML
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import sys

# Make the parent directory importable so the project-local `utils` module resolves.
# Guarded so that re-running this cell does not keep appending duplicate
# '../' entries to sys.path.
if '../' not in sys.path:
    sys.path.append('../')
import utils

# Argument forwarded to utils.load_for_jupyter.
# NOTE(review): presumably the subsample size (the loader logs a SUBSAMPLING step) - confirm.
n = 10000
v_sets, e_sets, core_targets, ext_targets, v_sample, e_sample = utils.load_for_jupyter(n)

Dataset already downloaded. Loading it from file system
LOADING DATA: 1.04 s
SUBSAMPLING: 0.05 s
PREPROCESSING: 0.05 s


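# Adding the "Fraudolent" field

Each node table gets a `Fraudolent` column (i.e. "fraudulent"; the column name is kept exactly as spelled in the code): `'1'` when `CoreCaseGraphID` or `ExtendedCaseGraphID` is non-zero, `'0'` otherwise.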

In [126]:
# Add a 'Fraudolent' column to every node table in v_sets: the string '1' when
# either CoreCaseGraphID or ExtendedCaseGraphID differs from 0.0, else '0'.
#
# The loop variable is deliberately NOT called `set`: in a notebook the name
# stays bound after the loop and would shadow the built-in `set` in later cells.
#
# NOTE(review): a NaN ID compares `!= 0.0` as True and would therefore be
# flagged '1' - confirm the ID columns never contain missing values.
for node_type in v_sets:
    nodes = v_sets[node_type]
    has_case_graph_id = np.logical_or(
        nodes['CoreCaseGraphID'] != 0.0,
        nodes['ExtendedCaseGraphID'] != 0.0,
    )
    nodes['Fraudolent'] = np.where(has_case_graph_id, '1', '0')

In [127]:
# Display the 'Account' node table to inspect the added 'Fraudolent' column.
v_sets['Account']

Unnamed: 0_level_0,Revenue Size Flag,CoreCaseGraphID,ExtendedCaseGraphID,Fraudolent
node_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1502001,1,0.0,0.0,0
15020013,2,1566.0,0.0,1
15020014,1,0.0,0.0,0
15020028,4,0.0,0.0,0
15020029,4,0.0,0.0,0
...,...,...,...,...
15020147942,5,0.0,0.0,0
15020148265,5,0.0,0.0,0
15020148981,4,0.0,0.0,0
15020149051,3,0.0,0.0,0


In [129]:
# Display the 'Customer' node table to inspect the added 'Fraudolent' column.
v_sets['Customer']

Unnamed: 0_level_0,Income Size Flag,CoreCaseGraphID,ExtendedCaseGraphID,Person or Organisation_Organisation,Person or Organisation_Person,Fraudolent
node_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
100109638,2,0.0,151.0,0,1,1
100109640,3,0.0,0.0,0,1,0
100109645,3,0.0,0.0,1,0,0
100109660,2,0.0,2032.0,1,0,1
100109669,2,0.0,0.0,1,0,0
...,...,...,...,...,...,...
1001028535,2,0.0,0.0,1,0,0
1001028539,1,0.0,0.0,0,1,0
1001028701,1,0.0,0.0,1,0,0
1001028723,3,0.0,1207.0,1,0,1


In [130]:
# Display the 'Address' node table to inspect the added 'Fraudolent' column.
v_sets['Address']

Unnamed: 0_level_0,CoreCaseGraphID,ExtendedCaseGraphID,Fraudolent
node_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
250117336,0.0,0.0,0
250117576,0.0,0.0,0
250117577,0.0,0.0,0
250117721,0.0,0.0,0
250118066,0.0,0.0,0
...,...,...,...
250118447,0.0,0.0,0
250118550,0.0,0.0,0
250118722,0.0,0.0,0
250118774,0.0,0.0,0


In [131]:
# Display the 'Derived Entity' node table to inspect the added 'Fraudolent' column.
v_sets['Derived Entity']

Unnamed: 0_level_0,CoreCaseGraphID,ExtendedCaseGraphID,Person or Organisation_Organisation,Person or Organisation_Person,Fraudolent
node_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
200300196,0.0,1761.0,1,0,1
200300233,3167.0,3167.0,1,0,1
200300236,0.0,1313.0,1,0,1
200300241,0.0,2561.0,1,0,1
200300309,2477.0,0.0,0,1,1
...,...,...,...,...,...
20030017515,0.0,0.0,1,0,0
20030017522,0.0,351.0,0,1,1
20030017534,0.0,0.0,0,1,0
20030017538,0.0,33.0,1,0,1


In [132]:
# Display the 'External Entity' node table to inspect the added 'Fraudolent' column.
v_sets['External Entity']

Unnamed: 0_level_0,CoreCaseGraphID,ExtendedCaseGraphID,Person or Organisation_Organisation,Person or Organisation_Person,Fraudolent
node_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3001130070,0.0,995.0,0,1,1
3001130084,0.0,0.0,0,1,0
3001130109,0.0,0.0,0,1,0
3001130150,0.0,0.0,1,0,0
3001130264,0.0,3147.0,1,0,1
...,...,...,...,...,...
3001177556,0.0,0.0,0,1,0
3001177641,0.0,0.0,0,1,0
3001177649,0.0,0.0,0,1,0
3001177675,0.0,0.0,0,1,0
