In [1]:
import os
import pandas as pd

In [2]:
dataset = pd.read_csv("../data/paysim_dataset.csv")
dataset = dataset[dataset["step"] < 4]
dataset.to_csv("../data/paysim_3_timesteps.csv")

In [3]:
dataset.columns = ['step',
                   'type',
                   'amount',
                   'nameOrig',
                   'oldbalanceOrig',
                   'newbalanceOrig',
                   'nameDest',
                   'oldbalanceDest',
                   'newbalanceDest',
                   'isFraud',
                   'isFlaggedFraud']

In [4]:
def generatePaysimNodesAttributes(dataset, timestep=None):

    if timestep is not None:
        dataset = dataset[dataset["step"] == timestep]

    paysim_nodes = dataset[["nameOrig", "isFraud", "nameDest"]]
    paysim_nodes.columns = ["nameOrig", "class", "nameDest"]
    paysim_nodes = paysim_nodes.assign(label="node")
    return paysim_nodes

In [5]:
def getAllNodes(paysim_nodes):
    initial_nodes = list(paysim_nodes["nameOrig"])

    for node in paysim_nodes["nameDest"]:
        initial_nodes.append(node)

    all_nodes = pd.DataFrame(initial_nodes)[0].unique()
    all_nodes = pd.DataFrame(all_nodes)
    all_nodes.columns = ["nodeId"]
    return all_nodes

In [6]:
def createNodesDataFile(dataset, timestep=None):
    paysim_nodes = generatePaysimNodesAttributes(dataset=dataset, timestep=timestep)
    all_nodes = getAllNodes(paysim_nodes)
    fraudulent_nodes = getAllNodes(paysim_nodes[paysim_nodes["class"] == 1][["nameOrig", "nameDest"]])
    fraudulent_nodes["class"] = 1
    merged = pd.merge(fraudulent_nodes, all_nodes, on="nodeId", how="outer")
    merged.fillna(0, inplace=True)
    merged["class"] = merged["class"].astype("uint8")
    merged = merged.assign(label="node")

    export_path_postfix = ""

    if timestep is not None:
        export_path_postfix = str(timestep)

    if not os.path.isdir(f"../export{export_path_postfix}"):
        os.mkdir(f"../export{export_path_postfix}")

    merged.to_csv(f"../export{export_path_postfix}/nodes_data.csv", header=False, index=False)

    return 0

In [7]:
def createEdgesDataFile(dataset, timestep=None):

    edges_data = dataset.copy()
    edges_data = edges_data.assign(cost=1)
    edges_data = edges_data.assign(Type="TRANSACTION")
    attributes_in_order = ["nameOrig", "cost", "step", "type", "amount", "oldbalanceOrig", "newbalanceOrig", "oldbalanceDest", "newbalanceDest", "isFraud", "isFlaggedFraud", "nameDest", "Type"]
    ordered_edge_data = edges_data[attributes_in_order]
    ordered_edge_data.columns = ["nameOrig", "cost", "step", "transactionCategory", "amount", "oldbalanceOrig", "newbalanceOrig", "oldbalanceDest", "newbalanceDest", "isFraud", "isFlaggedFraud", "nameDest", "Type"]

    if timestep is not None:
        ordered_edge_data = ordered_edge_data[ordered_edge_data["step"] == timestep]

    export_path_postfix = ""

    if timestep is not None:
        export_path_postfix = str(timestep)

    if not os.path.isdir(f"../export{export_path_postfix}"):
        os.mkdir(f"../export{export_path_postfix}")


    ordered_edge_data.to_csv(f"../export{export_path_postfix}/edges_data.csv", header=False, index=False)

    return 0

In [8]:
def createNodesHeaderFile(timestep=None):

    export_path_postfix = ""
    if timestep is not None:
        export_path_postfix = str(timestep)

    if not os.path.isdir(f"../export{export_path_postfix}"):
        os.mkdir(f"../export{export_path_postfix}")

    nodes_header = "id:ID,class:int,label:LABEL"

    with open(f"../export{export_path_postfix}/nodes_header.csv", 'w') as fh:
        fh.write(nodes_header)

    return 0

In [9]:
def createEdgesHeaderFile(timestep=None):

    export_path_postfix = ""
    if timestep is not None:
        export_path_postfix = str(timestep)

    if not os.path.isdir(f"../export{export_path_postfix}"):
        os.mkdir(f"../export{export_path_postfix}")

    edges_header = ":START_ID,cost:int,step:long,type,amount:double,oldbalanceOrig:double,newbalanceOrg:double,oldbalanceDest:double,newbalanceDest:double,isFraud:long,isFlaggedFraud:long,:END_ID,:TYPE"

    with open(f"../export{export_path_postfix}/edges_header.csv", 'w') as fh:
        fh.write(edges_header)

    return 0

In [10]:
createNodesDataFile(dataset, timestep=None)
createNodesHeaderFile()
createEdgesDataFile(dataset=dataset)
createEdgesHeaderFile()

0

In [11]:
pd.read_csv("../export/nodes_header.csv")

Unnamed: 0,id:ID,class:int,label:LABEL


In [12]:
pd.read_csv("../export/nodes_data.csv").head()

Unnamed: 0,C1305486145,1,node
0,C840083671,1,node
1,C1420196421,1,node
2,C2101527076,1,node
3,C137533655,1,node
4,C1118430673,1,node


In [13]:
pd.read_csv("../export/edges_header.csv")

Unnamed: 0,:START_ID,cost:int,step:long,type,amount:double,oldbalanceOrig:double,newbalanceOrg:double,oldbalanceDest:double,newbalanceDest:double,isFraud:long,isFlaggedFraud:long,:END_ID,:TYPE


In [14]:
pd.read_csv("../export/edges_data.csv").head()

Unnamed: 0,C1231006815,1,1.1,PAYMENT,9839.64,170136.0,160296.36,0.0,0.0.1,0,0.1,M1979787155,TRANSACTION
0,C1666544295,1,1,PAYMENT,1864.28,21249.0,19384.72,0.0,0.0,0,0,M2044282225,TRANSACTION
1,C1305486145,1,1,TRANSFER,181.0,181.0,0.0,0.0,0.0,1,0,C553264065,TRANSACTION
2,C840083671,1,1,CASH_OUT,181.0,181.0,0.0,21182.0,0.0,1,0,C38997010,TRANSACTION
3,C2048537720,1,1,PAYMENT,11668.14,41554.0,29885.86,0.0,0.0,0,0,M1230701703,TRANSACTION
4,C90045638,1,1,PAYMENT,7817.71,53860.0,46042.29,0.0,0.0,0,0,M573487274,TRANSACTION
