In [1]:
import numpy as np 
import pandas as pd 

import os
for dirname, _, filenames in os.walk('./input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

./input/elliptic_txs_features_test.csv
./input/elliptic_txs_classes.csv
./input/elliptic_txs_edgelist.csv
./input/elliptic_txs_features.csv


In [2]:
import pandas as pd 
import networkx as nx
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics

id_time=["txId", "time_step"]
feature_names = ['feature_'+str(i) for i in range(1,166)]
column_names = id_time + feature_names
elliptic_classes = pd.read_csv('./input/elliptic_txs_classes.csv')
elliptic_classes.columns = ['txId', 'class_label']
elliptic_edgelist = pd.read_csv('./input/elliptic_txs_edgelist.csv')
elliptic_features = pd.read_csv('./input/elliptic_txs_features.csv', names=column_names)
elliptic_features["centrality"] =None

## Basic Model

In [6]:
import torch
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.datasets import EllipticBitcoinDataset
#from torch_geometric.logging import init_wandb, log
from torch_geometric.nn import GCNConv

dataset = EllipticBitcoinDataset(root='./pytorch_input')

class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        # x: Node feature matrix of shape [num_nodes, in_channels]
        # edge_index: Graph connectivity matrix of shape [2, num_edges]
        #x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv1(x, edge_index).relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index).relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv3(x, edge_index)
        return x


In [12]:
simplemodel = GCN(dataset.num_features, 32, dataset.num_classes+1)
data=dataset[0]
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
labeled = data.y>0
randomize = torch.FloatTensor(data.y.shape[0]).uniform_()
classweight = torch.FloatTensor([0,int((data.y == 2).sum()),int((data.y == 1).sum())])
data.train_mask = torch.logical_and(data.y > 0,randomize>0.2)# 80% of labeled
data.test_mask = torch.logical_and(data.y > 0,randomize< 0.2)# rest 20% of labeled
# init_wandb(name=f'GCN-{args.dataset}', lr=args.lr, epochs=args.epochs,            hidden_channels=args.hidden_channels, device=device)
simplemodel, data = simplemodel.to(device), data.to(device)
optimizer = torch.optim.Adam([ dict(params=simplemodel.conv1.parameters(), weight_decay=5e-4), 
dict(params=simplemodel.conv2.parameters(), weight_decay=0),
dict(params=simplemodel.conv3.parameters(), weight_decay=0)], lr=0.001)  # Only perform weight-decay on first convolution.


def train(model):
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    #print(out.shape,new_y.shape)
    #print(data.y[data.test_mask].shape)
    loss = F.cross_entropy(out[data.train_mask], data.y[data.train_mask],weight=classweight)
    loss.backward()
    optimizer.step()
    return float(loss)

@torch.no_grad()
def test(model):
    model.eval()
    pred = model(data.x, data.edge_index).argmax(dim=-1)
    accs = []
    for mask in [data.train_mask, data.test_mask]:
        accs.append(int((pred[mask] == data.y[mask]).sum()) / int(mask.sum()))
    return accs


best_val_acc = final_test_acc = 0
for epoch in range(1, 21):
    loss = train(simplemodel)
    train_acc, tmp_test_acc = test(simplemodel)
    print(epoch,loss,train_acc,tmp_test_acc)
    #log(Epoch=epoch, Loss=loss, Train=train_acc, Val=0, Test=test_acc)

1 2.156799077987671 0.023357382615070026 0.024350800808583425
2 2.045508623123169 0.03886724024846638 0.04046027056445343
3 1.9586296081542969 0.0637138778502257 0.06443787902348001
4 1.8785839080810547 0.09489563640572553 0.09466645933758358
5 1.8077332973480225 0.12873181835718972 0.12753848546104804
6 1.7099274396896362 0.16363285620587215 0.16404913699269166
7 1.652165174484253 0.20097225973224275 0.20301663815891774
8 1.5695149898529053 0.2442378178170454 0.24605815580780593
9 1.5155141353607178 0.2909911647825919 0.2919919141657596
10 1.4614330530166626 0.33631698753809947 0.33730368527445187
11 1.4131238460540771 0.3754234345460859 0.3765199813403825
12 1.3671588897705078 0.41300976117905785 0.4140880111957705
13 1.3385276794433594 0.4434893321501601 0.4448452806717462
14 1.293747067451477 0.46601334928045063 0.4666148343958949
15 1.2611244916915894 0.4840078706740229 0.48356398693826774
16 1.2218513488769531 0.49596820865002506 0.49675011662260926
17 1.1904600858688354 0.505775

## Generate New Centrality Feature

In [13]:
time_steps = list(range(1,17))
graphs = []
for time_step in time_steps:
    extract_nodes = list(set(elliptic_features[elliptic_features['time_step']==time_step]['txId'].values.tolist()))
    edgelist_extract = elliptic_edgelist[elliptic_edgelist['txId1'].isin(extract_nodes) & elliptic_edgelist['txId2'].isin(extract_nodes)].values.tolist()
    edgelist = [tuple(row) for row in edgelist_extract]
    G = nx.DiGraph()
    G.add_edges_from(edgelist)
    graphs.append(G)
    centrality = nx.in_degree_centrality(G)
    print(len(centrality.keys()))
    for id in centrality.keys():
        elliptic_features.loc[(elliptic_features["txId"] == id) &(elliptic_features['time_step']==time_step) ,"centrality"] = centrality[id]
    #elliptic_features = pd.merge(elliptic_features,subset[["txId","time_step","new"]],on = ["txId","time_step"],how="left")
    #print(time_step,len(subset),len(centrality))
extract_nodes = list(set(elliptic_features[elliptic_features['time_step'].isin(time_steps)]['txId'].values.tolist()))

7880
4544
6621
5693
6803
4328
6048
4457


In [None]:
new = dataset[0]
new.x = elliptic_features
new.x = torch.Tensor(new.x[new.x.columns[2:]].values.astype(np.float32))
modelnew = GCN(dataset.num_features+1, 16, dataset.num_classes+1)

In [None]:
best_val_acc = final_test_acc = 0
for epoch in range(1, 21):
    loss = train(modelnew)
    train_acc, tmp_test_acc = test(modelnew)
    print(epoch,loss,train_acc,tmp_test_acc)