In [1]:
import numpy as np 
import pandas as pd 

import os
for dirname, _, filenames in os.walk('./input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

./input/elliptic_txs_features_test.csv
./input/elliptic_txs_classes.csv
./input/elliptic_txs_edgelist.csv
./input/elliptic_txs_features.csv


In [2]:
import pandas as pd 
import networkx as nx
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics

id_time=["txId", "time_step"]
feature_names = ['feature_'+str(i) for i in range(1,166)]
column_names = id_time + feature_names
elliptic_classes = pd.read_csv('./input/elliptic_txs_classes.csv')
elliptic_classes.columns = ['txId', 'class_label']
elliptic_edgelist = pd.read_csv('./input/elliptic_txs_edgelist.csv')
elliptic_features = pd.read_csv('./input/elliptic_txs_features.csv', names=column_names)
elliptic_features["centrality"] =None

## Basic Model

In [6]:
import torch
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.datasets import EllipticBitcoinDataset
#from torch_geometric.logging import init_wandb, log
from torch_geometric.nn import GCNConv

dataset = EllipticBitcoinDataset(root='./pytorch_input')

class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        # x: Node feature matrix of shape [num_nodes, in_channels]
        # edge_index: Graph connectivity matrix of shape [2, num_edges]
        #x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv1(x, edge_index).relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index).relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv3(x, edge_index)
        return x


In [49]:
simplemodel = GCN(dataset.num_features, 32, dataset.num_classes+1)
data=dataset[0]
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
labeled = data.y>0
randomize = torch.FloatTensor(data.y.shape[0]).uniform_()
classweight = torch.FloatTensor([0,int((data.y == 2).sum()),int((data.y == 1).sum())])
data.train_mask = torch.logical_and(data.y > 0,randomize>0.2)# 80% of labeled
data.test_mask = torch.logical_and(data.y > 0,randomize< 0.2)# rest 20% of labeled
# init_wandb(name=f'GCN-{args.dataset}', lr=args.lr, epochs=args.epochs,            hidden_channels=args.hidden_channels, device=device)
simplemodel, data = simplemodel.to(device), data.to(device)
optimizer = torch.optim.Adam([ dict(params=simplemodel.conv1.parameters(), weight_decay=5e-4), 
dict(params=simplemodel.conv2.parameters(), weight_decay=5e-4),
dict(params=simplemodel.conv3.parameters(), weight_decay=0)], lr=0.005)  # Only perform weight-decay on first convolution.


def train(model,d):
    model.train()
    optimizer.zero_grad()
    out = model(d.x, d.edge_index)
    #print(out.shape,new_y.shape)
    #print(data.y[data.test_mask].shape)
    loss = F.cross_entropy(out[d.train_mask], d.y[d.train_mask],weight=classweight)
    loss.backward()
    optimizer.step()
    return float(loss)

@torch.no_grad()
def test(model,d):
    model.eval()
    pred = model(d.x, d.edge_index).argmax(dim=-1)
    accs = []
    for mask in [d.train_mask, d.test_mask]:
        accs.append(int((pred[mask] == d.y[mask]).sum()) / int(mask.sum()))
    return accs


    #log(Epoch=epoch, Loss=loss, Train=train_acc, Val=0, Test=test_acc)

In [None]:

best_val_acc = final_test_acc = 0
for epoch in range(1, 21):
    loss = train(simplemodel,data)
    train_acc, tmp_test_acc = test(simplemodel,data)
    print(epoch,loss,train_acc,tmp_test_acc)

## Generate New Centrality Feature

In [13]:
time_steps = list(range(1,17))
graphs = []
for time_step in time_steps:
    extract_nodes = list(set(elliptic_features[elliptic_features['time_step']==time_step]['txId'].values.tolist()))
    edgelist_extract = elliptic_edgelist[elliptic_edgelist['txId1'].isin(extract_nodes) & elliptic_edgelist['txId2'].isin(extract_nodes)].values.tolist()
    edgelist = [tuple(row) for row in edgelist_extract]
    G = nx.DiGraph()
    G.add_edges_from(edgelist)
    graphs.append(G)
    centrality = nx.in_degree_centrality(G)
    print(len(centrality.keys()))
    for id in centrality.keys():
        elliptic_features.loc[(elliptic_features["txId"] == id) &(elliptic_features['time_step']==time_step) ,"centrality"] = centrality[id]
    #elliptic_features = pd.merge(elliptic_features,subset[["txId","time_step","new"]],on = ["txId","time_step"],how="left")
    #print(time_step,len(subset),len(centrality))
extract_nodes = list(set(elliptic_features[elliptic_features['time_step'].isin(time_steps)]['txId'].values.tolist()))

7880
4544
6621
5693
6803
4328
6048
4457
4996
6727
4296
2047
4528
2022
3639
2975


In [46]:
new = dataset[0]
new.x = elliptic_features
new.x = torch.Tensor(new.x[new.x.columns[2:]].values.astype(np.float32))
new.x = torch.nan_to_num(new.x, nan=0)
modelnew = GCN(dataset.num_features+1, 16, dataset.num_classes+1)

In [53]:
randomize = torch.FloatTensor(data.y.shape[0]).uniform_()
new.train_mask = torch.logical_and(data.y > 0,randomize>0.2)# 80% of labeled
new.test_mask = torch.logical_and(data.y > 0,randomize< 0.2)# rest 20% of labeled
modelnew, new = modelnew.to(device), new.to(device)
best_val_acc = final_test_acc = 0
train_acc = tmp_test_acc = 0
for epoch in range(1, 21):
    loss = train(modelnew,new)
    train_acc, tmp_test_acc = test(modelnew,new)
    print(epoch,loss,train_acc,tmp_test_acc)

1 1.5124796628952026 0.046919845586130604 0.04648013051374396
2 1.5283610820770264 0.046919845586130604 0.04648013051374396
3 1.5279396772384644 0.046919845586130604 0.04648013051374396
4 1.5333728790283203 0.046919845586130604 0.04648013051374396
5 1.518808126449585 0.046919845586130604 0.04648013051374396
6 1.5292044878005981 0.046919845586130604 0.04648013051374396
7 1.522578477859497 0.046919845586130604 0.04648013051374396
8 1.5254406929016113 0.046919845586130604 0.04648013051374396
9 1.5326306819915771 0.046919845586130604 0.04648013051374396
10 1.5192081928253174 0.046919845586130604 0.04648013051374396
11 1.5253543853759766 0.046919845586130604 0.04648013051374396
12 1.5113784074783325 0.046919845586130604 0.04648013051374396
13 1.5392752885818481 0.046919845586130604 0.04648013051374396
14 1.5097583532333374 0.046919845586130604 0.04648013051374396
15 1.5169997215270996 0.046919845586130604 0.04648013051374396


KeyboardInterrupt: 