In [1]:
import pandas as pd
import torch
import networkx as nx
from matplotlib.pyplot import figure
from torch_geometric.data import Dataset, Data, DataLoader
from scipy.linalg import fractional_matrix_power
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from torch_geometric.data import InMemoryDataset
from torch_geometric.loader import DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from datetime import date
import time

In [23]:
path = '../blockchain_files/4000000to4999999_BlockTransaction/'

In [24]:
df = pd.read_csv(path+'processed.csv')

In [25]:
tx_counts = pd.read_csv(path+'tx_counts.csv')

In [26]:
tx_labels = tx_counts[['0','1']]

In [27]:
merged_df= df.merge(tx_labels,left_on='from', right_on='1',how='left')

In [28]:
merged_df.drop(columns=['1'],inplace=True)

In [29]:
merged_df.head()

Unnamed: 0,timestamp,from,to,value,gasLimit,gasPrice,gasUsed,0
0,1499633567,0x7ed1e469fcb3ee19c0366d829e291451be638e59,0x1c48b312f5b68fb8826f3eb15ba00fce2530ec7d,367600000000000000,21000,60000000000,21000,walletapp
1,1499633567,0x32be343b94f860124dc4fee278fdcbd38c102d88,0x7760d83e1ecf2c7f02b767e869d40571c76788b9,6130841830000000000,100000,50000000000,21000,exchange
2,1499633567,0x267be1c1d684f78cb4f6a176c4911b741e4ffdc0,0x3fc232497fe367f6c4c90b04d1b7056a8b031f03,7995000000000000000,30000,22050000000,21000,exchange
3,1499633567,0x5aa0c28181c359eac5990faaa95224a649e8a5b4,0xa1286364b21fd48256090b485684aee8f6c7d024,2000000000000000,21000,21000000000,21000,
4,1499633567,0x49c48176e317ff9dcafd0e8e4edb70c5da7f2df6,0xa981ff0d9d9d505a6fd212619d1ed718bf92a224,500000000000000000,21000,21000000000,21000,


In [30]:
def receiving_transaction_graph_h4_timed(node,st_time,end_time,dataframe):
    
    timed_df = dataframe[dataframe['timestamp'].between(st_time,end_time)]
    
    neigh1 = timed_df[timed_df['to'] == node]
    
    neigh2 = timed_df[timed_df['to'].isin(neigh1['from'])]
    neigh2=neigh2[neigh2['to'] != node]
    
    neigh3 = timed_df[timed_df['to'].isin(neigh2['from'])]
    neigh3 = neigh3[~neigh3['to'].isin(neigh2['to'])]
    
    neigh4 = timed_df[timed_df['to'].isin(neigh3['from'])]
    neigh4 = neigh4[~neigh4['to'].isin(neigh3['to'])]
    
    return (neigh1,neigh2,neigh3,neigh4)
    
    

In [31]:
def send_extract_graph_h4_timed(node,st_time,end_time,dataframe):
    
    timed_df = dataframe[dataframe['timestamp'].between(st_time,end_time)]
    
    neigh1 = timed_df[timed_df['from'] == node]
    
    neigh2 = timed_df[timed_df['from'].isin(neigh1['to'])]
    neigh2=neigh2[neigh2['from'] != node]
    
    neigh3 = timed_df[timed_df['from'].isin(neigh2['to'])]
    neigh3 = neigh3[~neigh3['from'].isin(neigh2['from'])]
    
    neigh4 = timed_df[timed_df['from'].isin(neigh3['to'])]
    neigh4 = neigh4[~neigh4['from'].isin(neigh3['from'])]
    
    return (neigh1,neigh2,neigh3,neigh4)
    
    


In [32]:
st_time = 1484475035
duration = 700000
end_time = st_time+duration
addr = '0xea674fdde714fd979de3edf0f56aa9716b898ec8'
(neigh1,neigh2,neigh3,neigh4)=send_extract_graph_h4_timed(addr,st_time,end_time,df)
neigh1.shape

(0, 7)

In [33]:
nodes = pd.concat([df['from'],df['to']]).unique()
nodes.shape

(18338866,)

In [34]:
map_id = {j:i for i,j in enumerate(nodes)}

In [35]:
merged_df['from'] = merged_df['from'].map(map_id)

In [36]:
merged_df['to'] = merged_df['to'].map(map_id)

In [37]:
merged_df['value'] = merged_df['value'].astype(float)/(10**18)

In [38]:
merged_df['gasPrice'] = merged_df['gasPrice'].astype(float)/(10**10)

In [39]:
merged_df['gasLimit'] = merged_df['gasLimit']/10000

In [40]:
merged_df['gasUsed'] = merged_df['gasUsed']/10000

In [41]:
merged_df.head()

Unnamed: 0,timestamp,from,to,value,gasLimit,gasPrice,gasUsed,0
0,1499633567,0,225089,0.3676,2.1,6.0,2.1,walletapp
1,1499633567,1,14157513,6.130842,10.0,5.0,2.1,exchange
2,1499633567,2,168,7.995,3.0,2.205,2.1,exchange
3,1499633567,3,12422636,0.002,2.1,2.1,2.1,
4,1499633567,4,1661887,0.5,2.1,2.1,2.1,


In [42]:
merged_df['0'].unique()

array(['walletapp', 'exchange', nan, 'mining', 'compromised',
       'phish_hack', 'gambling', 'isowallet'], dtype=object)

In [43]:
labels = merged_df['0'].unique()
for label in labels:
    print('%s: '% label,(merged_df['0'] == label).sum())
# print('mining: ',(merged_df['0'] == 'mining').sum())

walletapp:  151664
exchange:  7430366
nan:  0
mining:  9757321
compromised:  199476
phish_hack:  3374
gambling:  2909
isowallet:  2172


In [44]:
with open(path+'final.csv', 'w') as f:
        merged_df.to_csv(f,index=False,header=True)