In [2]:
# Importing libraries
from neo4j import GraphDatabase
from py2neo import Graph
import numpy as np
import pandas as pd
import json
import os
from datetime import datetime
import pytz

In [3]:
# Logging
# When `logging` is True, the helper functions below print one timestamped line
# (via print(), i.e. to the cell output / standard output) for every node,
# relationship and index they create.
# NOTE(review): no log file is written by the code visible in this notebook;
# redirect standard output if the trace has to be kept in a file.
# NOTE: this name is a plain bool flag, not the standard-library `logging` module
# (which is not imported here).
logging = True

In [4]:
def getTimeStamp():
    """Return the current wall-clock time in the "EST" zone as text.

    The layout is 'YYYY:MM:DD HH:MM:SS <zone name> <utc offset>', which is the
    prefix used by every log line printed in this notebook.
    """
    # strftime already yields a str, so no further conversion is needed.
    now_in_est = datetime.now(pytz.timezone("EST"))
    return now_in_est.strftime('%Y:%m:%d %H:%M:%S %Z %z')

In [5]:
def createBlockNode(sess, blk):
    """Create a `block` node from a parsed block dict and return its Neo4j id.

    Parameters
    ----------
    sess : object exposing ``run(query, parameters)`` (a neo4j session)
    blk : dict with the keys "hash", "version", "previousblockhash",
        "merkleroot", "time", "difficulty", "nonce" and "nTx".

    Returns
    -------
    The value of ``id(n)`` reported by the database for the new node.

    Every property is stored as a string (``str(value)``), exactly as before.
    Values are sent as query parameters instead of being concatenated into the
    Cypher text, so quotes or backslashes in the data can neither break nor
    alter the statement.
    """
    # NOTE: the property key "numTranactions" (sic) is kept unchanged because
    # it is part of the stored graph schema.
    query = (
        "CREATE (n:block {"
        "hash: $hash, "
        "version: $version, "
        "previousBlockHash: $previousBlockHash, "
        "merkleRoot: $merkleRoot, "
        "time: $time, "
        "difficulty: $difficulty, "
        "nonce: $nonce, "
        "numTranactions: $numTranactions"
        "}) RETURN id(n)"
    )
    params = {
        "hash": str(blk["hash"]),
        "version": str(blk["version"]),
        "previousBlockHash": str(blk["previousblockhash"]),
        "merkleRoot": str(blk["merkleroot"]),
        "time": str(blk["time"]),
        "difficulty": str(blk["difficulty"]),
        "nonce": str(blk["nonce"]),
        "numTranactions": str(blk["nTx"]),
    }

    if logging:
        print(getTimeStamp() + " creating node : block > hash_id : " + str(blk["hash"]))

    ret = sess.run(query, params)

    return ret.data()[0]["id(n)"]

In [6]:
def createCoinbaseNode(sess, tx):
    """Create a `coinbase` node for a transaction and return its Neo4j id.

    Parameters
    ----------
    sess : object exposing ``run(query, parameters)`` (a neo4j session)
    tx : dict whose ``tx["vout"][0]["value"]`` is stored as the node's
        ``value`` property (as a string, as before).

    Returns
    -------
    The value of ``id(n)`` reported by the database for the new node.

    The value is passed as a query parameter rather than concatenated into the
    Cypher text.
    """
    reward_value = str(tx["vout"][0]["value"])

    if logging:
        print(getTimeStamp() + " creating node : coinbase > value : " + reward_value)

    ret = sess.run(
        "CREATE (n:coinbase {value: $value}) RETURN id(n)",
        {"value": reward_value},
    )

    return ret.data()[0]["id(n)"]

In [7]:
def createTxNode(sess, tx):
    """Create a `tx` node for a transaction dict and return its Neo4j id.

    Parameters
    ----------
    sess : object exposing ``run(query, parameters)`` (a neo4j session)
    tx : dict with the keys "txid" and "version"; both are stored as strings.

    Returns
    -------
    The value of ``id(n)`` reported by the database for the new node.

    Values are passed as query parameters rather than concatenated into the
    Cypher text.
    """
    params = {
        "txid": str(tx["txid"]),
        "version": str(tx["version"]),
    }

    if logging:
        print(getTimeStamp() + " creating node : tx > txid : " + params["txid"])

    ret = sess.run(
        "CREATE (n:tx {txid: $txid, version: $version}) RETURN id(n)",
        params,
    )

    return ret.data()[0]["id(n)"]

In [8]:
def createOutputNode(sess, tx, ind):
    """Create an `output` node for one transaction output and return its Neo4j id.

    Parameters
    ----------
    sess : object exposing ``run(query, parameters)`` (a neo4j session)
    tx : dict describing a single output (despite the parameter name); it must
        provide "value" and a "scriptPubKey" dict with "hex", "asm", "type"
        and "address".
    ind : position of the output within its transaction, stored as ``vout``.

    Returns
    -------
    The value of ``id(n)`` reported by the database for the new node.

    All properties are stored as strings, as before. Script text and addresses
    are passed as query parameters so that any quote character in them cannot
    break or alter the Cypher statement.
    """
    script = tx["scriptPubKey"]
    params = {
        "vout": str(ind),
        "value": str(tx["value"]),
        "scriptPK_hex": str(script["hex"]),
        "scriptPK_asm": str(script["asm"]),
        "type": str(script["type"]),
        "address": str(script["address"]),
    }
    query = (
        "CREATE (n:output {"
        "vout: $vout, "
        "value: $value, "
        "scriptPK_hex: $scriptPK_hex, "
        "scriptPK_asm: $scriptPK_asm, "
        "type: $type, "
        "address: $address"
        "}) RETURN id(n)"
    )

    if logging:
        print(getTimeStamp() + " creating node : output > value : " + str(tx["value"]))

    ret = sess.run(query, params)

    return ret.data()[0]["id(n)"]
    

In [9]:
def checkAddressExists(sess, addr):
    """Return True when an `address` node with ``address == addr`` exists.

    Parameters
    ----------
    sess : object exposing ``run(query, parameters)`` (a neo4j session)
    addr : address string to look up.

    The address is passed as a query parameter rather than concatenated into
    the Cypher text, and at most one row is requested because only existence
    matters.
    """
    ret = sess.run(
        "MATCH (n:address) WHERE n.address = $address RETURN n LIMIT 1",
        {"address": addr},
    )

    return len(ret.data()) != 0
        

In [10]:
def createAddressNode(sess, address):
    """Create an `address` node unless one with the same address already exists.

    Parameters
    ----------
    sess : object exposing ``run(query, parameters)`` (a neo4j session)
    address : address string stored in the node's ``address`` property.

    Returns
    -------
    The value of ``id(n)`` for the newly created node, or ``None`` when a node
    for this address was already present (nothing is created in that case).

    The address is passed as a query parameter rather than concatenated into
    the Cypher text.
    """
    if checkAddressExists(sess, address):
        # Already present: keep the previous contract of returning None.
        return None

    if logging:
        print(getTimeStamp() + " creating node : address > address : " + address)

    ret = sess.run(
        "CREATE (n:address {address: $address}) RETURN id(n)",
        {"address": address},
    )

    return ret.data()[0]["id(n)"]
            
        


In [11]:
def createChainRel(sess, prevBlkHash, n4j_blk_id):
    """Link the block whose hash is ``prevBlkHash`` to block ``n4j_blk_id``.

    Creates ``(a:block)-[:chain]->(b:block)`` where ``a.hash`` equals
    ``prevBlkHash`` and ``b`` is the node with internal id ``n4j_blk_id``.
    If no block with that hash exists the MATCH finds nothing and no
    relationship is created.

    Parameters
    ----------
    sess : object exposing ``run(query, parameters)`` (a neo4j session)
    prevBlkHash : hash of the preceding block.
    n4j_blk_id : Neo4j node id of the current block.

    Values are passed as query parameters rather than formatted into the
    Cypher text.
    """
    query = (
        "MATCH (a:block), (b:block) "
        "WHERE a.hash = $prevHash AND id(b) = $blockId "
        "CREATE (a)-[r:chain]->(b) "
        "RETURN type(r)"
    )
    params = {"prevHash": str(prevBlkHash), "blockId": int(n4j_blk_id)}

    if logging:
        print(getTimeStamp() + " creating relationship : chain")

    sess.run(query, params)

In [12]:
def createRewardRel(sess, n4j_blk_id, n4j_cb_id):
    """Create ``(block)-[:reward]->(coinbase)`` between two nodes given by id.

    Parameters
    ----------
    sess : object exposing ``run(query, parameters)`` (a neo4j session)
    n4j_blk_id : Neo4j node id of the `block` node.
    n4j_cb_id : Neo4j node id of the `coinbase` node.

    The ids are passed as query parameters, so the Cypher text is identical on
    every call instead of embedding a different literal each time.
    """
    query = (
        "MATCH (a:block), (b:coinbase) "
        "WHERE id(a) = $blockId AND id(b) = $coinbaseId "
        "CREATE (a)-[r:reward]->(b) "
        "RETURN type(r)"
    )
    params = {"blockId": int(n4j_blk_id), "coinbaseId": int(n4j_cb_id)}

    if logging:
        print(getTimeStamp() + " creating relationship : reward")

    sess.run(query, params)
    

In [13]:
def createSeedsRel(sess, n4j_cb_id, n4j_tx_id):
    """Create ``(coinbase)-[:seeds]->(tx)`` between two nodes given by id.

    Parameters
    ----------
    sess : object exposing ``run(query, parameters)`` (a neo4j session)
    n4j_cb_id : Neo4j node id of the `coinbase` node.
    n4j_tx_id : Neo4j node id of the `tx` node.

    The ids are passed as query parameters, so the Cypher text is identical on
    every call instead of embedding a different literal each time.
    """
    query = (
        "MATCH (a:coinbase), (b:tx) "
        "WHERE id(a) = $coinbaseId AND id(b) = $txId "
        "CREATE (a)-[r:seeds]->(b) "
        "RETURN type(r)"
    )
    params = {"coinbaseId": int(n4j_cb_id), "txId": int(n4j_tx_id)}

    if logging:
        print(getTimeStamp() + " creating relationship : seeds")

    sess.run(query, params)

In [14]:
def createIncludesRel(sess, n4j_tx_id, n4j_blk_id):
    """Create ``(tx)-[:includes]->(block)`` between two nodes given by id.

    Parameters
    ----------
    sess : object exposing ``run(query, parameters)`` (a neo4j session)
    n4j_tx_id : Neo4j node id of the `tx` node (relationship start).
    n4j_blk_id : Neo4j node id of the `block` node (relationship end).

    The ids are passed as query parameters, so the Cypher text is identical on
    every call instead of embedding a different literal each time.
    """
    # NOTE(review): the relationship points from the transaction to the block;
    # the direction is kept exactly as it was.
    query = (
        "MATCH (a:tx), (b:block) "
        "WHERE id(a) = $txId AND id(b) = $blockId "
        "CREATE (a)-[r:includes]->(b) "
        "RETURN type(r)"
    )
    params = {"txId": int(n4j_tx_id), "blockId": int(n4j_blk_id)}

    if logging:
        print(getTimeStamp() + " creating relationship : includes")

    sess.run(query, params)

In [15]:
def createOutRel(sess, n4j_tx_id, n4j_out_id):
    """Create ``(tx)-[:out]->(output)`` between two nodes given by id.

    Parameters
    ----------
    sess : object exposing ``run(query, parameters)`` (a neo4j session)
    n4j_tx_id : Neo4j node id of the `tx` node.
    n4j_out_id : Neo4j node id of the `output` node.

    The ids are passed as query parameters, so the Cypher text is identical on
    every call instead of embedding a different literal each time.
    """
    query = (
        "MATCH (a:tx), (b:output) "
        "WHERE id(a) = $txId AND id(b) = $outputId "
        "CREATE (a)-[r:out]->(b) "
        "RETURN type(r)"
    )
    params = {"txId": int(n4j_tx_id), "outputId": int(n4j_out_id)}

    if logging:
        print(getTimeStamp() + " creating relationship : out")

    sess.run(query, params)

In [16]:
def createLockedRel(sess, n4j_out_id, address):
    """Create ``(output)-[:locked]->(address)`` for an output id and an address.

    The `address` node is looked up by its ``address`` property; if none
    matches, the MATCH finds nothing and no relationship is created.

    Parameters
    ----------
    sess : object exposing ``run(query, parameters)`` (a neo4j session)
    n4j_out_id : Neo4j node id of the `output` node.
    address : address string identifying the `address` node.

    Values are passed as query parameters rather than formatted into the
    Cypher text, so quote characters in the address cannot break the query.
    """
    query = (
        "MATCH (a:output), (b:address) "
        "WHERE id(a) = $outputId AND b.address = $address "
        "CREATE (a)-[r:locked]->(b) "
        "RETURN type(r)"
    )
    params = {"outputId": int(n4j_out_id), "address": str(address)}

    if logging:
        print(getTimeStamp() + " creating relationship : locked")

    sess.run(query, params)

In [17]:
def createUnlockRel(sess, vin, n4j_tx_id):
    """Link the output spent by ``vin`` to the transaction that spends it.

    Finds the `output` node reached through ``(a:tx)-[:out]->(b:output)`` with
    ``a.txid == vin["txid"]`` and ``b.vout == vin["vout"]``, then creates
    ``(b)-[:unlock {scriptSig: ...}]->(c)`` where ``c`` is the `tx` node with
    internal id ``n4j_tx_id``. If either MATCH finds nothing, no relationship
    is created.

    Parameters
    ----------
    sess : object exposing ``run(query, parameters)`` (a neo4j session)
    vin : dict with the keys "txid", "vout" and "scriptSig_hex".
    n4j_tx_id : Neo4j node id of the spending `tx` node.

    Values are passed as query parameters rather than formatted into the
    Cypher text.
    """
    scriptSig_ = str(vin["scriptSig_hex"])
    params = {
        "prevTxid": str(vin["txid"]),
        # Compared as text: the original query also matched b.vout against a
        # quoted string literal.
        "prevVout": str(vin["vout"]),
        "spenderId": int(n4j_tx_id),
        "scriptSig": scriptSig_,
    }
    query = (
        "MATCH (a:tx)-[:out]->(b:output) "
        "WHERE a.txid = $prevTxid AND b.vout = $prevVout "
        "MATCH (c:tx) WHERE id(c) = $spenderId "
        "CREATE (b)-[r:unlock {scriptSig: $scriptSig}]->(c) RETURN r"
    )

    if logging:
        print(getTimeStamp() + " creating relationship : unlock > scriptsig : " + scriptSig_)

    sess.run(query, params)

In [18]:
def checkIndexExists(sess, nodelLabel, propName):
    """Return True when an index on ``nodelLabel(propName)`` is already listed.

    Runs ``CALL db.indexes()`` and inspects the "labelsOrTypes" and
    "properties" columns of each row. As before, only the first label and the
    first property of an index are compared.

    Parameters
    ----------
    sess : object exposing ``run(query)`` (a neo4j session)
    nodelLabel : node label to look for.
    propName : property name to look for.

    Rows whose label or property list is empty or null are skipped, so they no
    longer raise ``TypeError`` from ``len(None)``.
    """
    # NOTE(review): db.indexes() is a legacy procedure; on newer Neo4j servers
    # the replacement is `SHOW INDEXES` -- confirm against the server version.
    for index_row in sess.run("CALL db.indexes()").data():

        labels = index_row.get("labelsOrTypes")
        props = index_row.get("properties")

        # Nothing to compare against when either column is empty or null.
        if not labels or not props:
            continue

        if labels[0] == nodelLabel and props[0] == propName:
            return True

    return False
    

In [19]:
def createIndex(sess, nodeLabel, propName):
    """Create an index on ``nodeLabel(propName)`` unless one already exists.

    Parameters
    ----------
    sess : object exposing ``run(query)`` (a neo4j session)
    nodeLabel : node label to index.
    propName : property name to index.

    The "creating index" log line is now printed only when the CREATE INDEX
    statement is actually issued; previously it was printed even when the
    index already existed and nothing was created.
    """
    # Useful when inspecting or resetting indexes by hand:
    # CALL db.indexes()
    # DROP INDEX ON :nodeLabel(propName)

    if checkIndexExists(sess, nodeLabel, propName):
        return

    # Labels and property names cannot be supplied as Cypher parameters, so
    # this statement is still assembled as text; only pass trusted identifiers.
    # NOTE(review): `CREATE INDEX ON :Label(prop)` is the legacy syntax --
    # confirm it is accepted by the target server version.
    cmd1 = "CREATE INDEX ON :" + nodeLabel + "(" + propName + ")"

    if logging:
        print(getTimeStamp() + " creating index > nodetype : " + nodeLabel + " > propname : " + propName)

    sess.run(cmd1)

In [20]:
# Creating neo4j database driver and session
# Connection settings can be overridden through environment variables so real
# credentials never have to be stored in the notebook; the fallbacks are the
# local development values this notebook has always used.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "password")
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j")

dbc = GraphDatabase.driver(uri = NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
# Creating the driver object does not prove the server is reachable; check
# explicitly so the message below is only printed after a real connection.
dbc.verify_connectivity()
print("Connected to Neo4J database : " + str(dbc))
print()
sess = dbc.session(database=NEO4J_DATABASE)

Connected to Neo4J database : <neo4j._sync.driver.BoltDriver object at 0x130cba2b0>



In [32]:
# Sorted list of the JSON dumps (one per DAT file) found in ./result/
# Only *.json entries are kept, so stray directory entries such as .DS_Store or
# .ipynb_checkpoints (which sort before the data files) are never handed to
# json.load by the import loop.
block_list = sorted(
    entry for entry in os.listdir("./result/") if entry.endswith(".json")
)

In [29]:

print ("STARTING IMPORT TO NEO4J")
print ("***********************************")
print ()

# All-zero hash found in the parsed JSON where there is no predecessor: it is
# the previous-block hash of the first block and the input txid of a coinbase
# transaction.
NULL_HASH = "0000000000000000000000000000000000000000000000000000000000000000"

# Trial-run limits: how many DAT-file dumps to load and how many blocks to take
# from each. Set either to None to process everything.
MAX_FILES = 1
MAX_BLOCKS_PER_FILE = 10

# Iterating through the JSON dump of each DAT file
for block_file in block_list[:MAX_FILES]:

    print('loading > ./result/' + block_file)
    # Context manager so the file handle is closed as soon as it is parsed
    with open('./result/' + block_file) as bl:
        bl_json = json.load(bl)

    # Iterate through the blocks of the DAT file. Slicing instead of a fixed
    # index range also copes with files holding fewer blocks than the limit.
    for blk in bl_json[:MAX_BLOCKS_PER_FILE]:

        n4j_blk_id = createBlockNode(sess, blk)

        # If the block is not the first block, create the chain relationship
        prevBlkHash = str(blk["previousblockhash"])
        if prevBlkHash != NULL_HASH:
            createChainRel(sess, prevBlkHash, n4j_blk_id)

        # Iterating through each transaction associated with the current block
        for tx_data in blk["tx"]:

            # Creating transaction node and includes relationship
            n4j_tx_id = createTxNode(sess, tx_data)
            createIncludesRel(sess, n4j_tx_id, n4j_blk_id)

            # Iterate through the vin list of the transaction
            for each in tx_data["vin"]:

                # An all-zero input txid marks the coinbase input: create the
                # coinbase node plus its reward and seeds relationships.
                # (A bare `exit` expression used to follow here; it was never
                # called and therefore had no effect, so it has been dropped.)
                if str(each["txid"]) == NULL_HASH:
                    n4j_cb_id = createCoinbaseNode(sess, tx_data)
                    createRewardRel(sess, n4j_blk_id, n4j_cb_id)
                    createSeedsRel(sess, n4j_cb_id, n4j_tx_id)

                # Otherwise create the unlock relationship
                else:
                    createUnlockRel(sess, each, n4j_tx_id)

            # Iterate through the outputs of the transaction
            for z, output in enumerate(tx_data["vout"]):

                # Creating output node and out relationship
                n4j_out_id = createOutputNode(sess, output, z)
                createOutRel(sess, n4j_tx_id, n4j_out_id)

                # Create the address node if it does not exist yet, then lock
                # the output to that address (new or pre-existing)
                address = str(output["scriptPubKey"]["address"])
                if address != "N/A":
                    createAddressNode(sess, address)
                    createLockedRel(sess, n4j_out_id, address)
            

STARTING IMPORT TO NEO4J
***********************************

loading > ./result/blocklist_00000.json
2022:11:26 14:33:01 EST -0500 creating node : block > hash_id : 000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f
2022:11:26 14:33:02 EST -0500 creating node : tx > txid : 4a5e1e4baab89f3a32518a88c31bc87f618f76673e2cc77ab2127b7afdeda33b
2022:11:26 14:33:02 EST -0500 creating relationship : includes
2022:11:26 14:33:02 EST -0500 creating node : coinbase > value : 50.0
2022:11:26 14:33:02 EST -0500 creating relationship : reward
2022:11:26 14:33:03 EST -0500 creating relationship : seeds
2022:11:26 14:33:03 EST -0500 creating node : output > value : 50.0
2022:11:26 14:33:03 EST -0500 creating relationship : out
2022:11:26 14:33:03 EST -0500 creating node : block > hash_id : 00000000839a8e6886ab5951d76f411475428afc90947ee320161bbf18eb6048
2022:11:26 14:33:03 EST -0500 creating relationship : chain
2022:11:26 14:33:03 EST -0500 creating node : tx > txid : 0e3e2357e806b6cdb1f

In [30]:
# Creating indexes on the properties the import looks nodes up by
# NOTE(review): this cell sits after the import loop, whose helpers match on
# block.hash, tx.txid and address.address; consider running it before the import.
for node_label, prop_name in (("block", "hash"), ("tx", "txid"), ("address", "address")):
    createIndex(sess, node_label, prop_name)

2022:11:26 14:33:12 EST -0500 creating index > nodetype : block > propname : hash
2022:11:26 14:33:12 EST -0500 creating index > nodetype : tx > propname : txid
2022:11:26 14:33:12 EST -0500 creating index > nodetype : address > propname : address


In [31]:
print()
print ("***********************************")
print ("FINISHED IMPORT TO NEO4J")

# Closing the session, then the driver that owns the underlying connections
# (the driver was previously left open)
sess.close()
dbc.close()


***********************************
FINISHED IMPORT TO NEO4J
