# Graph Utilities for `neo4j`

# Preliminaries

In [1]:
import numpy as np
from pprint import pprint
import neo4j

In [2]:
import hashlib
from hashlib import sha256
from pprint import pprint
import time

# Utility functions

## `numpy` array store

In [3]:
#from collections.abc import MutableMapping

class NumpyStore():
    """Content-addressed store for numpy arrays backed by a neo4j database.

    Each array is saved as a single ``:ndarray`` node holding its dtype name,
    shape and raw bytes.  The node is keyed by ``k``, a 16-hex-character
    digest derived from the array's contents, dtype name and shape, so
    storing the same array twice yields the same key.

    NOTE: the dtype is recorded via ``dtype.name``, which does not encode
    byte order; arrays with non-native byte order may not round-trip
    faithfully.

    Parameters
    ----------
    driver
        A neo4j driver object; only its ``session()`` method is used here.
    """

    def __init__(self, driver):
        self.driver = driver

    @staticmethod
    def _np_to_key(a):
        """Return the 16-hex-character key for array ``a``.

        The key is the truncated SHA-256 of the array's C-order bytes,
        followed by its dtype name and the string form of its shape.
        """
        # hashlib only accepts C-contiguous buffers.  ascontiguousarray is a
        # no-op for arrays that already are, and otherwise yields the same
        # C-order bytes that `tobytes()` stores, so keys stay consistent.
        m = sha256(np.ascontiguousarray(a))
        m.update(a.dtype.name.encode('utf8'))
        m.update(str(a.shape).encode('utf8'))
        return m.hexdigest()[:16]

    @staticmethod
    def _add_np(tx, a, k):
        """Transaction function: MERGE the node describing array ``a`` under key ``k``."""
        tx.run("MERGE (:ndarray {k: $k, dtype: $dtype, shape: $shape, bytes: $bytes})",
               k=k,
               dtype=a.dtype.name,
               shape=list(a.shape),
               bytes=a.tobytes())

    @staticmethod
    def _get_np(tx, k):
        """Transaction function: fetch and rebuild the array stored under ``k``.

        Raises
        ------
        KeyError
            If no node with key ``k`` exists.
        AssertionError
            If more than one node carries key ``k``.
        """
        response = tx.run("MATCH (a:ndarray) WHERE a.k = $k "
                          "RETURN a.dtype as dtype, a.shape as shape, a.bytes as bytes",
                          k=k)
        records = list(response)
        n_matches = len(records)
        assert n_matches <= 1, f"Found {n_matches} arrays of key {k}, when should only be one."
        if n_matches < 1:
            # Include the key so the failure is diagnosable from the traceback.
            raise KeyError(k)
        r = records[0]
        return np.frombuffer(r['bytes'], dtype=r['dtype']).reshape(r['shape'])

    def _tput(self, a):
        """Store ``a`` in a write transaction and return its key."""
        k = self._np_to_key(a)
        with self.driver.session() as session:
            session.write_transaction(self._add_np, a, k=k)
        return k

    def _tget(self, k):
        """Load the array stored under ``k`` in a read transaction.

        The key is recomputed from the loaded array and asserted to match
        ``k`` as an integrity check.
        """
        with self.driver.session() as session:
            a = session.read_transaction(self._get_np, k)
        assert k == self._np_to_key(a)
        return a

    def store(self, a):
        """Store array ``a`` and return the key under which it was saved."""
        return self._tput(a)

    def retrieve(self, key):
        """Return the array stored under ``key``; raises KeyError if absent."""
        # Fixed: previously referenced an undefined name `k` instead of `key`.
        return self._tget(key)

    def __getitem__(self, k):
        """``store[k]`` -- same as ``retrieve(k)``."""
        return self._tget(k)

    def __setitem__(self, k, v):
        """``store[k] = v`` -- store ``v``.

        The supplied ``k`` is ignored: the array is always saved under the
        key derived from its own contents.
        """
        return self._tput(v)

    def __delitem__(self, k=None):
        """Deletion is not supported.

        Accepts the key so that ``del store[k]`` raises NotImplementedError
        rather than a TypeError about the argument count.
        """
        raise NotImplementedError

    def __iter__(self):
        """Iteration over stored keys is not supported."""
        raise NotImplementedError

    def __len__(self):
        """Counting stored arrays is not supported."""
        raise NotImplementedError

---

# Tests

## Connecting

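The `gpu-jupyter` and `neo4j` Docker containers need to be attached to the same Docker network so that this notebook can reach the database. If the containers were started bare (each with a plain `docker run`), something like: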

    docker network connect gpu-jupyter_default gpu-jupyter 
    docker network connect gpu-jupyter_default neo4j
    docker network inspect gpu-jupyter_default 
    
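Docker has better ways to do this (for example, defining both services in one Docker Compose file so that they share a network from the start).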

In [6]:
if __name__ == '__main__':
    # Smoke test: round-trip a batch of random arrays through a live neo4j
    # server and check each one comes back unchanged.
    import os

    # Connection settings can be overridden from the environment; the
    # defaults are the values this notebook was developed against.
    uri = os.environ.get("NEO4J_URI", "neo4j://172.19.0.2:7687")
    user = os.environ.get("NEO4J_USER", "neo4j")
    password = os.environ.get("NEO4J_PASSWORD", "test")

    driver = neo4j.GraphDatabase.driver(uri, auth=(user, password))
    try:
        driver.verify_connectivity()

        print("Testing ", end='')
        nps = NumpyStore(driver)
        # Seeded generator: re-running the cell stores the same arrays, so the
        # MERGE in the store keeps the database from growing on every run.
        rng = np.random.default_rng(0)
        test_arrays = []
        test_array_keys = []
        for i in range(10):
            print('.', end='')
            for j in range(100):
                # Three independent arrays per (i, j) shape; i == 0 or j == 0
                # exercises empty arrays.
                for _ in range(3):
                    a = rng.standard_normal((i, j))
                    test_arrays.append(a)
                    key = nps.store(a)
                    test_array_keys.append(key)
                    assert np.equal(nps[key], a).all()
        print("Passed")
    finally:
        # Always release the driver's connections, even if an assertion fails.
        driver.close()

  driver.verify_connectivity()


Testing ..........Passed


---

# Publishing

To produce an importable `graph_utils_neo4j.py`:
1. Save this notebook.
1. Uncomment the `jupyter nbconvert` line below.
1. Execute that cell.
1. Comment out the `jupyter nbconvert` line again.
1. Save the notebook again in that form.

In [7]:
###!jupyter nbconvert --to script 'graph_utils_neo4j.ipynb'

[NbConvertApp] Converting notebook graph_utils_neo4j.ipynb to script
[NbConvertApp] Writing 3571 bytes to graph_utils_neo4j.py
