In [None]:
import os
from functools import lru_cache

from neo4j import GraphDatabase

from db_inference import simple_sql_db

# Name of the Neo4j Docker instance; interpolated as the host part of the
# Bolt URI when the connection is created.
NEO4J_DOCKER_INSTANCE_NAME = "testneo4j"

In [None]:
# https://towardsdatascience.com/create-a-graph-database-in-neo4j-using-python-4172d40f89c4
class Neo4jConnection:
    """Small convenience wrapper around a Neo4j driver.

    The driver is created eagerly in the constructor. Errors are reported
    with ``print`` rather than raised: a failed driver creation leaves the
    connection without a driver, and a failed query yields ``None``.
    """

    def __init__(self, uri, user, pwd):
        """Store the connection settings and try to create the driver.

        Args:
            uri: Connection URI handed to ``GraphDatabase.driver``.
            user: User name for authentication.
            pwd: Password for authentication.
        """
        self.__uri, self.__user, self.__pwd = uri, user, pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(uri, auth=(user, pwd))
        except Exception as e:
            # Best effort: report the problem and keep the driver unset.
            print("Failed to create the driver:", e)

    def close(self):
        """Close the underlying driver, if one was created."""
        if self.__driver is None:
            return
        self.__driver.close()

    def query(self, query, parameters=None, db=None):
        """Run ``query`` in a fresh session and return its records as a list.

        Args:
            query: Query text passed to ``session.run``.
            parameters: Optional parameters passed to ``session.run``.
            db: Optional database name; when ``None`` the session is opened
                without a ``database`` argument.

        Returns:
            A list of the records produced by the query, or ``None`` if
            opening the session or running the query raised an exception
            (the exception is printed).

        Raises:
            AssertionError: If the driver was never created.
        """
        assert self.__driver is not None, "Driver not initialized!"
        # Only forward `database` when the caller explicitly picked one.
        session_kwargs = {} if db is None else {"database": db}
        session = None
        try:
            session = self.__driver.session(**session_kwargs)
            return list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
            return None
        finally:
            # The session is released on both the success and failure paths.
            if session is not None:
                session.close()


# Connect to the Neo4j instance over Bolt on port 7687. The credentials can be
# overridden through the NEO4J_USER / NEO4J_PASSWORD environment variables so
# real secrets never have to be saved in the notebook; the fallbacks are the
# values this notebook has always used.
conn = Neo4jConnection(
    uri=f"bolt://{NEO4J_DOCKER_INSTANCE_NAME}:7687",
    user=os.environ.get("NEO4J_USER", "neo4j"),
    pwd=os.environ.get("NEO4J_PASSWORD", "test"),
)

In [None]:
# Bind `sql_db` here so this cell works on a fresh kernel: it is otherwise
# first assigned in a later cell, which makes this call raise NameError under
# Restart & Run All.
sql_db = simple_sql_db.SimplifiedSqlDb()
sql_db.get_tables()

In [None]:
%%time

# Handle to the SQL database; the cells below use its `conn` cursor and its
# `get_p30_cluster_for_p100` lookup.
sql_db = simple_sql_db.SimplifiedSqlDb()

@lru_cache(maxsize=1_000_000)
def p100_to_p30_cached(p100):
    """Return the ``'p30'`` field of the cluster row looked up for ``p100``.

    The lookup goes through ``sql_db.get_p30_cluster_for_p100`` and successful
    results are memoised (up to one million distinct keys). Exceptions raised
    by the lookup propagate to the caller and are not cached.
    """
    return sql_db.get_p30_cluster_for_p100(p100)['p30']

# Upper bound on the number of prot2protwindow rows scanned, so the cell's
# runtime stays bounded; raise it for a fuller scan.
MAX_ROWS = 1_000_000

# Walk the protein-pair rows and count how many have at least one side whose
# p100 -> p30 lookup fails.
cur = sql_db.conn.cursor()
num_rows = 0
num_cluster_misses = 0
for r in cur.execute(f"SELECT * FROM prot2protwindow LIMIT {MAX_ROWS};"):
    try:
        p30_a = p100_to_p30_cached(r['p1hash'])
        p30_b = p100_to_p30_cached(r['p2hash'])
    except AssertionError:
        # Presumably raised by the lookup when a p100 hash has no p30 cluster
        # (TODO confirm). A row counts once even if both sides would miss,
        # because the second lookup is skipped after the first failure.
        num_cluster_misses += 1
    num_rows += 1

print(num_rows, num_cluster_misses)





In [None]:
# NOTE(review): `execute` is called without a SQL statement. DB-API cursors
# require one, so this cell presumably raises TypeError — it looks like a
# leftover scratch cell; confirm and remove it or supply the intended query.
cur.execute()

In [None]:
# Display the row count accumulated by the scan loop.
num_rows

In [None]:
# `r` is the loop variable left over from the scan loop (the last row fetched);
# on a fresh kernel it is unbound if the query returned no rows.
# Presumably this lists the row's column names — TODO confirm.
r.keys()

In [None]:
# Create (if it does not already exist) a uniqueness constraint named `p30s`
# on the `id` property of `P30` nodes. `conn.query` prints and returns None on
# failure, so a rejected statement will not raise here.
# NOTE(review): `ON ... ASSERT` is the older constraint syntax; newer Neo4j
# releases expect `FOR ... REQUIRE` — confirm against the server version.
conn.query('CREATE CONSTRAINT p30s IF NOT EXISTS ON (p:P30)     ASSERT p.id IS UNIQUE')
