In [None]:
import os
from dotenv import load_dotenv
from neo4j import GraphDatabase
import pandas as pd
from pathlib import Path

# Show the working directory: the env file below is resolved relative to it.
print(os.getcwd())

# Credentials are read from `.env.local`, one directory above the working directory.
env_path = Path('..') / '.env.local'
load_dotenv(dotenv_path=env_path)

# Retrieve credentials
uri = os.getenv("NEO4J_URI")
username = os.getenv("NEO4J_USER")
password = os.getenv("NEO4J_PASSWORD")

# Fail fast with an actionable message: handing a missing URI to the driver
# only produces an obscure error that does not mention the env file.
if not uri:
    raise RuntimeError(
        f"NEO4J_URI is not set; define it in the environment or in {env_path.resolve()}"
    )

print(uri)

# One driver is shared by every query cell below.
# NOTE(review): nothing visible here calls driver.close(); close it when the analysis is done.
driver = GraphDatabase.driver(uri, auth=(username, password))

In [None]:
def run_cypher(query, parameters=None):
    """Run a Cypher query and return its records as a DataFrame.

    Args:
        query: Cypher statement to execute.
        parameters: Optional mapping of query parameters. It is forwarded to
            ``session.run`` so values can be bound through ``$name``
            placeholders instead of being formatted into the query text.

    Returns:
        pandas.DataFrame with one row per record and one column per returned
        key. The columns are present even when the query matches nothing.
    """
    with driver.session() as session:
        result = session.run(query, parameters)
        # Take the column names from the result itself so an empty result
        # still yields a frame with the expected columns.
        columns = list(result.keys())
        rows = [record.data() for record in result]
    return pd.DataFrame(rows, columns=columns)


In [None]:
# Count the Participant nodes in the graph.
participant_count_query = "MATCH (n:Participant) RETURN count(n) AS count"
df = run_cypher(participant_count_query)
print(df)

In [None]:
# Every Voice node attached to an Entry through HAS_VOICE, returned together
# with that entry's date and ordered by date.
query = """
MATCH (entry:Entry)-[:HAS_VOICE]->(voice:Voice)
RETURN voice, entry.date AS date
ORDER BY date
"""

from datetime import datetime

def neo4j_datetime_to_py(datetime_obj):
    """Convert a Neo4j temporal value to its native Python equivalent.

    Args:
        datetime_obj: Value read from a record. Normally a
            ``neo4j.time.DateTime``; ``None`` (e.g. a missing property) and
            values that are already native are accepted too.

    Returns:
        The result of ``datetime_obj.to_native()`` when the value offers that
        method; otherwise the value itself, unchanged.
    """
    # Missing dates and already-native values have no `to_native`; pass them
    # through instead of raising AttributeError in the middle of a result set.
    to_native = getattr(datetime_obj, "to_native", None)
    if callable(to_native):
        return to_native()
    return datetime_obj

def run_cypher(query):
    """Run a voice-note query and return one DataFrame row per record.

    Each record must expose a ``voice`` node and a ``date`` value. The node's
    properties become the columns, and ``date`` is set to the record's date
    after conversion through ``neo4j_datetime_to_py``.
    """
    def _flatten(row):
        # Start from the node's properties, then set the converted date.
        props = dict(row["voice"].items())
        props["date"] = neo4j_datetime_to_py(row["date"])
        return props

    with driver.session() as session:
        flattened = [_flatten(row) for row in session.run(query)]
    return pd.DataFrame(flattened)


# Load the voice notes and preview the first five rows.
df = run_cypher(query=query)
print(df.head(n=5))


In [None]:
import plotly.express as px

# Ensure the x-axis column is a pandas datetime before plotting.
df['date'] = pd.to_datetime(df['date'])

scatter_title = 'Voice Notes: Duration over Time'
scatter_labels = {'date': 'Date', 'duration': 'Duration (seconds)'}

# Every column is passed as hover data so each point shows the full record.
fig = px.scatter(
    df,
    x='date',
    y='duration',
    title=scatter_title,
    labels=scatter_labels,
    hover_data=df.columns,
)

In [None]:
# Render the figure with the "notebook_connected" renderer.
fig.show(renderer="notebook_connected")

In [None]:
# Number of Entry nodes linked to each Participant through SENT_BY,
# largest count first.
query = """
MATCH (p:Participant)<-[:SENT_BY]-(e:Entry)
RETURN p.handle AS handle, count(e) AS entry_count
ORDER BY entry_count DESC
"""

def run_cypher(query, parameters=None):
    """Run a Cypher query and return its records as a DataFrame.

    This generic version is defined again here because an earlier cell rebinds
    ``run_cypher`` to a loader that only handles ``voice``/``date`` records.

    Args:
        query: Cypher statement to execute.
        parameters: Optional mapping of query parameters. It is forwarded to
            ``session.run`` so values can be bound through ``$name``
            placeholders instead of being formatted into the query text.

    Returns:
        pandas.DataFrame with one row per record and one column per returned
        key. The columns are present even when the query matches nothing.
    """
    with driver.session() as session:
        result = session.run(query, parameters)
        # Take the column names from the result itself so an empty result
        # still yields a frame with the columns the plotting cell expects.
        columns = list(result.keys())
        data = [record.data() for record in result]
    return pd.DataFrame(data, columns=columns)

# Entry counts per participant handle, as returned by the query above.
df_participant_counts = run_cypher(query=query)

In [None]:
bar_labels = {'handle': 'Participant Handle', 'entry_count': 'Entry Count'}

# Build the bar chart, then chain the trace and layout tweaks: the counts are
# drawn outside the bars and the x tick labels are set to an angle of -45.
fig = (
    px.bar(
        df_participant_counts,
        x='handle',
        y='entry_count',
        title='Number of Entries per Participant',
        labels=bar_labels,
        text='entry_count',
    )
    .update_traces(textposition='outside')
    .update_layout(xaxis_tickangle=-45)
)

fig.show(renderer="notebook_connected")
