In [None]:
import spacy
from spacy import displacy
from IPython.display import HTML, display
import pandas as pd
import numpy as np

from neo4j import GraphDatabase
import os
from pathlib import Path

# Load the French spaCy pipeline; `nlp` is reused to parse the text below
nlp = spacy.load("fr_core_news_sm")

# Read the whole input file as UTF-8 (path is relative to the working directory)
text_path = Path("text1.txt")
TEXT = text_path.read_text(encoding="utf-8")

# Run the pipeline on the text; later cells iterate over the tokens of `doc`
doc = nlp(TEXT)

# Draw the dependency parse inline in the notebook
displacy.render(doc, style="dep", jupyter=True)

In [12]:
# One tuple per token: surface text, lemma, POS tag, dependency label,
# character offset, then the head token's text and character offset.
token_infos_lines = [
    (
        tok.text,
        tok.lemma_,
        tok.pos_,
        tok.dep_,
        tok.idx,
        tok.head.text,
        tok.head.idx,
    )
    for tok in doc
]

In [13]:
# Tabular view of the per-token tuples, one named column per tuple field
df_tokens = pd.DataFrame(
    data=token_infos_lines,
    columns=[
        'text',
        'lemma',
        'pos',
        'dep',
        'idx',
        'head',
        'head_idx',
    ],
)
display(df_tokens)

Unnamed: 0,text,lemma,pos,dep,idx,head,head_idx
0,Le,le,DET,det,0,chat,3
1,chat,chat,NOUN,nsubj,3,mange,8
2,mange,manger,VERB,ROOT,8,mange,8
3,une,un,DET,det,14,souris,18
4,souris,souris,NOUN,obj,18,mange,8
5,.,.,PUNCT,punct,24,mange,8


In [14]:
# Collect one node record per token and one edge record per head -> child link
doc_id = "text1"  # prefix that scopes every token uid to this document

tokens = []
deps = []

for token in doc:
    uid = f"{doc_id}:{token.i}"

    tokens.append(dict(
        uid=uid,
        text=token.text,
        lemma=token.lemma_,
        pos=token.pos_,
        dep=token.dep_,
    ))

    # A token that is its own head gets no edge (this avoids a self-loop).
    if token.head.i == token.i:
        continue

    deps.append(dict(
        head_uid=f"{doc_id}:{token.head.i}",
        child_uid=uid,
        label=token.dep_,
    ))

In [None]:
# Build adjacency matrix
N = len(tokens)

# Position of every token in `tokens`, keyed by its uid
uid_to_index = {entry["uid"]: position for position, entry in enumerate(tokens)}

adj_matrix = np.zeros(shape=(N, N), dtype=int)

# adj_matrix[head, child] == 1 marks a dependency edge from head to child
for rel in deps:
    head_pos = uid_to_index[rel["head_uid"]]    # governing token of the relation
    child_pos = uid_to_index[rel["child_uid"]]  # token that depends on the head
    adj_matrix[head_pos, child_pos] = 1

print("Adjacency matrix shape:", adj_matrix.shape)
print("Number of edges:", adj_matrix.sum())


Adjacency matrix shape: (6, 6)
Number of edges: 5


In [None]:
# Connect to Neo4j

# Connection settings are read from the environment, with local fallbacks.
# NOTE(review): the fallback user/password are hard-coded in the notebook;
# prefer exporting NEO4J_USER / NEO4J_PASSWORD instead of relying on them.
uri = os.getenv("NEO4J_URI", "bolt://localhost:7687")
user = os.getenv("NEO4J_USER", "arailym")
password = os.getenv("NEO4J_PASSWORD", "neo4j")

driver = GraphDatabase.driver(uri, auth=(user, password))

# Creating the driver does not contact the server, so check the connection
# now: a wrong URI or bad credentials fail in this cell rather than at the
# first query of the insertion cell.
try:
    driver.verify_connectivity()
except Exception:
    # Do not leave a half-usable driver (and its connection pool) behind.
    driver.close()
    raise


Le chat dort sur le canapé.
Le chien dort dans sa niche.


In [None]:
# Insert token nodes

def insert_tokens(tx, token_list):
    """Upsert one ``Token`` node per entry of ``token_list``.

    Nodes are matched on ``uid`` (MERGE), then their ``text``, ``lemma``,
    ``pos`` and ``dep`` properties are overwritten from the entry.

    Args:
        tx: object exposing ``run(query, **params)``; the whole list is sent
            as the ``$tokens`` query parameter in a single call.
        token_list: list of mappings with the keys ``uid``, ``text``,
            ``lemma``, ``pos`` and ``dep``.

    Returns:
        None. The result of ``tx.run`` is not inspected.
    """
    cypher = """
    UNWIND $tokens AS t
    MERGE (token:Token {uid: t.uid})
    SET token.text = t.text, token.lemma = t.lemma, token.pos = t.pos, token.dep = t.dep
    """
    params = {"tokens": token_list}
    tx.run(cypher, **params)

In [None]:
#Insert dependency edges

def insert_deps(tx, dep_list):
    """Upsert one ``DEP`` relationship per entry of ``dep_list``.

    For each entry, the head and child ``Token`` nodes are looked up by
    ``uid`` (MATCH), a ``(head)-[:DEP]->(child)`` relationship is merged and
    its ``label`` property is set from the entry. An entry whose head or
    child node is not matched creates nothing.

    Args:
        tx: object exposing ``run(query, **params)``; the whole list is sent
            as the ``$deps`` query parameter in a single call.
        dep_list: list of mappings with the keys ``head_uid``, ``child_uid``
            and ``label``.

    Returns:
        None. The result of ``tx.run`` is not inspected.
    """
    cypher = """
    UNWIND $deps AS d
    MATCH (h:Token {uid:d.head_uid}), (c:Token {uid:d.child_uid})
    MERGE (h)-[r:DEP]->(c)
    SET r.label = d.label
    """
    tx.run(cypher, deps=dep_list)

In [None]:
# Run insertion: write the token nodes first, then the edges that MATCH them

try:
    with driver.session() as session:
        # execute_write is the replacement for write_transaction, which is
        # deprecated in the 5.x Neo4j Python driver.
        session.execute_write(insert_tokens, tokens)
        session.execute_write(insert_deps, deps)
finally:
    # Close the driver even when a transaction fails, so its connections
    # are not left open in the kernel.
    driver.close()

# Only reached when both transactions succeeded.
print(" Graph inserted in Neo4j")



Si vous obtenez l'erreur décrite ici :

https://github.com/explosion/spacy/issues/13864

en exécutant :
```
displacy.render(doc, style="dep", jupyter=True)
```

remplacez, dans le fichier indiqué par la trace d'erreur, la ligne :
```
from IPython.core.display import HTML, display
```

par : 
```
from IPython.display import HTML, display
```

# PERSONAL NOTE
Create a venv (in a folder called .venv)
```
python3.13 -m venv .venv
```
Activate it
```
source .venv/bin/activate
```

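Install spaCy and the French model loaded by this notebook (`fr_core_news_sm`) inside the venv
```
pip install -U pip
pip install spacy
python -m spacy download fr_core_news_sm
```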

Install ipykernel inside the venv
```
pip install ipykernel
```

Register the venv as a Jupyter kernel
```
python -m ipykernel install --user --name=myvenv --display-name "Python (myvenv)"
```