In [44]:
import os
from dotenv import load_dotenv
load_dotenv('.env', override=True)
from graphdatascience import GraphDataScience
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module='neo4j')

NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j://10.1.7.170:30120")
NEO4J_AUTH = None
if os.environ.get("NEO4J_USERNAME") and os.environ.get("NEO4J_PASSWORD"):
    NEO4J_AUTH = (
        os.environ.get("NEO4J_USERNAME"),
        os.environ.get("NEO4J_PASSWORD"),
    )

gds = GraphDataScience(NEO4J_URI, auth=NEO4J_AUTH)



In [3]:
from graphdatascience.server_version.server_version import ServerVersion

assert gds.server_version() >= ServerVersion(1, 8, 0)

_ = gds.run_cypher(
    """
        CREATE
         (dan:Person {name: 'Dan'}),
         (annie:Person {name: 'Annie'}),
         (matt:Person {name: 'Matt'}),
         (jeff:Person {name: 'Jeff'}),
         (brie:Person {name: 'Brie'}),
         (elsa:Person {name: 'Elsa'}),

         (cookies:Product {name: 'Cookies'}),
         (tomatoes:Product {name: 'Tomatoes'}),
         (cucumber:Product {name: 'Cucumber'}),
         (celery:Product {name: 'Celery'}),
         (kale:Product {name: 'Kale'}),
         (milk:Product {name: 'Milk'}),
         (chocolate:Product {name: 'Chocolate'}),

         (dan)-[:BUYS {amount: 1.2}]->(cookies),
         (dan)-[:BUYS {amount: 3.2}]->(milk),
         (dan)-[:BUYS {amount: 2.2}]->(chocolate),

         (annie)-[:BUYS {amount: 1.2}]->(cucumber),
         (annie)-[:BUYS {amount: 3.2}]->(milk),
         (annie)-[:BUYS {amount: 3.2}]->(tomatoes),

         (matt)-[:BUYS {amount: 3}]->(tomatoes),
         (matt)-[:BUYS {amount: 2}]->(kale),
         (matt)-[:BUYS {amount: 1}]->(cucumber),

         (jeff)-[:BUYS {amount: 3}]->(cookies),
         (jeff)-[:BUYS {amount: 2}]->(milk),

         (brie)-[:BUYS {amount: 1}]->(tomatoes),
         (brie)-[:BUYS {amount: 2}]->(milk),
         (brie)-[:BUYS {amount: 2}]->(kale),
         (brie)-[:BUYS {amount: 3}]->(cucumber),
         (brie)-[:BUYS {amount: 0.3}]->(celery),

         (elsa)-[:BUYS {amount: 3}]->(chocolate),
         (elsa)-[:BUYS {amount: 3}]->(milk)
    """
)

In [49]:
# Definir la proyección de nodos con propiedades específicas
node_projection = {
    "Credito": {"properties": ["mto_desembolsado", "mto_saldo"]},
    "Persona": {"properties": ["nombreCompleto", "genero"]},
    "Localidad": {"properties": ["nombreLocalidad", "cod_localidad_reg"]},
    "ProductoFinanciero": {"properties": ["descripcion_corta", "estado"]}
}

# Definir la proyección de relaciones
relationship_projection = {
    "TIENE_CREDITO": {"orientation": "UNDIRECTED"},
    "TIENE_LOCALIDAD": {"orientation": "UNDIRECTED"},
    "TIENE_PRODUCTO": {"orientation": "UNDIRECTED"}
}


# Before actually going through with the projection, let's check how much memory is required
result = gds.graph.project.estimate(node_projection, relationship_projection)

print(f"Required memory for native loading: {result['requiredMemory']}")

Required memory for native loading: [88 MiB ... 154 MiB]


In [47]:
# For this small graph memory requirement is low. Let us go through with the projection
G, result = gds.graph.project("cartera", node_projection, relationship_projection)

print(f"The projection took {result['projectMillis']} ms")

# We can use convenience methods on `G` to check if the projection looks correct
print(f"Graph '{G.name()}' node count: {G.node_count()}")
print(f"Graph '{G.name()}' node labels: {G.node_labels()}")

Loading:   0%|          | 0/100 [00:00<?, ?%/s]



The projection took 14560 ms
Graph 'cartera' node count: 349848
Graph 'cartera' node labels: ['Localidad', 'ProductoFinanciero', 'Credito', 'Persona']


In [50]:
# Estimacion de memoria a usar con FASTRP
result = gds.fastRP.mutate.estimate(
    G,
    mutateProperty="embedding",
    randomSeed=42,
    embeddingDimension=4,
    featureProperties="mto_desembolsado",
    iterationWeights=[0.8, 1, 1, 1],
)

print(f"Required memory for running FastRP: {result['requiredMemory']}")

ClientError: {code: Neo.ClientError.Procedure.ProcedureCallFailed} {message: Failed to invoke procedure `gds.fastRP.mutate.estimate`: Caused by: java.lang.IllegalArgumentException: Relationship weight property `mto_desembolsado` not found in relationship types ['TIENE_CREDITO', 'TIENE_LOCALIDAD', 'TIENE_PRODUCTO']. Properties existing on all relationship types: []}

In [7]:
# Now let's run FastRP and mutate our projected graph 'purchases' with the results
result = gds.fastRP.mutate(
    G,
    mutateProperty="embedding",
    randomSeed=42,
    embeddingDimension=4,
    relationshipWeightProperty="amount",
    iterationWeights=[0.8, 1, 1, 1],
)

# Let's make sure we got an embedding for each node
print(f"Number of embedding vectors produced: {result['nodePropertiesWritten']}")

Number of embedding vectors produced: 52


Ahora podemos ejecutar kNN para identificar nodos similares utilizando las incrustaciones de nodos que generamos con FastRP como nodeProperties. Como estamos trabajando con un gráfico pequeño, podemos establecer sampleRate en 1 y deltaThreshold en 0 sin tener que preocuparnos por largos tiempos de cálculo. El parámetro de concurrencia se establece en 1 (junto con la semilla aleatoria fija) para obtener un resultado determinista. Consulte la sección de sintaxis de la documentación de kNN para obtener más información sobre estos parámetros.

Tenga en cuenta que usaremos el modo de escritura del algoritmo para escribir las propiedades y relaciones en nuestra base de datos, para que podamos analizarlas más tarde usando Cypher.

In [8]:
# Run kNN and write back to db (we skip memory estimation this time...)
result = gds.knn.write(
    G,
    topK=2,
    nodeProperties=["embedding"],
    randomSeed=42,
    concurrency=1,
    sampleRate=1.0,
    deltaThreshold=0.0,
    writeRelationshipType="SIMILAR",
    writeProperty="score",
)

print(f"Relationships produced: {result['relationshipsWritten']}")
print(f"Nodes compared: {result['nodesCompared']}")
print(f"Mean similarity: {result['similarityDistribution']['mean']}")

Relationships produced: 104
Nodes compared: 52
Mean similarity: 0.9814365827120267


In [9]:
gds.run_cypher(
    """
        MATCH (p1:Person)-[r:SIMILAR]->(p2:Person)
        RETURN p1.name AS person1, p2.name AS person2, r.score AS similarity
        ORDER BY similarity DESCENDING, person1, person2
    """
)

Unnamed: 0,person1,person2,similarity
0,Brie,Matt,0.999812
1,Matt,Brie,0.999812
2,Brie,Matt,0.999693
3,Matt,Brie,0.999693
4,Dan,Elsa,0.999376
5,Elsa,Dan,0.999376
6,Annie,Brie,0.997281
7,Annie,Jeff,0.995845
8,Jeff,Annie,0.995845
9,Jeff,Dan,0.994131


In [10]:
gds.run_cypher(
    """
        MATCH (:Person {name: "Annie"})-[:BUYS]->(p1:Product)
        WITH collect(p1) as products
        MATCH (:Person {name: "Matt"})-[:BUYS]->(p2:Product)
        WHERE not p2 in products
        RETURN p2.name as recommendation
    """
)

Unnamed: 0,recommendation
0,Kale
1,Kale
2,Kale
3,Kale
