# Neo4j Notebook

Notebook criado para executar comandos do Neo4j fora do browser do banco.


In [1]:
from neo4j import GraphDatabase
import pandas as pd
import numpy as np
import os
import glob

In [2]:
class ConnectionNeo4j():
    """Small wrapper around a Neo4j driver that exports shortest paths per city.

    Connection settings are class attributes. A single driver is created on
    first use and reused by every call, so that close() releases everything
    this object opened (previously a new driver was created per call and only
    the last one was ever closed).
    """

    # Connection settings
    uri = "bolt://localhost:11003" #local BD
    password = "password" #local BD
    user = "neo4j"
    # Driver shared by all calls; None until first use and again after close()
    driver = None

    def _get_driver(self):
        """Return the open driver, creating it on first use or after close()."""
        if self.driver is None:
            self.driver = GraphDatabase.driver(self.uri, auth=(self.user, self.password))
        return self.driver

    # Closing the connection
    def close(self):
        """Close the driver if one is open; calling it again is a no-op."""
        if self.driver is not None:
            self.driver.close()
            self.driver = None

    # Exporting every shortest path that starts at the given city
    def salvarPathCidades(self, cityName):
        """Run the export query for one city in a read transaction.

        Args:
            cityName: value matched against ``cod_mun``, given as a string.
                It is concatenated into the Cypher text and into the name of
                the exported file.

        Returns:
            The value returned by ``_pathCidade`` (the ``file`` column of the
            export result).
        """
        with self._get_driver().session(database="neo4j") as session:
            return session.read_transaction(self._pathCidade, cityName)

    # Transaction function holding the export query
    @staticmethod
    def _pathCidade(tx, cityName):
        """Build and run the Cypher export query for ``cityName``.

        The inner query streams ``gds.allShortestPaths.dijkstra`` over the
        'grafoFluxo' graph weighted by 'fluxo_geral', starting at the
        ``Cidade`` node whose ``cod_mun`` equals ``cityName``. The outer query
        hands that text to ``apoc.export.csv.query``, which writes
        ``cidades/<cityName>.csv``.

        NOTE(review): ``cityName`` is inserted into the query text unescaped,
        so it must come from a trusted source.

        Args:
            tx: transaction object exposing ``run(query)``.
            cityName: value interpolated as ``cod_mun`` and as the file name.

        Returns:
            ``result.values("file")`` for the executed query.
        """
        query = (
            "WITH \"MATCH (source:Cidade {cod_mun:"+cityName+"})"
            " CALL gds.allShortestPaths.dijkstra.stream('grafoFluxo', {"
            "     sourceNode: source,"
            "     relationshipWeightProperty: 'fluxo_geral'})"
            " YIELD index, sourceNode, targetNode, totalCost, nodeIds, costs, path"
            " RETURN"
            "     index as index,"
            "     gds.util.asNode(sourceNode).cod_mun AS cod_mun_origem,"
            "     gds.util.asNode(targetNode).cod_mun AS cod_mun_destino,"
            "     totalCost as totalCost,"
            "     [nodeId IN nodeIds | gds.util.asNode(nodeId).cod_mun] AS cod_nos,"
            "     costs as costs\" AS query"
            f" CALL apoc.export.csv.query(query, \"cidades/{cityName}.csv\""+", {})"
            " YIELD file, source, format, nodes, relationships, properties, time, rows, batchSize, batches, done, data"
            " RETURN file, source, format, nodes, relationships, properties, time, rows, batchSize, batches, done, data;"
        )

        result = tx.run(query)
        return result.values("file")



In [None]:
# Load the table of cities and take the codes used as path sources.
dfCidades = pd.read_csv("dados_final/2_dados_sem_enriquecimento/arr_mun.csv")
listCodCidade = dfCidades.cod_cidade.to_list()

cnnNeo = ConnectionNeo4j()
try:
    for codigoCidade in listCodCidade:
        # The code is passed as text because it is concatenated into the query.
        file = cnnNeo.salvarPathCidades(str(codigoCidade))
        print(file)
finally:
    # Release the connection even if one of the exports raises.
    cnnNeo.close()

### Pós-processamento dos dados

In [40]:
# Inspect the first entry of csvFiles; the notebook shows its value below.
csvFiles[0]

'/Users/mariama/Library/Application Support/Neo4j Desktop/Application/relate-data/dbmss/dbms-2301576f-ef62-43fd-a1ce-7831cbf6f537/import/cidades/2310209.csv'

In [44]:
# Directory (inside the Neo4j import folder) that holds the per-city .csv files
path = "/Users/mariama/Library/Application Support/Neo4j Desktop/Application/relate-data/dbmss/dbms-2301576f-ef62-43fd-a1ce-7831cbf6f537/import/cidades/"
# Full path of every .csv file found in that directory
csvFiles = glob.glob(os.path.join(path, "*.csv"))

# One dataframe per file, in the same order as csvFiles
dfList = [pd.read_csv(csvPath) for csvPath in csvFiles]

In [38]:
def convert(lst):
    """Return a new list with every element of ``lst`` negated."""
    return [-value for value in lst]

In [36]:
# f = "/Users/mariama/Library/Application Support/Neo4j Desktop/Application/relate-data/dbmss/dbms-2301576f-ef62-43fd-a1ce-7831cbf6f537/import/cidades/1100015.csv"
# df_list = [pd.read_csv(f)]

In [45]:
import ast

for df, filePath in zip(dfList, csvFiles):
    # Only the file name is needed; basename avoids overwriting the global
    # `path` variable that the loading cell uses for the input directory.
    fileName = os.path.basename(filePath)
    # Flip the sign of the costs before they are stored under the
    # probabilidade_* names (presumably the graph weights were negated
    # values - TODO confirm).
    df["totalCost"] = -1 * df["totalCost"]
    # `costs` is read from the CSV as the text of a list. literal_eval parses
    # it without executing arbitrary code, unlike the eval() used previously.
    df['costs'] = df['costs'].apply(ast.literal_eval)
    df['costs'] = df['costs'].apply(convert)
    df.rename(columns={'totalCost': 'probabilidade_total', 'costs': 'probabilidade_caminho'}, inplace=True)
    df.drop(columns=['index'], inplace=True)
    df.to_csv(f"dados_final/4_dados_calculados/paths/{fileName}", index=False)