In [97]:
import pandas as pd
from graphdatascience import GraphDataScience

## Setup

In [None]:
URI      = "neo4j://localhost:7687"
password = 'password'

In [119]:
creds    = ('neo4j', password)
gds      = GraphDataScience(URI, auth=creds)

## Data

In [99]:
df = pd.read_csv("domaine-nc.csv")

In [100]:
df.head(1)

Unnamed: 0,nom,extension,isProtected,gestionnaire,beneficiaire,status,dns1,dns2,dns3,dateCreation,dateModification,dateExpiration,nbDaysBeforeExpires,expired,note
0,32,nc,False,LA FABRIK,Ridet : 1507490-001 32C,green,ns-408.awsdns-51.com,ns-1687.awsdns-18.co.uk,ns-1488.awsdns-58.org,2021-08-23,2021-08-23,2026-08-23,1485.0,False,Les données de cet annuaire WHOIS ne sont four...


## CREATE CSV

In [101]:
gestionnaire = df.gestionnaire.unique().tolist()
gestionnaire.remove('AUCUN')
beneficiaire = df.beneficiaire.unique().tolist()
dns = df.dns1.unique().tolist() + df.dns1.unique().tolist() + df.dns3.unique().tolist()
dns = list(set(dns))

In [102]:
df_gestionnaire = pd.DataFrame({"gestionnaire":gestionnaire})
df_beneficiaire = pd.DataFrame({"beneficiaire":beneficiaire})
df_dns = pd.DataFrame({"dns":dns})

In [103]:
df_gestionnaire.to_csv("gestionnaires.csv",index=False)
df_beneficiaire.to_csv("beneficiaires.csv",index=False)
df_dns.to_csv("dns.csv",index=False)

### Pour la suite de l'execution du Notebook, il est nécessaire de copier les 4 fichiers CSV dans le dossier import associé à la base de données neo4J

![alt text](im1.png "Title")

## Create Constraint

In [104]:
def create_constrainte(constraint):
    c = f'''
    // Create constraints
    CREATE CONSTRAINT {constraint} IF NOT EXISTS ON (i:{constraint}) ASSERT i.id IS UNIQUE;
    '''
    gds.run_cypher(c)

In [105]:
create_constrainte("Gestionnaires")
create_constrainte("Beneficiaires")
create_constrainte("dns")

## Import Nodes

In [106]:
c = '''
// Load node list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
CREATE (:Site {nbDaysBeforeExpires : toInteger(line.nbDaysBeforeExpires), extension : line.extension, isProtected : line.isProtected, status : line.status, dateCreation : line.dateCreation, dateModification : line.dateModification, dateExpiration : line.dateExpiration, id: line.nom, expired : line.expired})
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,6676


In [107]:
c = '''
// Load node list
LOAD CSV WITH HEADERS FROM 'file:///gestionnaires.csv' AS line
WITH line
CREATE (:Gestionnaires {id:line.gestionnaire})
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,56


In [108]:
c = '''
// Load node list
LOAD CSV WITH HEADERS FROM 'file:///beneficiaires.csv' AS line
CREATE (:Beneficiaires {id:line.beneficiaire})
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,3587


In [109]:
c = '''
// Load node list
LOAD CSV WITH HEADERS FROM 'file:///dns.csv' AS line
CREATE (:dns {id:line.dns})
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,1215


In [110]:
c = '''
MATCH (n:Gestionnaires {id: 'AUCUN'})
DELETE n
'''
gds.run_cypher(c)

## Import Edges

In [111]:
c = '''
// Load edge list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
WITH line
MATCH (source:Gestionnaires {id: line.gestionnaire})
MATCH (target:Site {id: line.nom})
MERGE (source)-[:GERE]->(target)
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,2480


In [112]:
c = '''
// Load edge list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
WITH line
MATCH (source:Beneficiaires {id: line.beneficiaire})
MATCH (target:Site {id: line.nom})
MERGE (source)-[:POSSEDE]->(target)
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,6669


In [113]:
c = '''
// Load edge list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
WITH line
MATCH (source:Site {id: line.nom})
MATCH (target:dns {id: line.dns1})
MERGE (source)-[:HEBERGESUR]->(target)
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,6669


In [114]:
c = '''
// Load edge list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
WITH line
MATCH (source:Site {id: line.nom})
MATCH (target:dns {id: line.dns2})
MERGE (source)-[:HEBERGESUR]->(target)
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,2663


In [115]:
c = '''
// Load edge list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
WITH line
MATCH (source:Site {id: line.nom})
MATCH (target:dns {id: line.dns3})
MERGE (source)-[:HEBERGESUR]->(target)
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,2585


In [116]:
c = '''
// Load edge list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
WITH line
MATCH (source:Beneficiaires {id: line.beneficiaire})
MATCH (target:Gestionnaires {id: line.gestionnaire})
MERGE (source)-[:CLIENTDE]->(target)
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,2475


## CLEAR DMBS (Si besoin enlever les paranthèses)

In [117]:
"""c = '''
//
MATCH (n)
DELETE n
'''
gds.run_cypher(c)"""

"c = '''\n//\nMATCH (n)\nDELETE n\n'''\ngds.run_cypher(c)"