In [7]:
import pandas as pd
from graphdatascience import GraphDataScience

## Setup

In [8]:
URI = "neo4j://localhost:7687"
password = 'password'

In [14]:
creds = ('neo4j', password)
gds = GraphDataScience(URI, auth=creds)

## Data

In [12]:
df = pd.read_csv(r"C:\Users\33638\Documents\GitHub\domaine-nc-jupyter-notebook\domaine-nc.csv")

In [34]:
df.isProtected.value_counts()

False    6683
True       19
Name: isProtected, dtype: int64

In [33]:
df[df.beneficiaire.isna()]

Unnamed: 0,nom,extension,isProtected,gestionnaire,beneficiaire,status,dns1,dns2,dns3,dns4,dns5,dns6,dateCreation,dateModification,dateExpiration,nbDaysBeforeExpires,expired,note
949,boulouparis,nc,True,,,,,,,,,,,,,,,
1364,chirurgiens-dentistes,nc,True,,,,,,,,,,,,,,,
1959,drehu,nc,True,,,,,,,,,,,,,,,
2691,gouvernement,nc,True,,,,,,,,,,,,,,,
2706,gratuit,nc,True,,,,,,,,,,,,,,,
2931,iaai,nc,True,,,,,,,,,,,,,,,
3942,marchespublics,nc,True,,,,,,,,,,,,,,,
3945,mare,nc,True,,,,,,,,,,,,,,,
4088,mercedes,nc,True,,,,,,,,,,,,,,,
4256,mont-dore,nc,True,,,,,,,,,,,,,,,


## CREATE CSV

In [15]:
gestionnaire = df.gestionnaire.unique().tolist()
gestionnaire.remove('AUCUN')
beneficiaire = df.beneficiaire.unique().tolist()
dns = df.dns1.unique().tolist() + df.dns2.unique().tolist() + df.dns3.unique().tolist() + df.dns4.unique().tolist() + df.dns5.unique().tolist() + df.dns6.unique().tolist()
dns = list(set(dns))

In [16]:
df_gestionnaire = pd.DataFrame({"gestionnaire":gestionnaire})
df_beneficiaire = pd.DataFrame({"beneficiaire":beneficiaire})
df_dns = pd.DataFrame({"dns":dns})

In [17]:
df_gestionnaire.to_csv("gestionnaires.csv",index=False)
df_beneficiaire.to_csv("beneficiaires.csv",index=False)
df_dns.to_csv("dns.csv",index=False)

### Pour la suite de l'execution du Notebook, il est nécessaire de copier les 4 fichiers CSV dans le dossier import associé à la base de données neo4J

![alt text](im1.png "Title")

## Create Constraint

In [19]:
def create_constrainte(constraint):
    c = f'''
    // Create constraints
    CREATE CONSTRAINT {constraint} IF NOT EXISTS ON (i:{constraint}) ASSERT i.id IS UNIQUE;
    '''
    gds.run_cypher(c)

In [20]:
create_constrainte("Gestionnaires")
create_constrainte("Beneficiaires")
create_constrainte("dns")

## Import Nodes

In [21]:
c = '''
// Load node list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
CREATE (:Site {nbDaysBeforeExpires : toInteger(line.nbDaysBeforeExpires), extension : line.extension, isProtected : line.isProtected, status : line.status, dateCreation : line.dateCreation, dateModification : line.dateModification, dateExpiration : line.dateExpiration, id: line.nom, expired : line.expired})
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,6702


In [22]:
c = '''
// Load node list
LOAD CSV WITH HEADERS FROM 'file:///gestionnaires.csv' AS line
WITH line
CREATE (:Gestionnaires {id:line.gestionnaire})
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,56


In [23]:
c = '''
// Load node list
LOAD CSV WITH HEADERS FROM 'file:///beneficiaires.csv' AS line
CREATE (:Beneficiaires {id:line.beneficiaire})
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,3596


In [24]:
c = '''
// Load node list
LOAD CSV WITH HEADERS FROM 'file:///dns.csv' AS line
CREATE (:dns {id:line.dns})
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,2138


In [25]:
c = '''
MATCH (n:Gestionnaires {id: 'AUCUN'})
DELETE n
'''
gds.run_cypher(c)

## Import Edges

In [26]:
c = '''
// Load edge list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
WITH line
MATCH (source:Gestionnaires {id: line.gestionnaire})
MATCH (target:Site {id: line.nom})
MERGE (source)-[:GERE]->(target)
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,2471


In [27]:
c = '''
// Load edge list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
WITH line
MATCH (source:Beneficiaires {id: line.beneficiaire})
MATCH (target:Site {id: line.nom})
MERGE (source)-[:POSSEDE]->(target)
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,6695


In [28]:
c = '''
// Load edge list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
WITH line
MATCH (source:Site {id: line.nom})
MATCH (target:dns {id: line.dns1})
MERGE (source)-[:HEBERGESUR]->(target)
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,6695


In [29]:
c = '''
// Load edge list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
WITH line
MATCH (source:Site {id: line.nom})
MATCH (target:dns {id: line.dns2})
MERGE (source)-[:HEBERGESUR]->(target)
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,6695


In [35]:
c = '''
// Load edge list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
WITH line
MATCH (source:Site {id: line.nom})
MATCH (target:dns {id: line.dns3})
MERGE (source)-[:HEBERGESUR]->(target)
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,2591


In [36]:
c = '''
// Load edge list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
WITH line
MATCH (source:Site {id: line.nom})
MATCH (target:dns {id: line.dns4})
MERGE (source)-[:HEBERGESUR]->(target)
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,1090


In [37]:
c = '''
// Load edge list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
WITH line
MATCH (source:Site {id: line.nom})
MATCH (target:dns {id: line.dns5})
MERGE (source)-[:HEBERGESUR]->(target)
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,181


In [38]:
c = '''
// Load edge list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
WITH line
MATCH (source:Site {id: line.nom})
MATCH (target:dns {id: line.dns6})
MERGE (source)-[:HEBERGESUR]->(target)
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,37


In [39]:
c = '''
// Load edge list
LOAD CSV WITH HEADERS FROM 'file:///domaine-nc.csv' AS line
WITH line
MATCH (source:Beneficiaires {id: line.beneficiaire})
MATCH (target:Gestionnaires {id: line.gestionnaire})
MERGE (source)-[:CLIENTDE]->(target)
RETURN COUNT(*)
'''

gds.run_cypher(c)

Unnamed: 0,COUNT(*)
0,2466


## CLEAR DMBS (Si besoin enlever les paranthèses)

In [117]:
"""c = '''
//
MATCH (n)
DELETE n
'''
gds.run_cypher(c)"""

"c = '''\n//\nMATCH (n)\nDELETE n\n'''\ngds.run_cypher(c)"