Create .env file in the same directory as the notebook and add the following lines:

```env
NEO4J_USERNAME=your_username
NEO4J_PASSWORD=your_password
NEO4J_ENDPOINT=your_endpoint
```

Install python dependencies.

In [None]:
pip install -r requirements.txt

Connect to database.

In [2]:
from databaseconnection import DatabaseConnection
gds = DatabaseConnection().get_database_connection()
gds.version()

'2.3.2'

Should show a version number if connection to database is successful.

## Remove User label and nodes from database

In [3]:
gds.run_cypher("MATCH (u:User) DETACH DELETE u")

In [4]:
gds.run_cypher("SHOW CONSTRAINTS")

Unnamed: 0,id,name,type,entityType,labelsOrTypes,properties,ownedIndex
0,26,Answer_id,UNIQUENESS,NODE,[Answer],[id],Answer_id
1,20,Publication_id,UNIQUENESS,NODE,[Publication],[id],Publication_id
2,24,QuestionAlternative_id,UNIQUENESS,NODE,[QuestionAlternative],[id],QuestionAlternative_id
3,32,Question_id,UNIQUENESS,NODE,[Question],[id],Question_id
4,28,Respondent_id,UNIQUENESS,NODE,[Respondent],[id],Respondent_id
5,34,Survey_id,UNIQUENESS,NODE,[Survey],[id],Survey_id
6,22,User_email,UNIQUENESS,NODE,[User],[email],User_email
7,30,User_id,UNIQUENESS,NODE,[User],[id],User_id


In [5]:
gds.run_cypher("DROP CONSTRAINT User_id")
gds.run_cypher("DROP CONSTRAINT User_email")

In [6]:
gds.run_cypher("SHOW INDEXES")

Unnamed: 0,id,name,state,populationPercent,type,entityType,labelsOrTypes,properties,indexProvider,owningConstraint
0,25,Answer_id,ONLINE,100.0,RANGE,NODE,[Answer],[id],range-1.0,Answer_id
1,19,Publication_id,ONLINE,100.0,RANGE,NODE,[Publication],[id],range-1.0,Publication_id
2,23,QuestionAlternative_id,ONLINE,100.0,RANGE,NODE,[QuestionAlternative],[id],range-1.0,QuestionAlternative_id
3,31,Question_id,ONLINE,100.0,RANGE,NODE,[Question],[id],range-1.0,Question_id
4,27,Respondent_id,ONLINE,100.0,RANGE,NODE,[Respondent],[id],range-1.0,Respondent_id
5,33,Survey_id,ONLINE,100.0,RANGE,NODE,[Survey],[id],range-1.0,Survey_id
6,1,index_343aff4e,ONLINE,100.0,LOOKUP,NODE,,,token-lookup-1.0,
7,2,index_f7700477,ONLINE,100.0,LOOKUP,RELATIONSHIP,,,token-lookup-1.0,


## Add relationship between `Answer` and `QuestionAlternative`

In [7]:
gds.run_cypher("""
MATCH (a:Answer)-[ist:IS_ANSWER_TO]->(q:Question)-[co:CONSISTS_OF]->(qa:QuestionAlternative)
WHERE a.alternative = qa.id OR qa.id IN a.alternatives
MERGE (a)-[c:CHOSE]->(qa)
""")

Unnamed: 0,type(c)


### Remove `alternative` and `alternatives` properties from `Answer` nodes

In [10]:
# Remove properties from Answer
gds.run_cypher("""
MATCH (a:Answer) REMOVE a.alternative, a.alternatives
""")

## Add relationship between `Respondent` and `Survey`

In [None]:
gds.run_cypher("""
MATCH (r:Respondent)<-[:ANSWERED_BY]-(p:Publication)<-[:PUBLISHED_BY]-(s:Survey)
MERGE (r)<-[su:SURVEYED]-(s)
""")

![Schema after preprocessing](schema_after_preprocessing.png)

In [3]:
gds.run_cypher("""
MATCH (r:Respondent)-[ha:HAS_ANSWERED]->(a:Answer)-[c:CHOSE]->(qa:QuestionAlternative)
MERGE (r)-[:CHOSE_ALT]->(qa)
""")

![Schema after preprocessing 2](schema_after_preprocessing_2.png)

# Add a Student label to Respondent nodes

```cypher
WITH ["ff07d216-33e4-464d-8c35-c2fd3962282c", "c29919fa-793f-4b81-af49-92cef6c681fb", "6d0bec28-d5a4-46c3-9d3c-c17c7bf38ce8"] AS excludedSurveyIds
MATCH (r:Respondent)<-[:SURVEYED]-(s:Survey) WHERE NOT s.id IN excludedSurveyIds
SET r :Student
RETURN r
```

# Add a Staff label to Respondent nodes

```cypher
WITH ["ff07d216-33e4-464d-8c35-c2fd3962282c", "c29919fa-793f-4b81-af49-92cef6c681fb", "6d0bec28-d5a4-46c3-9d3c-c17c7bf38ce8"] AS includedSurveyIds
MATCH (r:Respondent)<-[:SURVEYED]-(s:Survey) WHERE s.id IN includedSurveyIds
SET r :Staff
RETURN r
```