diff --git a/next.config.mjs b/next.config.mjs
index aa99e3eae..a2a126c27 100644
--- a/next.config.mjs
+++ b/next.config.mjs
@@ -3435,6 +3435,11 @@ export default withNextra({
     destination: '/getting-started/build-memgraph-from-source#run-memgraph',
     permanent: true
   },
+  {
+    source: '/data-migration/migrate-from-neo4j-using-single-cypher-query',
+    destination: '/data-migration/migrate-from-neo4j/using-single-cypher-query',
+    permanent: true
+  },
   {
     source: '/database-management/authentication-and-authorization/auth-system-integrations#flags',
     destination: '/database-management/authentication-and-authorization/auth-system-integrations#configuration-flags',
diff --git a/pages/data-migration/_meta.ts b/pages/data-migration/_meta.ts
index 454863979..d4409d467 100644
--- a/pages/data-migration/_meta.ts
+++ b/pages/data-migration/_meta.ts
@@ -4,7 +4,6 @@ export default {
   "json": "JSON",
   "cypherl": "CYPHERL",
   "migrate-from-neo4j": "Migrate from Neo4j",
-  "migrate-from-neo4j-using-single-cypher-query": "Migrate from Neo4j using a single Cypher query",
   "migrate-from-rdbms": "Migrate from RDBMS using CSV files",
   "migrate-from-rdbms-directly": "Migrate from RDBMS using MAGE modules",
   "migrate-iceberg-tables-from-data-lake-using-dremio": "Migrate Apache Iceberg tables from data lake using Dremio",
diff --git a/pages/data-migration/migrate-from-neo4j.mdx b/pages/data-migration/migrate-from-neo4j.mdx
index 292fadab5..90835662d 100644
--- a/pages/data-migration/migrate-from-neo4j.mdx
+++ b/pages/data-migration/migrate-from-neo4j.mdx
@@ -3,538 +3,81 @@ title: Migrate from Neo4j to Memgraph
 description: Switch from Neo4j to Memgraph smoothly. Detailed documentation to guide the migration process for a seamless transition to Memgraph.
 ---
 
-import { Cards } from 'nextra/components'
-import { Callout } from 'nextra/components'
-import {CommunityLinks} from '/components/social-card/CommunityLinks'
-
 # Migrate from Neo4j to Memgraph
 
-Memgraph is an open-source graph database built for streaming and compatible
-with Neo4j. It uses Cypher query language and Bolt protocol. This means that you
-can use the same tools and drivers that you are already using with Neo4j. Due to
-the ACID compliance, data persistency and replication support in the community
-version, Memgraph can be used as the main database for your applications, instead of
-Neo4j.
-
-This tutorial will show you how to **migrate data in the CSV format from Neo4j to
-Memgraph**. If you are more of a visual learner, check out the video tutorial:
-
- - - - -If you prefer to **export your data from Neo4j into the Cypher queries format**, -then it's necessary to [convert those files into Memgraph CYPHERL format](https://github.com/memgraph/best-practices/blob/main/import/cypher/README.md). - -Learn more about the shortest path to import data into Memgraph by reading [best -practices for import](/data-migration/best-practices). - - -## Prerequisites - -To follow this tutorial, you will need to have the following: - -- Running Neo4j instance (with your data, or use the sample data provided) -- Memgraph MAGE and Memgraph Lab installed and running: - - The easiest way to run Memgraph MAGE and Memgraph Lab is by - executing one of the following commands, depending on your operating system: - - **For Linux and macOS:** - ``` - curl https://install.memgraph.com | sh - ``` - - **For Windows:** - ``` - iwr https://windows.memgraph.com | iex - ``` - - There are other [installation - options](/getting-started/install-memgraph#installation-options) available - as well, so make sure you choose the preferred one for your environment. - -## Data schema - -One of the first steps to consider is how to migrate your data. If you have your -data in the form of [Cypher queries](/data-migration/cypherl) or -[CSV](/data-migration/csv) or [JSON](/data-migration/json) format, you can -import these formats into Memgraph. Keep in mind that for importing larger -datasets it is recommended to use CSV format or pure Cypher queries (Memgraph's -CYPHERL format), since they can be imported into Memgraph natively, faster than -JSON format. - -This tutorial will go through exporting data from Neo4j into CSV files and -importing it into Memgraph using [LOAD CSV](/data-migration/csv) -query and Memgraph's user visual interface [Memgraph Lab](/data-visualization). 
- -The sample dataset consists of 3 different kinds of nodes (Employee, Order and -Product) connected with 3 types of relationships as described by the graph -schema below: - -![](/pages/data-migration/migrate-from-neo4j/shipping_schema.png) - -To create this graph in your Neo4j instance run the following queries: - -```cypher -LOAD CSV WITH HEADERS FROM 'https://gist.githubusercontent.com/jexp/054bc6baf36604061bf407aa8cd08608/raw/8bdd36dfc88381995e6823ff3f419b5a0cb8ac4f/orders.csv' AS column -MERGE (order:Order {orderID: column.OrderID}) - ON CREATE SET order.shipName = column.ShipName; - -LOAD CSV WITH HEADERS FROM 'https://gist.githubusercontent.com/jexp/054bc6baf36604061bf407aa8cd08608/raw/8bdd36dfc88381995e6823ff3f419b5a0cb8ac4f/products.csv' AS column -MERGE (product:Product {productID: column.ProductID}) - ON CREATE SET product.productName = column.ProductName, product.unitPrice = toFloat(column.UnitPrice); - -LOAD CSV WITH HEADERS FROM 'https://gist.githubusercontent.com/jexp/054bc6baf36604061bf407aa8cd08608/raw/8bdd36dfc88381995e6823ff3f419b5a0cb8ac4f/employees.csv' AS column -MERGE (e:Employee {employeeID:column.EmployeeID}) - ON CREATE SET e.firstName = column.FirstName, e.lastName = column.LastName, e.title = column.Title; - -CREATE INDEX product_id FOR (p:Product) ON (p.productID); -CREATE INDEX product_name FOR (p:Product) ON (p.productName); -CREATE INDEX employee_id FOR (e:Employee) ON (e.employeeID); -CALL db.awaitIndexes(); - -LOAD CSV WITH HEADERS FROM 'https://gist.githubusercontent.com/jexp/054bc6baf36604061bf407aa8cd08608/raw/8bdd36dfc88381995e6823ff3f419b5a0cb8ac4f/orders.csv' AS column -MATCH (order:Order {orderID: column.OrderID}) -MATCH (product:Product {productID: column.ProductID}) -MERGE (order)-[op:CONTAINS]->(product) - ON CREATE SET op.unitPrice = toFloat(column.UnitPrice), op.quantity = toFloat(column.Quantity); - -LOAD CSV WITH HEADERS FROM 'https://gist.githubusercontent.com/jexp/054bc6baf36604061bf407aa8cd08608/raw/8bdd36dfc88381995e6823ff3f419b5a0cb8ac4f/orders.csv' AS column -MATCH (order:Order {orderID: column.OrderID}) -MATCH (employee:Employee {employeeID: column.EmployeeID}) -MERGE (employee)-[:SOLD]->(order); - -LOAD CSV WITH HEADERS FROM 'https://gist.githubusercontent.com/jexp/054bc6baf36604061bf407aa8cd08608/raw/8bdd36dfc88381995e6823ff3f419b5a0cb8ac4f/employees.csv' AS column -MATCH (employee:Employee {employeeID: column.EmployeeID}) -MATCH (manager:Employee {employeeID: column.ReportsTo}) -MERGE (employee)-[:REPORTS_TO]->(manager); -``` - -When migrating a particular dataset, be aware of the differences between Neo4j -and [Memgraph data types](/fundamentals/data-types). For example, Memgraph -doesn’t yet support the `POINT` spatial type, but you can represent it as a -map with the appropriate properties. - -## Exporting data from Neo4j - -Download the CSV file -[shipping.csv](https://public-assets.memgraph.com/data-migration/csv-cypher/shipping.csv) -containing the data above if you don't want to go through the exporting process. - -To get your data out of Neo4j instance, use the Neo4j APOC export functionality. -To install APOC, select the project, then in the right-side menu select *Plugins --> APOC* and press install. - -![](/pages/data-migration/migrate-from-neo4j/install_APOC.png) - -Then enable export by setting the configuration flag `apoc.export.file.enabled` -to `true` in the `apoc.config` file located in the `config` directory. To open -the directory, select the active project, click on *...* -> *Open folder* -> -*Configuration*. 
- -Export the data into a CSV file using: - -```cypher -CALL apoc.export.csv.all("shipping.csv", {}) -``` - -Once exported, the file is located in Neo4j's *Import* folder. To open it, -select the active project, click on *...* -> *Open folder* -> *Import*. - -![](/pages/data-migration/migrate-from-neo4j/import_folder.png) - -## Importing data into Memgraph - -Now that the CSV file containing the needed data has been generated, let's -import data into Memgraph. - -As the original location of file is quite cumbersome, relocate it somewhere -more accessible. - -### 1. Transfer files into Memgraph container - -When working with Docker, the file needs to be transferred from your local -directory into the Docker container where Memgraph can access it. - -This can be done by copying the file into your running instance. Here are the steps to achieve that: - -1. To copy the file inside the container, open a new terminal to find out the - `CONTAINER ID` with `docker ps`, then run: - - ``` - docker cp /path_to_local_folder/shipping.csv :/usr/lib/memgraph/shipping.csv - ``` - - If the container ID is `bed1e5c9192d` and the file is locally located at - `C:/Data` the command would look like this: - - ``` - docker cp C:/Data/shipping.csv bed1:/usr/lib/memgraph/shipping.csv - ``` - -2. To check if the files are inside the container, first run: - - ``` - docker exec -it CONTAINER_ID bash - ``` - -3. List the files inside the `/usr/lib/memgraph`. - - ```nocopy - C:\Users\Vlasta>docker ps - CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES - bed1e5c9192d memgraph/memgraph-mage "/bin/sh -c '/usr/bi…" 2 minutes ago Up 2 minutes 0.0.0.0:7444->7444/tcp, 0.0.0.0:7687->7687/tcp recursing_blackburn - - C:\Users\Vlasta>docker cp C:/Data/shipping.csv bed1:/usr/lib/memgraph/shipping.csv - - C:\Users\Vlasta>docker exec -it bed1 bash - root@bed1e5c9192d:/# ls /usr/lib/memgraph - auth_module memgraph python_support query_modules shipping.csv - root@bed1e5c9192d:/# - ``` - -### 2. Gaining speed with indexes and analytical storage mode - -Although the dataset imported in this tutorial is quite small, one day you might -want to import really big datasets with billions of nodes and relationships and -you will require all the extra speed you can get. - -To gain speed you can [create indexes](/fundamentals/indexes) on the -properties used to connect nodes with relationships which are the values in the -`_id` column in the CSV files, and in Memgraph they will be named `nodeID`. - -**To create indexes, run:** - -```cypher -CREATE INDEX ON :Employee(nodeID); -CREATE INDEX ON :Order(nodeID); -CREATE INDEX ON :Product(nodeID); -``` - -You can also change the [storage mode](/fundamentals/storage-memory-usage) from -`IN_MEMORY_TRANSACTIONAL` to `IN_MEMORY_ANALYTICAL`. This will disable the -creation of durability files (snapshots and WAL files) and you will no longer -have any ACID guarantees. Other transactions will be able to see the changes of -ongoing transactions. Also, transaction will be able to see the changes they are -doing. This means that the transactions can be committed in random orders, and -the updates to the data, in the end, might not be correct. - -But, if you import on one thread, batch of data after a batch of data, there -should be absolutely no issues, and you will gain 6 times faster import with 6 -times less memory consumption. 
- -After import you can switch back to the `IN_MEMORY_TRANSACTIONAL` storage mode or -continue running analytics queries (only read queries) in the -`IN_MEMORY_ANALYTICAL` mode to continue benefiting from low memory consumption. - -To switch between modes, run the following queries on a running instance: - -```cypher -STORAGE MODE IN_MEMORY_ANALYTICAL; -STORAGE MODE IN_MEMORY_TRANSACTIONAL; -``` - -To check the current storage mode, run: - -```cypher -SHOW STORAGE INFO; -``` - -**Change the storage mode to analytical before import.** - -```cypher -STORAGE MODE IN_MEMORY_ANALYTICAL; -``` +{/* TODO: Migration best practices */} -### 3. Importing nodes -To import nodes using a LOAD CSV clause let's examine the clause syntax: - -```cypher -LOAD CSV FROM "csv-file-path.csv" ( WITH | NO ) HEADER [IGNORE BAD] [DELIMITER ] [QUOTE ] AS -``` - -The file is now located at `/usr/lib/memgraph/shipping.csv` and it has a header -row. There is no need to ignore bad rows, the default delimiter is `,` and -the default quote character `"`, the same as in the exported CSV file, so no -changes are necessary. - -The first row of the LOAD CSV clause therefore looks like this: - -```cypher -LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row -``` - -Nodes are always imported before relationships so they will be imported first. - -The `shipping.csv` file contains the following columns important for node -creation: `_id`, `labels`, `employeeID`, `firstName`, `lastName`, `orderID`, -`productID`, `productName`, `shipName`, `title`, `unitPrice`. - -The `_id` property is actually an internal node ID needed to create -relationships later on. - -Execute queries in Memgraph Lab. Open your browser and go to -`http://localhost:3000/`, **Connect now** to the instance and go to the **Query -Execution** section. - -#### Employee nodes - -Begin with `Employee` nodes. - -After establishing the location and format of the CSV file, filter out the rows -that contain the label `:Employee`: - -```cypher -LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row -WITH row WHERE row._labels = ':Employee' -``` - -Then, create nodes with a certain label and properties. As an example, let's -look at the property `_id`. To add the property to the node, define its name in -Memgraph and assigned the value of a specific column in the CSV file. - -```cypher -LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row -WITH row WHERE row._labels = ':Employee' -CREATE (e:Employee {nodeID: row._id}) -``` - -So `nodeID: row._id` part of the query instructs Memgraph to create a property -named `nodeID` and assign it the value paired with key `_id`. First created -node will be assigned the value from the first data row, second node from the -second data row, etc. - -**After matching up the keys and values for all properties, the finished query looks like this:** - -```cypher -LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row -WITH row WHERE row._labels = ':Employee' -CREATE (e:Employee {nodeID: row._id, employeeID: row.employeeID, firstName: row.firstName, lastName: row.lastName, title: row.title}); - -MATCH (e:Employee) -RETURN e; -``` - -The second query will show all 9 created nodes. - -Copy the query in the **Cypher Editor** and **Run Query**. - -![](/pages/data-migration/migrate-from-neo4j/employees.png) - -#### Order nodes - -Relevant properties for the `Order` nodes are `_id`, `orderID` and `shipName`. 
- -**To create `Order` nodes run the following query:** - -```cypher -LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row -WITH row WHERE row._labels = ':Order' -CREATE (o:Order {nodeID: row._id, orderID: row.orderID, shipName: row.shipName}); - -MATCH (o:Order) -RETURN o; -``` - -The second query will show all 830 created nodes: - -![](/pages/data-migration/migrate-from-neo4j/orders.png) - -#### Product nodes - -Relevant properties for the `Product` nodes are `_id`, `productID`, `productName` -and `unitPrice`. - -As the parser parses all the values as strings, and the `unitPrice` are numbers, -they need to be converted to appropriate data type. - -**To create `Product` nodes run the following query:** - -```cypher -LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row -WITH row WHERE row._labels = ':Product' -CREATE (p:Product {nodeID: row._id, productID: row.productID, productName: row.productName, unitPrice: ToFloat(row.unitPrice)}); - -MATCH (p:Product) -RETURN p; -``` - -The second query will show all 77 created nodes: - -![](/pages/data-migration/migrate-from-neo4j/product.png) - -### 4. Graph improvements - -At this point it would be nice to improve the look of the nodes visually. At the -moment, nodes in the graph are represented with their labels, but it would be -more useful if their name attribute was written. - -To adjust the look of the graph using Graph Style Language, open the Graph Style -Editor. Find the following code block: - -``` -@NodeStyle HasProperty(node, "name") { - label: AsText(Property(node, "name")) -} -``` - -It defines that if the node has the property `name`, its label on the visual -graph will be that property. - -As none of the imported nodes have the property `name`, this part of the code -needs to be adjusted to use the properties nodes do have. - -Replace those three lines of code with the following block and **Apply** the -changes: - -``` -@NodeStyle HasProperty(node, "firstName") { - label: AsText(Property(node, "firstName")) -} - -@NodeStyle HasProperty(node, "orderID") { - label: AsText(Property(node, "orderID")) -} - -@NodeStyle HasProperty(node, "productName") { - label: AsText(Property(node, "productName")) -} -``` - -![](/pages/data-migration/migrate-from-neo4j/GSS.png) - -Visual appearance of the graph can be changed in many different ways, so be sure -to check the [GSS documentation](/data-visualization/graph-style-script). - -### 5. Importing relationships - -Now that all the 916 nodes have been imported, they can be connected with relationships. - -The first row of the LOAD CSV remains the same: - -```cypher -LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row -``` - -The `shipping.csv` file contains the following values important for relationship -creation: `_type`, `_start`, `_end`, `quantity` and `unitPrice`. - -The `_type` defines relationships type, `_start` and `_end` values define which -nodes need to be connected based on their ID. - -#### :REPORTS_TO relationships - -Begin with `:REPORTS_TO` relationship. - -After establishing the location and format of the CSV file, filter out the rows -that contain the type `REPORTS_TO`: - -```cypher -LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row -WITH row WHERE row._type = 'REPORTS_TO' -``` - -Now instruct Memgraph to find the nodes with certain IDs in order to create a -relationship between them. 
As node IDs are unique we can just define that any -node with a certain ID is a starting point, and another node with a certain ID -is the end point of the relationship type `REPORTS_TO`. - -**The LOAD CSV query creates 8 `:REPORTS_TO` relationships:** - -```cypher -LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row -WITH row WHERE row._type = 'REPORTS_TO' -MATCH (n {nodeID: row._start}), (n2 {nodeID: row._end}) -CREATE (n)-[:REPORTS_TO]->(n2); - -MATCH p=()-[:REPORTS_TO]->() -RETURN p; -``` - -The second query returns all the nodes connected with the `REPORTS_TO` type of -relationship. - -![](/pages/data-migration/migrate-from-neo4j/reports_to.png) +import { Callout } from 'nextra/components' +import { Cards } from 'nextra/components' +import {CommunityLinks} from '/components/social-card/CommunityLinks' -#### :SOLD relationships -**The LOAD CSV query creates 830 `:SOLD` relationships:** +Memgraph is fully compatible with Neo4j’s **Cypher query language** and **Bolt +protocol**, making it easy to transition your graph data with minimal effort. +Depending on your use case, dataset size, and workflow preferences, you can +choose one of the following three migration methods. -```cypher -LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row -WITH row WHERE row._type = 'SOLD' -MATCH (n {nodeID: row._start}), (n2 {nodeID: row._end}) -CREATE (n)-[:SOLD]->(n2); -MATCH p=()-[:SOLD]->() -RETURN p; -``` +## Migrate using CSV files -The second query returns all the nodes connected with the `SOLD` type of -relationship. +This method involves **exporting your Neo4j data to CSV** and then **importing +it into Memgraph** using the `LOAD CSV` command. -![](/pages/data-migration/migrate-from-neo4j/sold.png) +### When to use +- You prefer a **manual, step-by-step** migration process. +- You want **full control** over data transformation and indexing. +- Ideal for **medium to large datasets** when export/import time is acceptable. -#### :CONTAINS relationships +### Key features +- Uses Neo4j’s APOC library to export data (`CALL apoc.export.csv.all`). +- Data imported into Memgraph using `LOAD CSV` in **Memgraph Lab**. +- Offers **fine-grained control** over node and relationship creation. -This relationship type has properties about the `quantity` of products one order -contains. +> 💡 [Learn more → Migrate from Neo4j to Memgraph using CSV +files](/data-migration/migrate-from-neo4j/using-csv-files) -As the parser parses all the values as strings, and the value of this -relationship property are numbers, they need to be converted to appropriate -type. +--- -**The LOAD CSV query creates 2155 `CONTAINS` relationships:** +## Migrate using a single Cypher query -```cypher -LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row -WITH row WHERE row._type = 'CONTAINS' -MATCH (n {nodeID: row._start}), (n2 {nodeID: row._end}) -CREATE (n)-[:CONTAINS {quantity: ToInteger(row.quantity)}]->(n2); +This approach uses the **`migrate.neo4j`** procedure from the **MAGE module** to +**stream data directly** from Neo4j into Memgraph — no intermediate files +required. -MATCH p=()-[:CONTAINS]->() -RETURN p; -``` +### When to use +- You want a **fast and automated** migration process. +- You prefer to **avoid manual export/import** steps. +- Best for **large or live datasets** with minimal downtime. -The second query returns all the nodes connected with the `CONTAINS` type of -relationship. +### Key features +- Runs a **single Cypher query** to copy nodes and relationships. 
+- Supports **automatic property and label mapping**. +- Can selectively migrate **entire graphs or subsets**. +- Available out of the box in the `memgraph/memgraph-mage` Docker image. -![](/pages/data-migration/migrate-from-neo4j/contains.png) +> 💡 [Learn more → Migrate from Neo4j to Memgraph using a single Cypher +> query](/data-migration/migrate-from-neo4j/using-single-cypher-query) -## After import +--- -Once all the 916 nodes and 2993 relationships have been imported decide whether -you want to switch back to the transactional storage mode or not. Remember that -the analytical storage mode you are using right now has no ACID compliance. +## Choose the right method -To switch back to the analytical storage mode, run: +Each migration approach offers different benefits depending on your **data +size**, **complexity**, and **automation needs**: -```cypher -STORAGE MODE IN_MEMORY_TRANSACTIONAL; -``` +| Scenario | Recommended method | Why | +|-----------|--------------------|-----| +| You want a **hands-on process** and control over data structure | **CSV files** | Gives you flexibility to inspect, clean, and transform data during migration. | +| You need a **quick, end-to-end migration** with minimal manual work | **Single Cypher query** | Fastest option for large or production datasets, and doesn’t require temporary files. | +| You’re migrating **select parts** of your graph or testing the process before a full migration | **Single Cypher query** | Supports partial or incremental transfers directly over the Bolt connection. | +| You’re working in a **restricted or offline environment** where direct connection between Neo4j and Memgraph isn’t possible | **CSV files** | Doesn’t require network access — just export and import files manually. | -To check the switch was successful, run: +> 💡 **Tip:** If you’re unsure which to choose, start with the **CSV method** +> for better visibility into your data, then use the **Cypher query method** for +> production migrations once you’re confident in the process. -```cypher -SHOW STORAGE INFO; -``` -You can query the database using the [Cypher query language](/querying), use -various [graph algorithms and modules](/advanced-algorithms) from our -open-source repository MAGE to solve graph analytics problems, create awesome -customized visual displays of your nodes and relationships with [Graph Style -Script](/data-visualization/graph-style-script) and above all - enjoy your new -graph database! ## Memgraph's office hours @@ -554,4 +97,4 @@ nonsense or sales pitch, just tech. /> - + \ No newline at end of file diff --git a/pages/data-migration/migrate-from-neo4j/_meta.ts b/pages/data-migration/migrate-from-neo4j/_meta.ts new file mode 100644 index 000000000..61daf3017 --- /dev/null +++ b/pages/data-migration/migrate-from-neo4j/_meta.ts @@ -0,0 +1,4 @@ +export default { + "using-csv-files": "Using CSV files", + "using-single-cypher-query": "Using a single Cypher query", +} diff --git a/pages/data-migration/migrate-from-neo4j/using-csv-files.mdx b/pages/data-migration/migrate-from-neo4j/using-csv-files.mdx new file mode 100644 index 000000000..316c59160 --- /dev/null +++ b/pages/data-migration/migrate-from-neo4j/using-csv-files.mdx @@ -0,0 +1,558 @@ +--- +title: Migrate from Neo4j to Memgraph using CSV files +description: Switch from Neo4j to Memgraph smoothly. Detailed documentation to guide the migration process for a seamless transition to Memgraph. 
+---
+
+import { Cards } from 'nextra/components'
+import { Callout } from 'nextra/components'
+import {CommunityLinks} from '/components/social-card/CommunityLinks'
+
+# Migrate from Neo4j to Memgraph using CSV files
+
+Memgraph is an open-source graph database built for streaming and compatible
+with Neo4j. It uses the Cypher query language and the Bolt protocol. This means
+that you can use the same tools and drivers that you are already using with
+Neo4j. Due to ACID compliance, data persistence and replication support in the
+community version, Memgraph can be used as the main database for your
+applications instead of Neo4j.
+
+This tutorial will show you how to **migrate data in the CSV format from Neo4j
+to Memgraph**. If you are more of a visual learner, check out the video
+tutorial:
+
+ + + + +If you prefer to **export your data from Neo4j into the Cypher queries format**, +then it's necessary to [convert those files into Memgraph CYPHERL format](https://github.com/memgraph/best-practices/blob/main/import/cypher/README.md). + +Learn more about the shortest path to import data into Memgraph by reading [best +practices for import](/data-migration/best-practices). + + +## Prerequisites + +To follow this tutorial, you will need to have the following: + +- Running Neo4j instance (with your data, or use the sample data provided) +- Memgraph MAGE and Memgraph Lab installed and running: + + The easiest way to run Memgraph MAGE and Memgraph Lab is by + executing one of the following commands, depending on your operating system: + + **For Linux and macOS:** + ``` + curl https://install.memgraph.com | sh + ``` + + **For Windows:** + ``` + iwr https://windows.memgraph.com | iex + ``` + + There are other [installation + options](/getting-started/install-memgraph#installation-options) available + as well, so make sure you choose the preferred one for your environment. + +## Data schema + +One of the first steps to consider is how to migrate your data. If you have your +data in the form of [Cypher queries](/data-migration/cypherl) or +[CSV](/data-migration/csv) or [JSON](/data-migration/json) format, you can +import these formats into Memgraph. Keep in mind that for importing larger +datasets it is recommended to use CSV format or pure Cypher queries (Memgraph's +CYPHERL format), since they can be imported into Memgraph natively, faster than +JSON format. + +This tutorial will go through exporting data from Neo4j into CSV files and +importing it into Memgraph using [LOAD CSV](/data-migration/csv) +query and Memgraph's user visual interface [Memgraph Lab](/data-visualization). 
+ +The sample dataset consists of 3 different kinds of nodes (Employee, Order and +Product) connected with 3 types of relationships as described by the graph +schema below: + +![](/pages/data-migration/migrate-from-neo4j/shipping_schema.png) + +To create this graph in your Neo4j instance run the following queries: + +```cypher +LOAD CSV WITH HEADERS FROM 'https://gist.githubusercontent.com/jexp/054bc6baf36604061bf407aa8cd08608/raw/8bdd36dfc88381995e6823ff3f419b5a0cb8ac4f/orders.csv' AS column +MERGE (order:Order {orderID: column.OrderID}) + ON CREATE SET order.shipName = column.ShipName; + +LOAD CSV WITH HEADERS FROM 'https://gist.githubusercontent.com/jexp/054bc6baf36604061bf407aa8cd08608/raw/8bdd36dfc88381995e6823ff3f419b5a0cb8ac4f/products.csv' AS column +MERGE (product:Product {productID: column.ProductID}) + ON CREATE SET product.productName = column.ProductName, product.unitPrice = toFloat(column.UnitPrice); + +LOAD CSV WITH HEADERS FROM 'https://gist.githubusercontent.com/jexp/054bc6baf36604061bf407aa8cd08608/raw/8bdd36dfc88381995e6823ff3f419b5a0cb8ac4f/employees.csv' AS column +MERGE (e:Employee {employeeID:column.EmployeeID}) + ON CREATE SET e.firstName = column.FirstName, e.lastName = column.LastName, e.title = column.Title; + +CREATE INDEX product_id FOR (p:Product) ON (p.productID); +CREATE INDEX product_name FOR (p:Product) ON (p.productName); +CREATE INDEX employee_id FOR (e:Employee) ON (e.employeeID); +CALL db.awaitIndexes(); + +LOAD CSV WITH HEADERS FROM 'https://gist.githubusercontent.com/jexp/054bc6baf36604061bf407aa8cd08608/raw/8bdd36dfc88381995e6823ff3f419b5a0cb8ac4f/orders.csv' AS column +MATCH (order:Order {orderID: column.OrderID}) +MATCH (product:Product {productID: column.ProductID}) +MERGE (order)-[op:CONTAINS]->(product) + ON CREATE SET op.unitPrice = toFloat(column.UnitPrice), op.quantity = toFloat(column.Quantity); + +LOAD CSV WITH HEADERS FROM 'https://gist.githubusercontent.com/jexp/054bc6baf36604061bf407aa8cd08608/raw/8bdd36dfc88381995e6823ff3f419b5a0cb8ac4f/orders.csv' AS column +MATCH (order:Order {orderID: column.OrderID}) +MATCH (employee:Employee {employeeID: column.EmployeeID}) +MERGE (employee)-[:SOLD]->(order); + +LOAD CSV WITH HEADERS FROM 'https://gist.githubusercontent.com/jexp/054bc6baf36604061bf407aa8cd08608/raw/8bdd36dfc88381995e6823ff3f419b5a0cb8ac4f/employees.csv' AS column +MATCH (employee:Employee {employeeID: column.EmployeeID}) +MATCH (manager:Employee {employeeID: column.ReportsTo}) +MERGE (employee)-[:REPORTS_TO]->(manager); +``` + +When migrating a particular dataset, be aware of the differences between Neo4j +and [Memgraph data types](/fundamentals/data-types). For example, Memgraph +doesn’t yet support the `POINT` spatial type, but you can represent it as a +map with the appropriate properties. + +## Exporting data from Neo4j + +Download the CSV file +[shipping.csv](https://public-assets.memgraph.com/data-migration/csv-cypher/shipping.csv) +containing the data above if you don't want to go through the exporting process. + +To get your data out of Neo4j instance, use the Neo4j APOC export functionality. +To install APOC, select the project, then in the right-side menu select *Plugins +-> APOC* and press install. + +![](/pages/data-migration/migrate-from-neo4j/install_APOC.png) + +Then enable export by setting the configuration flag `apoc.export.file.enabled` +to `true` in the `apoc.config` file located in the `config` directory. To open +the directory, select the active project, click on *...* -> *Open folder* -> +*Configuration*. 
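+
+For reference, the relevant line in the APOC configuration file should look
+roughly like this (a minimal sketch; the exact file name and location depend on
+your Neo4j installation and version):
+
+```
+apoc.export.file.enabled=true
+```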
+ +Export the data into a CSV file using: + +```cypher +CALL apoc.export.csv.all("shipping.csv", {}) +``` + +Once exported, the file is located in Neo4j's *Import* folder. To open it, +select the active project, click on *...* -> *Open folder* -> *Import*. + +![](/pages/data-migration/migrate-from-neo4j/import_folder.png) + +## Importing data into Memgraph + +Now that the CSV file containing the needed data has been generated, let's +import data into Memgraph. + +As the original location of file is quite cumbersome, relocate it somewhere +more accessible. + +### 1. Transfer files into Memgraph container + +When working with Docker, the file needs to be transferred from your local +directory into the Docker container where Memgraph can access it. + +This can be done by copying the file into your running instance. Here are the steps to achieve that: + +1. To copy the file inside the container, open a new terminal to find out the + `CONTAINER ID` with `docker ps`, then run: + + ``` + docker cp /path_to_local_folder/shipping.csv :/usr/lib/memgraph/shipping.csv + ``` + + If the container ID is `bed1e5c9192d` and the file is locally located at + `C:/Data` the command would look like this: + + ``` + docker cp C:/Data/shipping.csv bed1:/usr/lib/memgraph/shipping.csv + ``` + +2. To check if the files are inside the container, first run: + + ``` + docker exec -it CONTAINER_ID bash + ``` + +3. List the files inside the `/usr/lib/memgraph`. + + ```nocopy + C:\Users\Vlasta>docker ps + CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES + bed1e5c9192d memgraph/memgraph-mage "/bin/sh -c '/usr/bi…" 2 minutes ago Up 2 minutes 0.0.0.0:7444->7444/tcp, 0.0.0.0:7687->7687/tcp recursing_blackburn + + C:\Users\Vlasta>docker cp C:/Data/shipping.csv bed1:/usr/lib/memgraph/shipping.csv + + C:\Users\Vlasta>docker exec -it bed1 bash + root@bed1e5c9192d:/# ls /usr/lib/memgraph + auth_module memgraph python_support query_modules shipping.csv + root@bed1e5c9192d:/# + ``` + +### 2. Gaining speed with indexes and analytical storage mode + +Although the dataset imported in this tutorial is quite small, one day you might +want to import really big datasets with billions of nodes and relationships and +you will require all the extra speed you can get. + +To gain speed you can [create indexes](/fundamentals/indexes) on the +properties used to connect nodes with relationships which are the values in the +`_id` column in the CSV files, and in Memgraph they will be named `nodeID`. + +**To create indexes, run:** + +```cypher +CREATE INDEX ON :Employee(nodeID); +CREATE INDEX ON :Order(nodeID); +CREATE INDEX ON :Product(nodeID); +``` + +You can also change the [storage mode](/fundamentals/storage-memory-usage) from +`IN_MEMORY_TRANSACTIONAL` to `IN_MEMORY_ANALYTICAL`. This will disable the +creation of durability files (snapshots and WAL files) and you will no longer +have any ACID guarantees. Other transactions will be able to see the changes of +ongoing transactions. Also, transaction will be able to see the changes they are +doing. This means that the transactions can be committed in random orders, and +the updates to the data, in the end, might not be correct. + +But, if you import on one thread, batch of data after a batch of data, there +should be absolutely no issues, and you will gain 6 times faster import with 6 +times less memory consumption. 
+ +After import you can switch back to the `IN_MEMORY_TRANSACTIONAL` storage mode or +continue running analytics queries (only read queries) in the +`IN_MEMORY_ANALYTICAL` mode to continue benefiting from low memory consumption. + +To switch between modes, run the following queries on a running instance: + +```cypher +STORAGE MODE IN_MEMORY_ANALYTICAL; +STORAGE MODE IN_MEMORY_TRANSACTIONAL; +``` + +To check the current storage mode, run: + +```cypher +SHOW STORAGE INFO; +``` + +**Change the storage mode to analytical before import.** + +```cypher +STORAGE MODE IN_MEMORY_ANALYTICAL; +``` + +### 3. Importing nodes + +To import nodes using a LOAD CSV clause let's examine the clause syntax: + +```cypher +LOAD CSV FROM "csv-file-path.csv" ( WITH | NO ) HEADER [IGNORE BAD] [DELIMITER ] [QUOTE ] AS +``` + +The file is now located at `/usr/lib/memgraph/shipping.csv` and it has a header +row. There is no need to ignore bad rows, the default delimiter is `,` and +the default quote character `"`, the same as in the exported CSV file, so no +changes are necessary. + +The first row of the LOAD CSV clause therefore looks like this: + +```cypher +LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row +``` + +Nodes are always imported before relationships so they will be imported first. + +The `shipping.csv` file contains the following columns important for node +creation: `_id`, `labels`, `employeeID`, `firstName`, `lastName`, `orderID`, +`productID`, `productName`, `shipName`, `title`, `unitPrice`. + +The `_id` property is actually an internal node ID needed to create +relationships later on. + +Execute queries in Memgraph Lab. Open your browser and go to +`http://localhost:3000/`, **Connect now** to the instance and go to the **Query +Execution** section. + +#### Employee nodes + +Begin with `Employee` nodes. + +After establishing the location and format of the CSV file, filter out the rows +that contain the label `:Employee`: + +```cypher +LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row +WITH row WHERE row._labels = ':Employee' +``` + +Then, create nodes with a certain label and properties. As an example, let's +look at the property `_id`. To add the property to the node, define its name in +Memgraph and assigned the value of a specific column in the CSV file. + +```cypher +LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row +WITH row WHERE row._labels = ':Employee' +CREATE (e:Employee {nodeID: row._id}) +``` + +So `nodeID: row._id` part of the query instructs Memgraph to create a property +named `nodeID` and assign it the value paired with key `_id`. First created +node will be assigned the value from the first data row, second node from the +second data row, etc. + +**After matching up the keys and values for all properties, the finished query looks like this:** + +```cypher +LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row +WITH row WHERE row._labels = ':Employee' +CREATE (e:Employee {nodeID: row._id, employeeID: row.employeeID, firstName: row.firstName, lastName: row.lastName, title: row.title}); + +MATCH (e:Employee) +RETURN e; +``` + +The second query will show all 9 created nodes. + +Copy the query in the **Cypher Editor** and **Run Query**. + +![](/pages/data-migration/migrate-from-neo4j/employees.png) + +#### Order nodes + +Relevant properties for the `Order` nodes are `_id`, `orderID` and `shipName`. 
+ +**To create `Order` nodes run the following query:** + +```cypher +LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row +WITH row WHERE row._labels = ':Order' +CREATE (o:Order {nodeID: row._id, orderID: row.orderID, shipName: row.shipName}); + +MATCH (o:Order) +RETURN o; +``` + +The second query will show all 830 created nodes: + +![](/pages/data-migration/migrate-from-neo4j/orders.png) + +#### Product nodes + +Relevant properties for the `Product` nodes are `_id`, `productID`, `productName` +and `unitPrice`. + +As the parser parses all the values as strings, and the `unitPrice` are numbers, +they need to be converted to appropriate data type. + +**To create `Product` nodes run the following query:** + +```cypher +LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row +WITH row WHERE row._labels = ':Product' +CREATE (p:Product {nodeID: row._id, productID: row.productID, productName: row.productName, unitPrice: ToFloat(row.unitPrice)}); + +MATCH (p:Product) +RETURN p; +``` + +The second query will show all 77 created nodes: + +![](/pages/data-migration/migrate-from-neo4j/product.png) + +### 4. Graph improvements + +At this point it would be nice to improve the look of the nodes visually. At the +moment, nodes in the graph are represented with their labels, but it would be +more useful if their name attribute was written. + +To adjust the look of the graph using Graph Style Language, open the Graph Style +Editor. Find the following code block: + +``` +@NodeStyle HasProperty(node, "name") { + label: AsText(Property(node, "name")) +} +``` + +It defines that if the node has the property `name`, its label on the visual +graph will be that property. + +As none of the imported nodes have the property `name`, this part of the code +needs to be adjusted to use the properties nodes do have. + +Replace those three lines of code with the following block and **Apply** the +changes: + +``` +@NodeStyle HasProperty(node, "firstName") { + label: AsText(Property(node, "firstName")) +} + +@NodeStyle HasProperty(node, "orderID") { + label: AsText(Property(node, "orderID")) +} + +@NodeStyle HasProperty(node, "productName") { + label: AsText(Property(node, "productName")) +} +``` + +![](/pages/data-migration/migrate-from-neo4j/GSS.png) + +Visual appearance of the graph can be changed in many different ways, so be sure +to check the [GSS documentation](/data-visualization/graph-style-script). + +### 5. Importing relationships + +Now that all the 916 nodes have been imported, they can be connected with relationships. + +The first row of the LOAD CSV remains the same: + +```cypher +LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row +``` + +The `shipping.csv` file contains the following values important for relationship +creation: `_type`, `_start`, `_end`, `quantity` and `unitPrice`. + +The `_type` defines relationships type, `_start` and `_end` values define which +nodes need to be connected based on their ID. + +#### :REPORTS_TO relationships + +Begin with `:REPORTS_TO` relationship. + +After establishing the location and format of the CSV file, filter out the rows +that contain the type `REPORTS_TO`: + +```cypher +LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row +WITH row WHERE row._type = 'REPORTS_TO' +``` + +Now instruct Memgraph to find the nodes with certain IDs in order to create a +relationship between them. 
As node IDs are unique we can just define that any +node with a certain ID is a starting point, and another node with a certain ID +is the end point of the relationship type `REPORTS_TO`. + +**The LOAD CSV query creates 8 `:REPORTS_TO` relationships:** + +```cypher +LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row +WITH row WHERE row._type = 'REPORTS_TO' +MATCH (n {nodeID: row._start}), (n2 {nodeID: row._end}) +CREATE (n)-[:REPORTS_TO]->(n2); + +MATCH p=()-[:REPORTS_TO]->() +RETURN p; +``` + +The second query returns all the nodes connected with the `REPORTS_TO` type of +relationship. + +![](/pages/data-migration/migrate-from-neo4j/reports_to.png) + +#### :SOLD relationships + +**The LOAD CSV query creates 830 `:SOLD` relationships:** + +```cypher +LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row +WITH row WHERE row._type = 'SOLD' +MATCH (n {nodeID: row._start}), (n2 {nodeID: row._end}) +CREATE (n)-[:SOLD]->(n2); + +MATCH p=()-[:SOLD]->() +RETURN p; +``` + +The second query returns all the nodes connected with the `SOLD` type of +relationship. + +![](/pages/data-migration/migrate-from-neo4j/sold.png) + +#### :CONTAINS relationships + +This relationship type has properties about the `quantity` of products one order +contains. + +As the parser parses all the values as strings, and the value of this +relationship property are numbers, they need to be converted to appropriate +type. + +**The LOAD CSV query creates 2155 `CONTAINS` relationships:** + +```cypher +LOAD CSV FROM "/usr/lib/memgraph/shipping.csv" WITH HEADER AS row +WITH row WHERE row._type = 'CONTAINS' +MATCH (n {nodeID: row._start}), (n2 {nodeID: row._end}) +CREATE (n)-[:CONTAINS {quantity: ToInteger(row.quantity)}]->(n2); + +MATCH p=()-[:CONTAINS]->() +RETURN p; +``` + +The second query returns all the nodes connected with the `CONTAINS` type of +relationship. + +![](/pages/data-migration/migrate-from-neo4j/contains.png) + +## After import + +Once all the 916 nodes and 2993 relationships have been imported decide whether +you want to switch back to the transactional storage mode or not. Remember that +the analytical storage mode you are using right now has no ACID compliance. + +To switch back to the analytical storage mode, run: + +```cypher +STORAGE MODE IN_MEMORY_TRANSACTIONAL; +``` + +To check the switch was successful, run: + +```cypher +SHOW STORAGE INFO; +``` + +You can query the database using the [Cypher query language](/querying), use +various [graph algorithms and modules](/advanced-algorithms) from our +open-source repository MAGE to solve graph analytics problems, create awesome +customized visual displays of your nodes and relationships with [Graph Style +Script](/data-visualization/graph-style-script) and above all - enjoy your new +graph database! + +## Memgraph's office hours + +Schedule a 30 min session with one of our engineers to discuss how Memgraph fits +with your architecture. Our engineers are highly experienced in helping +companies of all sizes to integrate and get the most out of Memgraph in their +projects. Talk to us about data modeling, optimizing queries, defining +infrastructure requirements or migrating from your existing graph database. No +nonsense or sales pitch, just tech. 
+ +![](/pages/getting-started/memgraph-office-hours.svg) + + + + + + diff --git a/pages/data-migration/migrate-from-neo4j-using-single-cypher-query.mdx b/pages/data-migration/migrate-from-neo4j/using-single-cypher-query.mdx similarity index 89% rename from pages/data-migration/migrate-from-neo4j-using-single-cypher-query.mdx rename to pages/data-migration/migrate-from-neo4j/using-single-cypher-query.mdx index cfb320529..285aeb90a 100644 --- a/pages/data-migration/migrate-from-neo4j-using-single-cypher-query.mdx +++ b/pages/data-migration/migrate-from-neo4j/using-single-cypher-query.mdx @@ -9,26 +9,35 @@ import {CommunityLinks} from '/components/social-card/CommunityLinks' # Migrate from Neo4j to Memgraph with a single cypher query -Migrating your graph database from **Neo4j** to **Memgraph** can be done seamlessly using a single Cypher query, -thanks to Memgraph’s built-in [**migrate** module](/advanced-algorithms/available-algorithms/migrate). -This approach eliminates the need for manual data exports and imports, making migration efficient and error-free. +Migrating your graph database from **Neo4j** to **Memgraph** can be done +seamlessly using a single Cypher query, thanks to Memgraph’s built-in +[**migrate** module](/advanced-algorithms/available-algorithms/migrate). This +approach eliminates the need for manual data exports and imports, making +migration efficient and error-free. -The **migrate** module is available directly in the `memgraph/memgraph-mage` Docker image, allowing you -to execute migration queries out of the box without additional installations. +The **migrate** module is available directly in the `memgraph/memgraph-mage` +Docker image, allowing you to execute migration queries out of the box without +additional installations. ## Motivation for having the ready-made migration module -Many database migrations rely on exporting data to CSV files and then importing them into the target system. -While this method is widely used, it has several drawbacks: -- **Time-consuming**: Large datasets require significant time for export and re-import. -- **Manual intervention**: CSV formatting issues often require additional preprocessing. +Many database migrations rely on exporting data to CSV files and then importing +them into the target system. While this method is widely used, it has several +drawbacks: +- **Time-consuming**: Large datasets require significant time for export and + re-import. +- **Manual intervention**: CSV formatting issues often require additional + preprocessing. ### Benefits of direct migration from the source system -With Memgraph’s `migrate` module, you can **stream data directly from Neo4j into Memgraph**, bypassing the need for intermediate files: +With Memgraph’s `migrate` module, you can **stream data directly from Neo4j into +Memgraph**, bypassing the need for intermediate files: - **Instant data transfer**: No need to generate and handle large CSV files. -- **Automatic property mapping**: Ensures seamless migration of node and relationship properties. -- **Cypher expressiveness**: By streaming rows into Memgraph's query engine, you can migrate data and recreate nodes and relationships -in the same fashion -- **Efficient for large graphs**: Can handle millions of nodes and relationships efficiently. +- **Automatic property mapping**: Ensures seamless migration of node and + relationship properties. 
+- **Cypher expressiveness**: By streaming rows into Memgraph's query engine, you
+  can migrate data and recreate nodes and relationships in the same fashion.
+- **Efficient for large graphs**: Can handle millions of nodes and relationships
+  efficiently.
 - **No downtime**: Data can be transferred while Memgraph remains operational.
 
 ## Prerequisites