Skip to content

Commit

Permalink
Add Cypher lexer (#1423)
Browse files Browse the repository at this point in the history
This commit adds a lexer for Cypher.

Co-authored-by: Michael Camilleri <mike@inqk.net>
  • Loading branch information
ggrossetie and pyrmont committed Apr 14, 2020
1 parent 7bf2159 commit 66f45b7
Show file tree
Hide file tree
Showing 4 changed files with 233 additions and 0 deletions.
5 changes: 5 additions & 0 deletions lib/rouge/demos/cypher
@@ -0,0 +1,5 @@
// Cypher demo for Rouge: create a Person node, then return user/follower
// name pairs over a :friend relationship, filtered by a name list and a regex.
CREATE (john:Person {name: 'John'})
MATCH (user)-[:friend]->(follower)
WHERE user.name IN ['Joe', 'John', 'Sara', 'Maria', 'Steve'] AND follower.name =~ 'S.*'
RETURN user.name, follower.name
108 changes: 108 additions & 0 deletions lib/rouge/lexers/cypher.rb
@@ -0,0 +1,108 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
  module Lexers
    # Lexer for the Cypher graph query language.
    #
    # Words are classified case-insensitively: each candidate is upcased and
    # looked up in the function, predicate and keyword sets defined below.
    class Cypher < RegexLexer
      tag 'cypher'
      aliases 'cypher'
      filenames '*.cypher'
      mimetypes 'application/x-cypher-query'

      title "Cypher"
      desc 'The Cypher query language (neo4j.com/docs/cypher-manual)'

      # Names highlighted as functions when directly followed by "(".
      #
      # @return [Set<String>] upper-cased function names (memoized)
      def self.functions
        @functions ||= Set.new %w(
          ABS ACOS ALLSHORTESTPATHS ASIN ATAN ATAN2 AVG CEIL COALESCE COLLECT
          COS COT COUNT DATE DEGREES E ENDNODE EXP EXTRACT FILTER FLOOR
          HAVERSIN HEAD ID KEYS LABELS LAST LEFT LENGTH LOG LOG10 LOWER LTRIM
          MAX MIN NODE NODES PERCENTILECONT PERCENTILEDISC PI RADIANS RAND
          RANGE REDUCE REL RELATIONSHIP RELATIONSHIPS REPLACE REVERSE RIGHT
          ROUND RTRIM SHORTESTPATH SIGN SIN SIZE SPLIT SQRT STARTNODE STDEV
          STDEVP STR SUBSTRING SUM TAIL TAN TIMESTAMP TOFLOAT TOINT TOINTEGER
          TOSTRING TRIM TYPE UPPER
        )
      end

      # Words highlighted as word operators (Operator::Word).
      #
      # @return [Set<String>] upper-cased predicate words (memoized)
      def self.predicates
        @predicates ||= Set.new %w(
          ALL AND ANY CONTAINS EXISTS HAS IN NONE NOT OR SINGLE XOR
        )
      end

      # Words highlighted as keywords.
      #
      # IN also appears in .predicates; the rules below consult predicates
      # first, so IN is always emitted as Operator::Word.
      #
      # @return [Set<String>] upper-cased keywords (memoized)
      def self.keywords
        @keywords ||= Set.new %w(
          AS ASC ASCENDING ASSERT BY CASE COMMIT CONSTRAINT CREATE CSV CYPHER
          DELETE DESC DESCENDING DETACH DISTINCT DROP ELSE END ENDS EXPLAIN
          FALSE FIELDTERMINATOR FOREACH FROM HEADERS IN INDEX IS JOIN LIMIT
          LOAD MATCH MERGE NULL ON OPTIONAL ORDER PERIODIC PROFILE REMOVE
          RETURN SCAN SET SKIP START STARTS THEN TRUE UNION UNIQUE UNWIND USING
          WHEN WHERE WITH CALL YIELD
        )
      end

      state :root do
        rule %r/[\s]+/, Text
        rule %r(//.*?$), Comment::Single
        # Block comments have to be tried before the operator rule, otherwise
        # "/" and "*" would be consumed one character at a time. An
        # unterminated "/*" simply falls through to the operator rule.
        rule %r(/\*.*?\*/)m, Comment::Multiline

        # "/" is the division operator; "//" never gets here because the
        # line-comment rule above is tried first.
        rule %r([*+\-<>=&|~%^/]), Operator
        rule %r/[{}),;\[\]]/, Str::Symbol

        # a word, a colon and a numeric value, e.g. "born: 1965"
        rule %r/(\w+)(:)(\s*)(-?[._\d]+)/ do
          groups Name::Label, Str::Delimiter, Text::Whitespace, Num
        end

        # A word followed by "(", with optional whitespace in between:
        # "name(" or "name (". Predicates are looked up before keywords, in
        # the same order as the bare-word rule further down, so that
        # "NOT (...)" or "exists(...)" is highlighted like "NOT x".
        rule %r/(\w+)(\s*)(\()/ do |m|
          name = m[1].upcase
          word_token =
            if self.class.functions.include? name
              Name::Function
            elsif self.class.predicates.include? name
              Operator::Word
            elsif self.class.keywords.include? name
              Keyword
            else
              Name
            end
          groups word_token, Text::Whitespace, Str::Symbol
        end

        rule %r/:\w+/, Name::Class

        # number range, e.g. "2..2" or "0..-1"
        rule %r/(-?\d+)(\.\.)(-?\d+)/ do
          groups Num, Operator, Num
        end

        # numbers
        rule %r/(\d+\.\d*|\d*\.\d+)(e[+-]?\d+)?/i, Num::Float
        rule %r/\d+e[+-]?\d+/i, Num::Float
        rule %r/0[0-7]+/, Num::Oct
        rule %r/0x[a-f0-9]+/i, Num::Hex
        rule %r/\d+/, Num::Integer

        # a word (dots allowed) immediately followed by ":"
        rule %r([.\w]+:), Name::Property

        # remaining "("
        rule %r/\(/, Str::Symbol

        # bare words, dotted paths and "$" parameters
        rule %r/[.\w$]+/ do |m|
          match = m[0].upcase
          if self.class.predicates.include? match
            token Operator::Word
          elsif self.class.keywords.include? match
            token Keyword
          else
            token Name
          end
        end

        rule %r/"(\\\\|\\"|[^"])*"/, Str::Double
        rule %r/'(\\\\|\\'|[^'])*'/, Str::Single
        rule %r/`(\\\\|\\`|[^`])*`/, Str::Backtick
      end
    end
  end
end
18 changes: 18 additions & 0 deletions spec/lexers/cypher_spec.rb
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

# Spec for the Cypher lexer. It only covers guessing: assert_guess (from
# Support::Guessing) is called once with a filename and once with a mimetype.
describe Rouge::Lexers::Cypher do
  let(:subject) do
    Rouge::Lexers::Cypher.new
  end

  describe 'guessing' do
    include Support::Guessing

    it 'guesses by filename' do
      assert_guess filename: 'foo.cypher'
    end

    it 'guesses by mimetype' do
      assert_guess mimetype: 'application/x-cypher-query'
    end
  end
end
102 changes: 102 additions & 0 deletions spec/visual/samples/cypher
@@ -0,0 +1,102 @@
// Cypher for Rouge
// Person nodes linked by KNOWS relationships
CREATE (john:Person {name: 'John'})
CREATE (joe:Person {name: 'Joe'})
CREATE (steve:Person {name: 'Steve'})
CREATE (sara:Person {name: 'Sara'})
CREATE (maria:Person {name: 'Maria'})
CREATE (john)-[:KNOWS]->(joe)-[:KNOWS]->(steve)
CREATE (john)-[:KNOWS]->(sara)-[:KNOWS]->(maria)

// variable-length relationship (*2..2) and a negated pattern in WHERE
MATCH (joe { name: 'Joe' })-[:knows*2..2]-(friend_of_friend)
WHERE NOT (joe)-[:knows]-(friend_of_friend)
RETURN friend_of_friend.name, COUNT(*)
ORDER BY COUNT(*) DESC , friend_of_friend.name

// LOAD CSV with MERGE ... ON CREATE SET and FOREACH over a CASE expression
LOAD CSV WITH HEADERS FROM "https://dl.dropboxusercontent.com/u/14493611/movies_setup.csv" AS row
MERGE (m:Movie {title:row.title}) ON CREATE SET m.tagline = row.tagline,m.released=row.released
MERGE (p:Person {name:row.name}) ON CREATE SET p.born = row.born
FOREACH (_ in CASE row.type WHEN "ACTED_IN" then [1] else [] end |
MERGE (p)-[r:ACTED_IN]->(m) ON CREATE SET r.roles = split(row.roles,";")[0..-1]
)
FOREACH (_ in CASE row.type WHEN "DIRECTED" then [1] else [] end | MERGE (p)-[:DIRECTED]->(m))
FOREACH (_ in CASE row.type WHEN "PRODUCED" then [1] else [] end | MERGE (p)-[:PRODUCED]->(m))
FOREACH (_ in CASE row.type WHEN "WROTE" then [1] else [] end | MERGE (p)-[:WROTE ]->(m))
FOREACH (_ in CASE row.type WHEN "REVIEWED" then [1] else [] end | MERGE (p)-[:REVIEWED]->(m))

// "$" parameters: $name, $props, $id, $value
MATCH (n:Person { name: $name })
RETURN n

UNWIND $props AS properties
CREATE (n:Person)
SET n = properties
RETURN n

MATCH (n)
WHERE id(n)= $id
RETURN n.name

START n=node:people(name = $value)
RETURN n

// "//" after a statement versus "//" inside a string literal
MATCH (n) RETURN n // This is an end of line comment

MATCH (n) WHERE n.property = '//This is NOT a comment' RETURN n

// pattern comprehension
MATCH (a:Person { name: 'Keanu Reeves' })
RETURN [(a)-->(b) WHERE b:Movie | b.released] AS years

// alias with backticks
MATCH (p:Person {born: 1965})
RETURN p.name AS name, p.born AS `birth year`

// complex queries
LOAD CSV WITH HEADERS FROM 'https://data.neo4j.com/advanced-cypher/movies2.csv' AS row
MERGE (m:Movie {id:toInteger(row.movieId)})
ON CREATE SET m.title=row.title, m.avgVote=toFloat(row.avgVote),
m.releaseYear=toInteger(row.releaseYear), m.genres=split(row.genres,":")
MERGE (p:Person {id: toInteger(row.personId)})
ON CREATE SET p.name = row.name, p.born = toInteger(row.birthYear),
p.died = toInteger(row.deathYear)
WITH row, m, p
CALL apoc.do.when(row.personType = 'ACTOR',
"MERGE (p)-[:ACTED_IN {roles: split(coalesce(row.characters,''), ':')}]->(m)
ON CREATE SET p:Actor",
"MERGE (p)-[:DIRECTED]->(m)
ON CREATE SET p:Director",
{row:row, m:m, p:p}) YIELD value AS value
SET p:Person // cannot end query with APOC call

PROFILE LOAD CSV WITH HEADERS FROM
'https://data.neo4j.com/advanced-cypher/movies2.csv' AS row
WITH row.movieId as movieId, row.title as title, row.genres as genres,
toInteger(row.releaseYear) as releaseYear, toFloat(row.avgVote) as avgVote,
collect({id: row.personId, name:row.name, born: toInteger(row.birthYear), died:toInteger(row.deathYear),personType: row.personType, roles: split(coalesce(row.characters,""),':')}) as people
MERGE (m:Movie {id:movieId})
ON CREATE SET m.title=title, m.avgVote=avgVote,
m.releaseYear=releaseYear, m.genres=split(genres,":")
WITH *
UNWIND people as person
MERGE (p:Person {id: person.id})
ON CREATE SET p.name = person.name, p.born = person.born, p.died = person.died
WITH m, person, p
CALL apoc.do.when(person.personType = 'ACTOR',
"MERGE (p)-[:ACTED_IN {roles: person.roles}]->(m)
ON CREATE SET p:Actor",
"MERGE (p)-[:DIRECTED]->(m)
ON CREATE SET p:Director",
{m:m, p:p, person:person}) YIELD value AS value
RETURN count() // cannot end query with APOC call

// function calls written with and without a space before "("
MATCH (m:Movie)
WITH collect(m.title) AS Movies, collect (m.released) AS Released
WITH Movies, Released,
[x IN Released | date().year - x + 1] AS YearsAgo
RETURN Movies, YearsAgo

MATCH (p:Person)-[rel:ACTED_IN]->(m:Movie {title: 'The Matrix'})
RETURN p, rel, m

MATCH (p:Person)-[rel]->(:Movie {title:'The Matrix'})
RETURN p.name, type(rel)

// numeric literals: negative integer, float, hexadecimal, leading zeros
RETURN sign(-17), sign(0.1), sign(0xABCDEF), sign(000)

0 comments on commit 66f45b7

Please sign in to comment.