Q: Find the actor named "Tom Hanks".

A: MATCH (tom {name: "Tom Hanks"}) RETURN tom

Q: Find the movie with title "Cloud Atlas"...

A: MATCH (cloudAtlas {title: "Cloud Atlas"}) RETURN cloudAtlas

Q: Find 10 people...

A: MATCH (people:Person) RETURN people.name LIMIT 10

Q: Find movies released in the 1990s...

A: MATCH (nineties:Movie) WHERE nineties.released >= 1990 AND nineties.released < 2000 RETURN nineties.title

Q: List all Tom Hanks movies...

A: MATCH (tom:Person {name: "Tom Hanks"})-[:ACTED_IN]->(tomHanksMovies) RETURN tom,tomHanksMovies

Q: Who directed "Cloud Atlas"?

A: MATCH (cloudAtlas {title: "Cloud Atlas"})<-[:DIRECTED]-(directors) RETURN directors.name

Q: Tom Hanks' co-actors...

A: MATCH (tom:Person {name:"Tom Hanks"})-[:ACTED_IN]->(m)<-[:ACTED_IN]-(coActors) RETURN coActors.name

Q: How are people related to "Cloud Atlas"...

A: MATCH (people:Person)-[relatedTo]-(:Movie {title: "Cloud Atlas"}) RETURN people.name, Type(relatedTo), relatedTo

Q: Movies and actors up to 4 "hops" away from Kevin Bacon

A: MATCH (bacon:Person {name:"Kevin Bacon"})-[*1..4]-(hollywood) RETURN DISTINCT hollywood

Q: Bacon path, the shortest path of any relationships to Meg Ryan

A: MATCH p=shortestPath(
(bacon:Person {name:"Kevin Bacon"})-[*]-(meg:Person {name:"Meg Ryan"})
)
RETURN p

Q: Delete all Movie and Person nodes, and their relationships

A: MATCH (n) DETACH DELETE n

Q: Prove that the Movie Graph is gone

A: MATCH (n) RETURN n

Q: Shortest path

A: MATCH (martin:RoadNode),(oliver:RoadNode),
p = shortestPath((martin)-[*..15]-(oliver))
WHERE id(martin) = 16814 AND id(oliver) = 16820
RETURN p

In [1]:
# Notebook setup: download the spaCy English pipelines via IPython shell
# escapes. The small (sm) and large (lg) models are fetched; the commented
# lines are optional installs (transformer model, TensorFlow, TensorRT,
# spacy-transformers) that are currently disabled.
!python -m spacy download en_core_web_sm
#!python -m spacy download en_core_web_trf
!python -m spacy download en_core_web_lg
#!pip install tensorflow
#!pip install TensorRT
#!pip install spacy-transformers
#!python -m spacy download en_core_web_trf

2023-07-11 19:24:25.943325: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Collecting en-core-web-sm==3.5.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m50.9 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
2023-07-11 19:24:42.473614: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow wi

In [2]:
# Cypher query templates keyed by intent. Placeholders in curly braces are
# filled in with str.replace() (not str.format) by the query generator:
#   {name}                    - text matched case-insensitively against name/title
#   {where_clause}            - a complete "WHERE ..." clause, or '' for no filter
#                               (for 'find_movie_by_relation' it is a bare boolean
#                               expression because the template supplies WHERE)
#   {relationships}           - comma-separated, single-quoted relationship types
#   {first_node}/{second_node}       - bare "var:Label" text; the templates add
#                                      the surrounding parentheses
#   {first_node_nt}/{second_node_nt} - the variable name only ("no type")
#   {hops}                    - maximum path length
query_templates = {
    'find_person': "MATCH (p:Person) WHERE p.name =~ '(?i){name}' RETURN p",
    'find_movie': "MATCH (m:Movie) WHERE m.title =~ '(?i){name}' RETURN m",
    'find_movie_by_relation': "MATCH q=(p:Person)-[r]->(m:Movie) WHERE type(r) in [{relationships}] and {where_clause} RETURN q",
    'find_all_movie_person': "Match q=(p:Person)-[]->(m:Movie) {where_clause} return q",
    'find_persons': "MATCH (p:Person) {where_clause} RETURN p",
    'find_movies': "MATCH (m:Movie) {where_clause} RETURN m",
    # Earlier form of the two-node template, kept for reference:
    # 'shortest_path_between_two': "MATCH (({first_node}) {where_clause_1}), (({second_node}) {where_clause_2}), q = shortestPath(({first_node_nt})-[*..{hops}]-({second_node_nt})) RETURN q",
    'shortest_path_between_two': "MATCH ({first_node}), ({second_node}), q = shortestPath(({first_node_nt})-[*..{hops}]-({second_node_nt})) WHERE {where_clause_1} AND {where_clause_2} RETURN q",
    # {first_node} is wrapped in parentheses here like in every other template;
    # without them a bare "p1:Person" substitution is not a valid node pattern.
    'shortest_path_from_node': "MATCH q = shortestPath(({first_node})-[*..{hops}]-(p)) {where_clause} RETURN p",
    'hops_from_node': "MATCH ({first_node})-[*1..{hops}]-(q) {where_clause} RETURN DISTINCT q"
}

In [3]:
import nltk
from nltk.tokenize import word_tokenize, PunktSentenceTokenizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import spacy
import en_core_web_sm
#import en_core_web_trf
import en_core_web_lg
from spacy.matcher import Matcher
from spacy.pipeline import EntityRuler
import re
# Load the large English spaCy pipeline (en_core_web_lg); the small model
# is left here, commented out, as an alternative.
#nlp = spacy.load("en_core_web_sm")
nlp = spacy.load("en_core_web_lg")

In [4]:
# Perform Named Entity Recognition (NER) on the input text
nlp.tokenizer.token_match = None  # Reset the tokenization patterns
prefixes = list(nlp.Defaults.prefixes) + [r"\b[A-Z]\w*(?:-[A-Z]\w*)?$"]  # Add a pattern for capitalized words
suffixes = list(nlp.Defaults.suffixes) + [r"(?:-[A-Z]\w*)+\b"]  # Add a pattern for hyphenated words
infixes = list(nlp.Defaults.infixes) + [r"(?<=[a-zA-Z])-(?=[a-zA-Z])"]  # Add a pattern for hyphen between words
nlp.tokenizer.prefix_search = spacy.util.compile_prefix_regex(prefixes).search
nlp.tokenizer.suffix_search = spacy.util.compile_suffix_regex(suffixes).search
nlp.tokenizer.infix_finditer = spacy.util.compile_infix_regex(infixes).finditer

# (Re)create the rule-based entity ruler in front of the statistical NER so
# that known movie titles and person names are always tagged. Removing an
# existing ruler first makes this cell safe to re-run.
if nlp.has_pipe("entity_ruler"):
  nlp.remove_pipe("entity_ruler")
ruler = nlp.add_pipe("entity_ruler", before="ner")

# Known movie titles (tagged WORK_OF_ART).
movies_entries = [
    "A Few Good Men", "The Matrix", "The Matrix Reloaded", "The Matrix Revolutions",
    "The Devil's Advocate", "Top Gun", "Jerry Maguire", "Stand By Me",
    "As Good as It Gets", "What Dreams May Come", "Snow Falling on Cedars",
    "You've Got Mail", "Sleepless in Seattle", "Joe Versus the Volcano",
    "When Harry Met Sally", "That Thing You Do", "The Replacements", "RescueDawn",
    "The Birdcage", "Unforgiven", "Johnny Mnemonic", "Cloud Atlas",
    "The Da Vinci Code", "V for Vendetta", "Speed Racer", "Ninja Assassin",
    "The Green Mile", "Frost/Nixon", "Hoffa", "Apollo 13", "Twister", "Cast Away",
    "One Flew Over the Cuckoo's Nest", "Something's Gotta Give", "Bicentennial Man",
    "Charlie Wilson's War", "The Polar Express", "A League of Their Own",
]

# Known person names (tagged PERSON).
person_entries = [
    "Keanu Reeves", "Carrie-Anne Moss", "Laurence Fishburne", "Hugo Weaving",
    "Lilly Wachowski", "Lana Wachowski", "Joel Silver", "Emil Eifrem",
    "Charlize Theron", "Al Pacino", "Taylor Hackford", "Tom Cruise",
    "Jack Nicholson", "Demi Moore", "Kevin Bacon", "Kiefer Sutherland",
    "Noah Wyle", "Cuba Gooding Jr.", "Kevin Pollak", "J.T. Walsh",
    "James Marshall", "Christopher Guest", "Rob Reiner", "Aaron Sorkin",
    "Kelly McGillis", "Val Kilmer", "Anthony Edwards", "Tom Skerritt",
    "Meg Ryan", "Tony Scott", "Jim Cash", "Renee Zellweger", "Kelly Preston",
    "Jerry O'Connell", "Jay Mohr", "Bonnie Hunt", "Regina King",
    "Jonathan Lipnicki", "Cameron Crowe", "River Phoenix", "Corey Feldman",
    "Wil Wheaton", "John Cusack", "Marshall Bell", "Helen Hunt", "Greg Kinnear",
    "James L. Brooks", "Annabella Sciorra", "Max von Sydow", "Werner Herzog",
    "Robin Williams", "Vincent Ward", "Ethan Hawke", "Rick Yune",
    "James Cromwell", "Scott Hicks", "Parker Posey", "Dave Chappelle",
    "Steve Zahn", "Tom Hanks", "Nora Ephron", "Rita Wilson", "Bill Pullman",
    "Victor Garber", "Rosie O'Donnell", "John Patrick Stanley", "Nathan Lane",
    "Billy Crystal", "Carrie Fisher", "Bruno Kirby", "Liv Tyler",
    "Brooke Langton", "Gene Hackman", "Orlando Jones", "Howard Deutch",
    "Christian Bale", "Zach Grenier", "Mike Nichols", "Richard Harris",
    "Clint Eastwood", "Takeshi Kitano", "Dina Meyer", "Ice-T", "Robert Longo",
    "Halle Berry", "Jim Broadbent", "Tom Tykwer", "David Mitchell",
    "Stefan Arndt", "Ian McKellen", "Audrey Tautou", "Paul Bettany",
    "Ron Howard", "Natalie Portman", "Stephen Rea", "John Hurt", "Ben Miles",
    "Emile Hirsch", "John Goodman", "Susan Sarandon", "Matthew Fox",
    "Christina Ricci", "Rain", "Naomie Harris", "Michael Clarke Duncan",
    "David Morse", "Sam Rockwell", "Gary Sinise", "Patricia Clarkson",
    "Frank Darabont", "Frank Langella", "Michael Sheen", "Oliver Platt",
    "Danny DeVito", "John C. Reilly", "Ed Harris", "Bill Paxton",
    "Philip Seymour Hoffman", "Jan de Bont", "Robert Zemeckis", "Milos Forman",
    "Diane Keaton", "Nancy Meyers", "Chris Columbus", "Julia Roberts",
    "Madonna", "Geena Davis", "Lori Petty", "Penny Marshall", "Paul Blythe",
    "Angela Scope", "Jessica Thompson", "James Thompson",
]

# Patterns are lower-cased because the notebook lower-cases every question
# before it is passed to the pipeline. All patterns are registered in one
# add_patterns() call instead of one call per entry.
ruler.add_patterns(
    [{"label": "WORK_OF_ART", "pattern": movie.lower()} for movie in movies_entries]
    + [{"label": "PERSON", "pattern": person.lower()} for person in person_entries]
)

In [5]:
# Relationship Extraction
def relation_extraction(entities, verb_entities):
    """Append RELATIONSHIP entities inferred from the verbs of a question.

    Args:
        entities: list of (text, label) tuples; it is extended in place.
        verb_entities: collection of lower-cased verb strings found in the text.

    Returns:
        The same ``entities`` list, after appending:
          * one (relation, 'RELATIONSHIP') tuple per relation whose trigger
            words occur in ``verb_entities`` (only when a PERSON, MOVIE or
            WORK_OF_ART entity is present);
          * ('ALL', 'RELATIONSHIP') when no relation matched but both a
            movie-like and a PERSON entity are present;
          * a blank ('', 'PERSON') or ('', 'MOVIE') placeholder when a
            relation matched but that side of the relation is missing.
    """
    # Relation name -> words that trigger it, in the order they are reported.
    trigger_table = (
        ('ACTED_IN', ('acted', 'act', 'perform', 'performed')),
        ('FOLLOWS', ('follows', 'followed', 'monitored')),
        ('DIRECTED', ('directed', 'director', 'direction')),
        ('PRODUCED', ('produced', 'made', 'bank rolled', 'rolled')),
        ('REVIEWED', ('judged', 'review', 'reviewed')),
        ('WROTE', ('written', 'wrote', 'author', 'authored')),
    )

    # Snapshot of the labels before anything is appended.
    seen_labels = [entry[1] for entry in entities]
    has_movie = 'MOVIE' in seen_labels or 'WORK_OF_ART' in seen_labels
    has_person = 'PERSON' in seen_labels

    matched = False
    if has_movie or has_person:
        for relation, triggers in trigger_table:
            if any(word in verb_entities for word in triggers):
                entities.append((relation, 'RELATIONSHIP'))
                matched = True

    if not matched:
        # No explicit verb: a person together with a movie means "any relation".
        if has_movie and has_person:
            entities.append(('ALL', 'RELATIONSHIP'))
        return entities

    # A relation matched: add a blank placeholder for the missing endpoint.
    if not has_person:
        entities.append(('', 'PERSON'))
    elif not has_movie:
        entities.append(('', 'MOVIE'))
    return entities


def recognize_entities(text):
    """Extract (text, label) entities from a natural-language question.

    The result combines, in this order:
      * the named entities reported by the spaCy pipeline ``nlp``;
      * blank-text marker entities for keyword tokens: MOVIE, PERSON, HOPS
        (nouns) and PATH (adjectives such as "shortest");
      * QUESTION_OBJECT entries for direct objects whose head token is
        "what", "where" or "how";
      * NAME entries ("<token> <head>") for every token with a ``compound``
        dependency;
      * RELATIONSHIP entries (and blank placeholders) added by
        ``relation_extraction`` from the verbs of the sentence;
      * one ATTRIBUTE entry holding the text between the first and the last
        single quote, when the text contains at least two of them.

    Args:
        text: the question to analyse.

    Returns:
        list of (text, label) tuples.
    """
    doc = nlp(text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]

    verb_entities = []
    for token in doc:
        word = token.text.lower()
        if token.pos_ == "NOUN" and word in ["film", "movie", "films", "movies", "cinema", "cinemas"]:
            entities.append(("", "MOVIE"))
        elif token.pos_ == "NOUN" and word in ["actor", "actress", "star", "stars", "actors", "actresses", "people"]:
            entities.append(("", "PERSON"))
        elif token.pos_ == "NOUN" and word in ["hop", "hops", "step", "steps", "node", "nodes"]:
            entities.append(("", "HOPS"))
        elif token.pos_ == "ADJ" and word in ["closest", "close", "short", "shortest", "near", "nearest"]:
            entities.append(("", "PATH"))
        elif token.pos_ == "VERB":
            verb_entities.append(word)
        if token.dep_ == "dobj" and token.head.lower_ in ["what", "where", "how"]:
            entities.append((token.text, "QUESTION_OBJECT"))
        if token.dep_ == 'compound':
            entities.append((token.text + ' ' + token.head.text, "NAME"))

    entities = relation_extraction(entities, verb_entities)

    # A quoted attribute needs an opening AND a closing quote. A lone
    # apostrophe (e.g. "you've got mail") is not a quotation and must not
    # produce an empty ATTRIBUTE entity.
    first_quote = text.find("'")
    last_quote = text.rfind("'")
    if first_quote != -1 and last_quote > first_quote:
        attribute = text[first_quote + 1:last_quote]
        if attribute:
            entities.append((attribute, "ATTRIBUTE"))
    return entities

In [6]:
# Inspect the statistical NER component of the loaded pipeline.
ner = nlp.get_pipe("ner")
# Get all the unique entity labels recognized by the NER component
labels = ner.labels

# Print the entity labels, one per line
for label in labels:
    print(label)

CARDINAL
DATE
EVENT
FAC
GPE
LANGUAGE
LAW
LOC
MONEY
NORP
ORDINAL
ORG
PERCENT
PERSON
PRODUCT
QUANTITY
TIME
WORK_OF_ART


In [7]:
# Entity label -> (Cypher variable prefix, node label) for graph nodes.
_NODE_KINDS = {
    'PERSON': ('p', 'Person'),
    'MOVIE': ('m', 'Movie'),
    'WORK_OF_ART': ('m', 'Movie'),
}

# Maximum path length used when the question does not state one.
_DEFAULT_MAX_HOPS = 10


def _escape_cypher_literal(value):
  """Escape backslashes and single quotes for use inside a '...' Cypher string."""
  return value.replace('\\', '\\\\').replace("'", "\\'")


def _regex_condition(var, value):
  """Condition: any property of node `var` matches `value` case-insensitively."""
  return ("ANY(attribute IN keys(" + var + ") WHERE " + var
          + "[attribute] =~ '(?i)" + _escape_cypher_literal(value) + "')")


def _attribute_where(entities, var):
  """WHERE clause for node `var` built from ATTRIBUTE / GPE / DATE entities ('' if none)."""
  conditions = []
  for item, label in entities:
    if label in ('ATTRIBUTE', 'GPE') and item != '':
      conditions.append(_regex_condition(var, item))
    elif label == 'DATE' and re.fullmatch(r'\d+', item):
      # Only plain numbers are emitted: the value is spliced in as a Cypher
      # number, so text such as "the 1990s" would make the query invalid.
      conditions.append("ANY(attribute IN keys(" + var + ") WHERE " + var
                        + "[attribute] = " + item + ")")
  return ('WHERE ' + ' OR '.join(conditions)) if conditions else ''


def _relationship_conditions(entities):
  """Conditions on p (Person) / m (Movie) for Person-[r]->Movie queries."""
  person_count = sum(1 for _, label in entities if label == 'PERSON')
  movie_count = sum(1 for _, label in entities if label == 'MOVIE')
  conditions = []
  for item, label in entities:
    if item == '':
      continue
    if label == 'PERSON':
      conditions.append(_regex_condition('p', item))
    elif label == 'WORK_OF_ART':
      conditions.append(_regex_condition('m', item))
    elif label == 'NAME':
      # A compound NAME is attributed to whichever side has exactly one
      # entity, the person side taking precedence.
      if person_count == 1:
        conditions.append(_regex_condition('p', item))
      elif movie_count == 1:
        conditions.append(_regex_condition('m', item))
  return conditions


def _shortest_path_query(entities):
  """Shortest-path query between the first two named Person/Movie entities."""
  endpoints = [(item, label) for item, label in entities
               if label in _NODE_KINDS and item != ''][:2]

  def describe(position):
    # Returns (node declaration, variable name, condition); all '' when the
    # question named fewer endpoints than `position`.
    if position > len(endpoints):
      return '', '', ''
    item, label = endpoints[position - 1]
    prefix, node_label = _NODE_KINDS[label]
    var = prefix + str(position)
    return var + ':' + node_label, var, _regex_condition(var, item)

  first_node, first_node_nt, where_clause_1 = describe(1)
  second_node, second_node_nt, where_clause_2 = describe(2)
  # NOTE(review): with fewer than two named endpoints the template is filled
  # with blanks, which is not a runnable query; callers may want to detect that.
  return (query_templates['shortest_path_between_two']
          .replace('{first_node}', first_node)
          .replace('{first_node_nt}', first_node_nt)
          .replace('{second_node}', second_node)
          .replace('{second_node_nt}', second_node_nt)
          .replace('{where_clause_1}', where_clause_1)
          .replace('{where_clause_2}', where_clause_2)
          .replace('{hops}', str(_DEFAULT_MAX_HOPS)))


def _hops_query(entities):
  """Query for every node within N hops of the named Person/Movie entity."""
  first_node = ''
  conditions = []
  hops = None
  for item, label in entities:
    if label in _NODE_KINDS and item != '':
      prefix, node_label = _NODE_KINDS[label]
      # The last named entity decides the start node declaration.
      # NOTE(review): if both a person and a movie are named, the conditions
      # mention both p and m although only one of them is declared.
      first_node = prefix + ':' + node_label
      conditions.append(_regex_condition(prefix, item))
    elif label == 'CARDINAL' and hops is None and re.fullmatch(r'\d+', item):
      hops = item  # the first plain number is the hop count
  if hops is None:
    # No usable number in the question: fall back to the default instead of
    # leaving the literal "{hops}" placeholder in the query.
    hops = str(_DEFAULT_MAX_HOPS)
  where_clause = ('WHERE ' + ' OR '.join(conditions)) if conditions else ''
  return (query_templates['hops_from_node']
          .replace('{hops}', hops)
          .replace('{first_node}', first_node)
          .replace('{where_clause}', where_clause))


def generate_cypher_query(text):
  """Translate a natural-language question into a Cypher query string.

  The entities returned by ``recognize_entities(text)`` are scanned in order:

    * a PATH entity returns a shortest-path query between the first two
      named Person/Movie entities immediately;
    * a HOPS entity returns a "within N hops" query immediately;
    * otherwise PERSON / WORK_OF_ART / MOVIE entities produce lookup queries
      (only when no RELATIONSHIP entity is present), RELATIONSHIP entities
      produce Person-[r]->Movie queries, and a numeric CARDINAL sets a LIMIT.

  Entity text is matched as a case-insensitive regular expression; quotes and
  backslashes are escaped so the generated string literals stay well-formed,
  but regex metacharacters are passed through unchanged.

  Args:
    text: the question; the notebook passes it lower-cased.

  Returns:
    The generated queries joined with ' UNION ' (each followed by the LIMIT
    clause, if any), or '' when nothing was recognised.
  """
  entities = recognize_entities(text)
  labels = [label for _, label in entities]
  has_relationship = 'RELATIONSHIP' in labels
  has_name = 'NAME' in labels
  person_count = labels.count('PERSON')
  movie_like_count = sum(1 for label in labels if label in ('WORK_OF_ART', 'MOVIE'))

  queries = []
  relation_names = []
  limit_template = ''

  for name, entity_label in entities:
    # SHORTEST PATH
    if entity_label == 'PATH':
      return _shortest_path_query(entities)

    # HOPS
    if entity_label == 'HOPS':
      return _hops_query(entities)

    # PERSON
    if entity_label == 'PERSON' and not has_relationship:
      if name != '':
        queries.append(query_templates['find_person'].replace('{name}', _escape_cypher_literal(name)))
      elif not has_name:
        queries.append(query_templates['find_persons'].replace('{where_clause}', _attribute_where(entities, 'p')))
      elif person_count == 1:
        # Generic "people"/"actor" keyword plus an unresolved compound name:
        # list persons without a filter (placeholder substituted with '').
        queries.append(query_templates['find_persons'].replace('{where_clause}', ''))
    # WORK OF ART
    elif entity_label == 'WORK_OF_ART' and not has_relationship:
      queries.append(query_templates['find_movie'].replace('{name}', _escape_cypher_literal(name)))
    # MOVIE
    elif entity_label == 'MOVIE' and not has_relationship:
      if not has_name:
        queries.append(query_templates['find_movies'].replace('{where_clause}', _attribute_where(entities, 'm')))
      else:
        first_name = next(item for item, label in entities if label == 'NAME')
        if name != '':
          queries.append(query_templates['find_movie'].replace('{name}', _escape_cypher_literal(first_name)))
        elif movie_like_count == 1:
          queries.append(query_templates['find_movies'].replace('{where_clause}', ''))
    # RELATIONSHIP
    elif entity_label == 'RELATIONSHIP':
      if name == 'ALL':
        conditions = _relationship_conditions(entities)
        where_clause = ('WHERE ' + ' OR '.join(conditions)) if conditions else ''
        queries.append(query_templates['find_all_movie_person'].replace('{where_clause}', where_clause))
      else:
        relation_names.append("'" + name + "'")
    # CARDINAL
    elif entity_label == 'CARDINAL':
      # Only plain numbers can be used as a LIMIT ("ten" would be invalid).
      if re.fullmatch(r'\d+', name):
        limit_template = ' LIMIT ' + name

  # ALL RELATIONS: one query covering every specific relationship type found.
  if has_relationship and relation_names:
    conditions = _relationship_conditions(entities)
    # With no filter at all use a neutral predicate rather than an empty "()".
    where_clause = ('(' + ' OR '.join(conditions) + ')') if conditions else 'true'
    queries.append(query_templates['find_movie_by_relation']
                   .replace('{relationships}', ', '.join(relation_names))
                   .replace('{where_clause}', where_clause))

  return ' UNION '.join(query + limit_template for query in queries)

In [8]:
# Run the generator over the sample questions from the Neo4j movie guide and
# print each question followed by the Cypher produced for its lower-cased form.
sample_questions = [
    "Find the actor named Tom Hanks",
    "Find the movie with title Cloud Atlas",
    "Find 10 people born in 1929",
    "Find movies released in the 1990s ninties",
    "List all Tom Hanks movies",
    "Who directed Cloud Atlas?",
    "Tom Hanks co-actors",
    "How people are related to Cloud Atlas",
    # Reference: MATCH (bacon:Person {name:"Kevin Bacon"})-[*1..4]-(hollywood) RETURN DISTINCT hollywood
    "Movies and actors up to 4 hops away from Kevin Bacon",
    # Reference: MATCH p=shortestPath((bacon:Person {name:"Kevin Bacon"})-[*]-(meg:Person {name:"Meg Ryan"})) RETURN p
    "Kevin Bacon, the shortest path of any relationships to Meg Ryan",
]

separator = '------------------------------------------------'
for text in sample_questions:
    print(text)
    print(generate_cypher_query(text.lower()))
    print(separator)

# Reference query for a shortest path between two nodes selected by id:
# MATCH (martin:RoadNode),(oliver:RoadNode), p = shortestPath((martin)-[*..15]-(oliver))  WHERE id(martin) = 16814 AND id(oliver) = 16820 RETURN p

Find the actor named Tom Hanks
MATCH (p:Person) WHERE p.name =~ '(?i)tom hanks' RETURN p
------------------------------------------------
Find the movie with title Cloud Atlas
MATCH (m:Movie) WHERE m.title =~ '(?i)cloud atlas' RETURN m
------------------------------------------------
Find 10 people born in 1929
MATCH (p:Person) WHERE ANY(attribute IN keys(p) WHERE p[attribute] = 1929) RETURN p LIMIT 10
------------------------------------------------
Find movies released in the 1990s ninties
MATCH (m:Movie)  RETURN m
------------------------------------------------
List all Tom Hanks movies
Match q=(p:Person)-[]->(m:Movie) WHERE ANY(attribute IN keys(p) WHERE p[attribute] =~ '(?i)tom hanks')OR  ANY(attribute IN keys(p) WHERE p[attribute] =~ '(?i)tom hanks')OR  ANY(attribute IN keys(p) WHERE p[attribute] =~ '(?i)hanks movies') return q
------------------------------------------------
Who directed Cloud Atlas?
MATCH q=(p:Person)-[r]->(m:Movie) WHERE type(r) in ['DIRECTED'] and ( ANY(attr

In [9]:
# Scratch cell: pick ONE example question, parse it and print the query.
# Alternative inputs (uncomment to try):
#text = "Find the movie with title you've got mail"
#text = "Find 10 people that are born in 1929"
#text = "Find 10 movies that are released in 1929"
#text = "Find the actor named Robert Downey Jr."
#text = "Find the movie with tagline 'Everything is connected' and corrected and released in 1929"
# text = "List all Tom Hanks movies"
# text = "Who directed Cloud Atlas?"
# text = "How people are related to Cloud Atlas"
# text = "Tom Hanks co-actors" #imp
# text = 'Movies and actors within 4 nodes away from Tom Hanks' #hops, nodes, node, hop, steps, step
#text = "Bacon path, the shortest path of any relationships to Meg Ryan"
text = 'the short path between Joaael Silver and TOP GUN'.lower()

# Parse the text; `doc` is kept around for the inspection snippets below.
doc = nlp(text)

# Dependency parse of every token:
# for token in doc:
#     print(token.text, token.dep_, token.head.text)

# Part-of-speech tag of every token:
# for token in doc:
#     print(token.text, token.pos_)

# Raw entity list:
# entities = recognize_entities(text)
# print(entities)

print(generate_cypher_query(text))

# Older template drafts, kept for reference:
# 'shortest_path_between_two': "MATCH q = shortestPath({first_node}-[*..{hops}]-{second_node}) RETURN q",
#     'shortest_path_from_node': "MATCH q = shortestPath({first_node}-[*..{hops}]-{second_node}) RETURN p"

MATCH (p1:Person), (m2:Movie), q = shortestPath((p1)-[*..10]-(m2)) WHERE ANY(attribute IN keys(p1) WHERE p1[attribute] =~ '(?i)joaael silver') AND ANY(attribute IN keys(m2) WHERE m2[attribute] =~ '(?i)top gun') RETURN q


In [11]:
# import pickle

# # Load the model from the pickle file
# with open('nlp_model.pkl', 'rb') as f:
#     nlp = pickle.load(f)

# # Use the loaded NLP model
# doc = nlp("Some text to process")

In [12]:
# import pickle

# # Save the model using pickle
# with open('nlp_model.pkl', 'wb') as f:
#     pickle.dump(nlp, f)
