<a href="https://colab.research.google.com/github/richardkilea/Neo4J/blob/main/Vectors_in_Knowledge_graphs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Import packages and set up Neo4j

In [None]:
!pip install python-docx
!pip install python-dotenv
!pip install langchain
!pip install langchainhub
!pip install openai langchain-openai
!pip install -U langchain-openai
!pip install langchain_community
!pip install neo4j

In [2]:
from dotenv import load_dotenv
import os
import getpass, sys, re

from langchain_community.graphs import Neo4jGraph

# Warning control: install a global "ignore" filter so that no warnings are
# shown in cell output for the rest of the session.
# NOTE: this hides every warning category, including deprecation warnings.
import warnings
warnings.filterwarnings("ignore")

In [16]:
# Prompt for the connection settings and the OpenAI key, in this order.
# getpass is used for every value so that nothing typed here is echoed
# into the saved notebook output.
NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD, OPENAI_API_KEY = (
    getpass.getpass(prompt)
    for prompt in (
        'Enter the URI: ',
        'Enter the username: ',
        'Enter the Password: ',
        'Enter openAI KEy: ',
    )
)

Enter the URI: ··········
Enter the username: ··········
Enter the Password: ··········
Enter openAI KEy: ··········


In [39]:
# Derive the embeddings endpoint from an optional custom OpenAI base URL.
# When OPENAI_BASE_URL is unset (or empty) OPENAI_ENDPOINT is None and is
# passed to the Cypher queries as a null `endpoint` option.
openai_base_url = os.getenv('OPENAI_BASE_URL')
if openai_base_url:
    # Strip trailing slashes so a base URL such as ".../v1/" does not turn
    # into ".../v1//embeddings".
    OPENAI_ENDPOINT = openai_base_url.rstrip('/') + '/embeddings'
else:
    OPENAI_ENDPOINT = None

In [19]:
# Open a LangChain Neo4jGraph handle on the knowledge graph, using the
# URI and credentials collected above. Every later cell queries through `kg`.
kg = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
)

### Create a vector index

In [20]:
# Create (only if it does not already exist) a vector index named
# `movie_tagline_embeddings` over the `taglineEmbedding` property of
# `Movie` nodes, using cosine similarity.
# `vector.dimensions` is 1536, which matches the length of the stored
# embeddings checked later in this notebook; presumably this is the output
# size of the OpenAI embedding model in use — confirm if the model changes.
kg.query("""
  CREATE VECTOR INDEX movie_tagline_embeddings IF NOT EXISTS
  FOR (m:Movie) ON (m.taglineEmbedding)
  OPTIONS { indexConfig: {
    `vector.dimensions`: 1536,
    `vector.similarity_function`: 'cosine'
  }}"""
)


[]

In [21]:
# List the vector indexes in the database to confirm that
# `movie_tagline_embeddings` exists and check its state.
kg.query("""
  SHOW VECTOR INDEXES
  """
)

[{'id': 8,
  'name': 'movie_tagline_embeddings',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'VECTOR',
  'entityType': 'NODE',
  'labelsOrTypes': ['Movie'],
  'properties': ['taglineEmbedding'],
  'indexProvider': 'vector-2.0',
  'owningConstraint': None,
  'lastRead': None,
  'readCount': None}]

### Populate the vector index
- Calculate vector representation for each movie tagline using OpenAI
- Add vector to the `Movie` node as `taglineEmbedding` property

In [24]:
# For every Movie that has a tagline, compute an embedding of the tagline
# with genai.vector.encode and store it on the node as `taglineEmbedding`.
# The `endpoint` option is passed exactly as in the similarity-search
# queries further down, so that a custom OPENAI_BASE_URL is honoured both
# when the taglines are embedded and when the question is embedded.
kg.query("""
    MATCH (movie:Movie) WHERE movie.tagline IS NOT NULL
    WITH movie, genai.vector.encode(
        movie.tagline,
        "OpenAI",
        {
          token: $openAiApiKey,
          endpoint: $openAiEndpoint
        }) AS vector
    CALL db.create.setNodeVectorProperty(movie, "taglineEmbedding", vector)
    """,
    params={"openAiApiKey": OPENAI_API_KEY,
            "openAiEndpoint": OPENAI_ENDPOINT})

[]

In [25]:
# Fetch a single Movie that has a tagline, together with its stored
# embedding, so the next cells can inspect what was written.
result = kg.query(
    """
    MATCH (m:Movie)
    WHERE m.tagline IS NOT NULL
    RETURN m.tagline, m.taglineEmbedding
    LIMIT 1
    """
)

In [26]:
# Tagline text of the sampled movie.
result[0]['m.tagline']

'Welcome to the Real World'

In [27]:
# Preview only the first 10 components instead of dumping the whole vector.
result[0]['m.taglineEmbedding'][:10]

[0.017444109544157982,
 -0.005461932625621557,
 -0.0019986676052212715,
 -0.025609157979488373,
 -0.014403513632714748,
 0.016723278909921646,
 -0.017064034938812256,
 0.0005156398983672261,
 -0.025215977802872658,
 -0.02954096347093582]

In [28]:
# Length of the stored vector; it should equal the `vector.dimensions`
# value (1536) the index was created with.
len(result[0]['m.taglineEmbedding'])

1536

### Similarity search
- Calculate embedding for question
- Identify matching movies based on similarity of question and `taglineEmbedding` vectors

In [29]:
# Natural-language question; it is embedded and compared against the
# movies' tagline embeddings in the next cell.
question = "What movies are about love?"

In [42]:
# Embed `question` with genai.vector.encode, then query the
# `movie_tagline_embeddings` vector index for the `top_k` best-matching
# Movie nodes and return each title, tagline and similarity score.
kg.query(
    """
    WITH genai.vector.encode(
        $question,
        "OpenAI",
        {
          token: $openAiApiKey,
          endpoint: $openAiEndpoint
        }) AS question_embedding
    CALL db.index.vector.queryNodes(
        'movie_tagline_embeddings',
        $top_k,
        question_embedding
        ) YIELD node AS movie, score
    RETURN movie.title, movie.tagline, score
    """,
    params={
        "openAiApiKey": OPENAI_API_KEY,
        "openAiEndpoint": OPENAI_ENDPOINT,
        "question": question,
        "top_k": 5,
    },
)

[{'movie.title': 'RescueDawn',
  'movie.tagline': "Based on the extraordinary true story of one man's fight for freedom",
  'score': 0.8997488021850586},
 {'movie.title': 'Cast Away',
  'movie.tagline': 'At the edge of the world, his journey begins.',
  'score': 0.8985832929611206},
 {'movie.title': 'Ninja Assassin',
  'movie.tagline': 'Prepare to enter a secret world of assassins',
  'score': 0.8880105018615723},
 {'movie.title': 'Joe Versus the Volcano',
  'movie.tagline': 'A story of love, lava and burning desire.',
  'score': 0.8869996070861816},
 {'movie.title': 'As Good as It Gets',
  'movie.tagline': 'A comedy from the heart that goes for the throat.',
  'score': 0.8855895400047302}]

### Try for yourself: ask your own question!
- Change the question below and run the graph query to find different movies

In [43]:
# Edit this string to search for a different theme, then re-run the query cell below.
question = "What movies are about horror?"

In [44]:
# Same similarity search as before, run against the current `question`:
# encode the question, look up the `top_k` closest entries in the
# `movie_tagline_embeddings` index, and return title, tagline and score.
kg.query(
    """
    WITH genai.vector.encode(
        $question,
        "OpenAI",
        {
          token: $openAiApiKey,
          endpoint: $openAiEndpoint
        }) AS question_embedding
    CALL db.index.vector.queryNodes(
        'movie_tagline_embeddings',
        $top_k,
        question_embedding
        ) YIELD node AS movie, score
    RETURN movie.title, movie.tagline, score
    """,
    params={
        "openAiApiKey": OPENAI_API_KEY,
        "openAiEndpoint": OPENAI_ENDPOINT,
        "question": question,
        "top_k": 5,
    },
)

[{'movie.title': 'Twister',
  'movie.tagline': "Don't Breathe. Don't Look Back.",
  'score': 0.8952538967132568},
 {'movie.title': 'As Good as It Gets',
  'movie.tagline': 'A comedy from the heart that goes for the throat.',
  'score': 0.890769898891449},
 {'movie.title': "The Devil's Advocate",
  'movie.tagline': 'Evil has its winning ways',
  'score': 0.8824365735054016},
 {'movie.title': 'Ninja Assassin',
  'movie.tagline': 'Prepare to enter a secret world of assassins',
  'score': 0.8816023468971252},
 {'movie.title': 'Joe Versus the Volcano',
  'movie.tagline': 'A story of love, lava and burning desire.',
  'score': 0.8787840604782104}]