### Imports

In [1]:
from typing import List, Optional
from langchain.chains.openai_functions import create_structured_output_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
from langchain_community.graphs import Neo4jGraph
from langchain.chains import GraphCypherQAChain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.chains.graph_qa.cypher_utils import CypherQueryCorrector, Schema
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.documents import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.neo4j_vector import Neo4jVector
import os

### Connect to the Neo4j database and instantiate an LLM Transformer

In [2]:
# Connection settings and credentials are read from the environment so that
# secrets are never saved inside the notebook. Each fallback is the value that
# used to be hard-coded in this cell, so behaviour is unchanged when the
# variables are not set.
openai_apikey = os.environ.get("OPENAI_API_KEY", "")
url = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "")

# Handle to the Neo4j database; later cells write the extracted graph into it.
graph = Neo4jGraph(url=url, username=username, password=password)

# Chat model that drives the graph extraction. temperature=0 asks for the
# least random sampling the model offers.
llm = ChatOpenAI(temperature=0, model_name="gpt-4-0125-preview", openai_api_key=openai_apikey)

# Transformer that turns plain-text documents into graph documents using `llm`.
llm_transformer = LLMGraphTransformer(llm=llm)

### Define an Embeddings Model and a List of Entities to Create Embeddings for

In [4]:
# Name of the OpenAI embedding model used to embed graph nodes. Any OpenAI
# *embedding* model works here (e.g. "text-embedding-3-large" or
# "text-embedding-ada-002"); completion models such as "text-davinci-003"
# cannot produce embeddings.
embeddings_model = "text-embedding-3-small"

# Node labels for which embeddings are created later in the notebook.
entities_list = ['Date', 'Document', 'Event', 'Group', 'Person']

### Provide Email Text and Build the Knowledge Graph (KG)

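Loop through the files `email1.txt` … `email10.txt` in the `emails` folder, extract a graph document from the text of each email with the LLM transformer, and add it to the Neo4j knowledge graph (KG).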

In [5]:
email_folder = 'emails'  # Folder where your email files are stored
num_emails = 10  # Files are expected to be named email1.txt ... email<num_emails>.txt

for i in range(1, num_emails + 1):
    file_path = os.path.join(email_folder, f'email{i}.txt')
    try:
        # Read the whole file first so the handle is closed before the slow
        # LLM and database calls below.
        with open(file_path, 'r', encoding='utf-8') as file:
            text = file.read()

        # Announce the file before the LLM call so progress is visible while
        # the request is in flight.
        print(f'Processing {file_path}')
        documents = [Document(page_content=text)]
        graph_documents = llm_transformer.convert_to_graph_documents(documents)

        # Guard the [0] lookups below: report an empty result explicitly
        # instead of surfacing it as "list index out of range".
        if not graph_documents:
            print(f"Failed to process {file_path}: no graph document was extracted")
            continue

        print(f"Nodes: {graph_documents[0].nodes}")
        print(f"Relationships: {graph_documents[0].relationships}")

        # Persist the extracted nodes and relationships to Neo4j.
        graph.add_graph_documents(graph_documents)
    except Exception as e:
        # Deliberately best-effort: one unreadable file or failed request is
        # reported and must not stop the remaining emails from being processed.
        print(f"Failed to process {file_path}: {e}")

Processing emails/email1.txt
Nodes: [Node(id='Q2 Financial Report', type='Report'), Node(id='Kickoff Meeting', type='Event'), Node(id='Emily', type='Person')]
Relationships: [Relationship(source=Node(id='Emily', type='Person'), target=Node(id='Kickoff Meeting', type='Event'), type='ORGANIZER'), Relationship(source=Node(id='Kickoff Meeting', type='Event'), target=Node(id='Q2 Financial Report', type='Report'), type='DISCUSSION_TOPIC')]
Processing emails/email2.txt
Nodes: [Node(id='Emily', type='Person'), Node(id='Tom', type='Person')]
Relationships: [Relationship(source=Node(id='Tom', type='Person'), target=Node(id='Emily', type='Person'), type='COMMUNICATION')]
Processing emails/email3.txt
Nodes: [Node(id='Emily', type='Person'), Node(id='Sophia', type='Person')]
Relationships: [Relationship(source=Node(id='Sophia', type='Person'), target=Node(id='Emily', type='Person'), type='COMMUNICATION')]
Processing emails/email4.txt
Nodes: [Node(id='Emily', type='Person'), Node(id='Raj', type='Per

### Embed Entities in the KG using the "id" Property

In [6]:
def embed_entities(entity_type):
    """Build a Neo4j vector store over the nodes labelled ``entity_type``.

    Calls ``Neo4jVector.from_existing_graph`` against the database configured
    by the notebook-level ``url``, ``username`` and ``password``, using an
    ``OpenAIEmbeddings`` instance built from ``embeddings_model`` and
    ``openai_apikey``. The node's ``id`` property is the text that gets
    embedded and ``embedding`` is the property the vector is stored in; the
    vector index is named after the label itself.

    Args:
        entity_type: Node label to embed (also used as the index name).

    Returns:
        The object returned by ``Neo4jVector.from_existing_graph``, so callers
        can use it afterwards (presumably for similarity search - confirm
        against the Neo4jVector documentation).
    """
    # NOTE(review): the saved cell output shows a (truncated) deprecation
    # warning; presumably it comes from the legacy
    # `langchain.embeddings.openai` import path - confirm and consider
    # switching to `langchain_openai.OpenAIEmbeddings`.
    return Neo4jVector.from_existing_graph(
        OpenAIEmbeddings(model=embeddings_model, openai_api_key=openai_apikey),
        url=url,
        username=username,
        password=password,
        index_name=entity_type,
        node_label=entity_type,
        text_node_properties=['id'],
        embedding_node_property='embedding',
    )


# Keep one vector store per entity label so later cells can query them
# instead of having to rebuild them.
vector_indexes = {}
for t in entities_list:
    vector_indexes[t] = embed_entities(t)

  warn_deprecated(
