In [13]:
# ! pip install -U langchain-community
# ! pip install llama-index
# %pip install --upgrade --quiet  langchain langchain-community langchain-openai neo4j


Note: you may need to restart the kernel to use updated packages.


In [2]:
import getpass
import os

if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")


## Load and preprocess text data

In [1]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

loader = TextLoader("file.txt")

# Load the document
documents = loader.load()

# Split the text into chunks
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=20)
texts = text_splitter.split_documents(documents)

## Extract Entitites and relationships

In [12]:
from dotenv import load_dotenv

load_dotenv()

True

In [5]:
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Initialize the language model
llm = OpenAI(temperature=0)

# Define the prompt template for entity and relationship extraction
entity_relation_template = """
Extract the main entities and their relationships from the following text:

{text}

Entities:
1.
2.
3.

Relationships:
1.
2.
3.
"""

# Create the prompt
entity_relation_prompt = PromptTemplate(
    input_variables=["text"],
    template=entity_relation_template
)

# Create the chain
entity_relation_chain = LLMChain(llm=llm, prompt=entity_relation_prompt)

# Extract entities and relationships for each text chunk
entities_and_relations = []
for doc in texts:
    result = entity_relation_chain.run(doc.page_content)
    entities_and_relations.append(result)

# Print the results
for i, result in enumerate(entities_and_relations):
    print(f"Chunk {i + 1}:")
    print(result)
    print("\n" + "-"*50 + "\n")


  llm = OpenAI(temperature=0)
  entity_relation_chain = LLMChain(llm=llm, prompt=entity_relation_prompt)
  result = entity_relation_chain.run(doc.page_content)


Chunk 1:

Entities:
1. Sarah
2. Michael
3. prismaticAI
4. Westside Valley
5. software engineer
6. data scientist
7. graduate studies
8. technology company
9. diverse workforce
10. talented individuals
11. innovative products and services
12. clients

Relationships:
1. Sarah is an employee at prismaticAI.
2. Michael is also an employee at prismaticAI.
3. Sarah and Michael both work at prismaticAI.
4. Sarah has been working at prismaticAI for the past three years.
5. Michael joined prismaticAI two years ago.
6. Michael completed his graduate studies before joining prismaticAI.
7. prismaticAI is a leading technology company.
8. prismaticAI is based in Westside Valley.
9. prismaticAI specializes in developing cutting-edge software solutions and artificial intelligence applications.
10. prismaticAI has a diverse workforce.
11. Sarah and Michael are highly skilled professionals.
12. Sarah and Michael contribute significantly to prismaticAI's success.
13. Sarah and Michael work closely with t

# Store the knowledge graph in Neo4J

In [None]:
from neo4j import GraphDatabase
import os

# Neo4j connection details
uri = "bolt://localhost:7687"  # Update with your Neo4j URI
username = os.environ.get("USERNAME")  # Set this environment variable for security
password = os.environ.get("PASSWORD")  # Set this environment variable for security

# Initialize Neo4j driver
driver = GraphDatabase.driver(uri, auth=(username, password))

def create_entity(tx, entity):
    query = (
        "MERGE (e:Entity {name: $name}) "
        "ON CREATE SET e.type = $type "
        "RETURN e"
    )
    return tx.run(query, name=entity['name'], type=entity['type'])

def create_relationship(tx, source, target, relationship):
    query = (
        "MATCH (s:Entity {name: $source}), (t:Entity {name: $target}) "
        "MERGE (s)-[r:RELATES_TO {type: $relationship}]->(t) "
        "RETURN r"
    )
    return tx.run(query, source=source, target=target, relationship=relationship)

# Function to parse entities and relationships from the extracted text
def parse_entities_and_relationships(text):
    lines = text.strip().split('\n')
    entities = []
    relationships = []
    current_section = None

    for line in lines:
        if line.startswith('Entities:'):
            current_section = 'entities'
        elif line.startswith('Relationships:'):
            current_section = 'relationships'
        elif line.strip() and current_section == 'entities':
            entities.append({'name': line.split('.', 1)[1].strip(), 'type': 'Unknown'})
        elif line.strip() and current_section == 'relationships':
            parts = line.split('.', 1)[1].strip().split(' - ')
            if len(parts) == 3:
                relationships.append({
                    'source': parts[0].strip(),
                    'target': parts[2].strip(),
                    'type': parts[1].strip()
                })

    return entities, relationships

# Store the knowledge graph in Neo4j
with driver.session() as session:
    for result in entities_and_relations:
        entities, relationships = parse_entities_and_relationships(result)
        
        # Create entities
        for entity in entities:
            session.execute_write(create_entity, entity)
        
        # Create relationships
        for rel in relationships:
            session.execute_write(create_relationship, rel['source'], rel['target'], rel['type'])

print("Knowledge graph has been stored in Neo4j.")

# Close the driver
driver.close()


## Retrieve knowledge for RAG

In [17]:
from langchain.graphs import Neo4jGraph
from langchain.chains import GraphCypherQAChain
from langchain.llms import OpenAI
username = os.environ.get("USERNAME") 

# Initialize the Neo4jGraph
graph = Neo4jGraph(
    url="bolt://localhost:7687",
    username=username,
    password=password # Using the password from the .env file
)

# Initialize OpenAI LLM
llm = OpenAI(temperature=0)

# Set up the GraphCypherQAChain
qa_chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    verbose=True,
    allow_dangerous_requests=True  # Acknowledge the risks

)

print("GraphCypherQAChain has been set up successfully.")


GraphCypherQAChain has been set up successfully.


# Generate responses

In [19]:
# Define a list of questions to ask
questions = [
    "Where does Sarah work?",
    "Who works for prismaticAI?",
    "Does Michael work for the same company as Sarah?"
]

# Generate responses for each question
for question in questions:
    print(f"Question: {question}")
    response = qa_chain.run(question)
    print(f"Response: {response}\n")


Question: Where does Sarah work?


[1m> Entering new GraphCypherQAChain chain...[0m




Generated Cypher:
[32;1m[1;3m

MATCH (e:Entity)-[:WORKS_AT]->(c:Company)
WHERE e.name = 'Sarah'
RETURN c.name[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
Response:  I don't know the answer.

Question: Who works for prismaticAI?


[1m> Entering new GraphCypherQAChain chain...[0m




Generated Cypher:
[32;1m[1;3m

MATCH (e:Entity)-[:WORKS_FOR]->(p:Entity {name: "prismaticAI"})
RETURN e.name[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
Response:  I don't know the answer.

Question: Does Michael work for the same company as Sarah?


[1m> Entering new GraphCypherQAChain chain...[0m




Generated Cypher:
[32;1m[1;3m

MATCH (m:Entity {name: 'Michael'})-[:WORKS_FOR]->(c:Entity)<-[:WORKS_FOR]-(s:Entity {name: 'Sarah'})
RETURN m, c, s[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
Response:  I don't know the answer.

