This is a question-answering (QA) system based on knowledge graph (KG) querying - this only works with Python 3.10!

Next Steps

    Expand the Knowledge Graph:

        Add more entities and relationships.

        Use a larger dataset like Wikidata or DBpedia.

    Improve NLP:

        Use more advanced models like BERT or GPT for entity and relationship extraction.

    Add a User Interface:

        Build a web or chatbot interface for user interaction.

    Optimize Query Execution:

        Use a graph database like Neo4j for faster querying.

This system is a starting point and can be extended to handle more complex queries and larger datasets. Let me know if you need help with any specific part!

In [1]:
# %pip install rdflib
# %pip install spacy


In [1]:
# Step 1: Build or Use a Knowledge Graph

from rdflib import Graph, URIRef, Literal, Namespace

# Namespace under which every entity and predicate IRI of the demo is minted
ex = Namespace("http://example.org/")

# In-memory RDF graph that the later steps query
kg = Graph()

# Facts of the demo graph as (subject, predicate, object) triples:
# who works where, and where each organisation is located.
for subject, predicate, obj in (
    (ex.Alice, ex.works_at, ex.Google),
    (ex.Bob, ex.works_at, ex.Microsoft),
    (ex.Markus, ex.works_at, ex.MIT),
    (ex.Google, ex.location, ex.California),
    (ex.Microsoft, ex.location, ex.Washington),
    (ex.MIT, ex.location, ex.Cambridge),
):
    kg.add((subject, predicate, obj))

# Optional: write the graph to disk in Turtle format
kg.serialize("knowledge_graph.ttl", format="turtle")

<Graph identifier=N677a45700c864516a7064d2e3021ab68 (<class 'rdflib.graph.Graph'>)>

In [34]:
# Step 2: Natural Language Processing - entity recognition

import spacy

# Load a pre-trained NLP model; parse_question() below relies on this
# module-level `nlp` pipeline.
# NOTE(review): presumably the "en_core_web_sm" model has to be installed
# separately before this call succeeds - confirm against the spaCy docs.
nlp = spacy.load("en_core_web_sm")

def parse_question(question):
    """Extract named entities and verbs from a natural-language question.

    Args:
        question: The question text to analyse.

    Returns:
        A ``(entities, verbs)`` tuple: the text of every entity span found
        by the module-level ``nlp`` pipeline, and the text of every token
        whose part-of-speech tag is ``"VERB"``.
    """
    parsed = nlp(question)

    # Named-entity spans recognised in the question
    entity_texts = []
    for span in parsed.ents:
        entity_texts.append(span.text)

    # Tokens tagged as verbs
    verb_texts = []
    for tok in parsed:
        if tok.pos_ == "VERB":
            verb_texts.append(tok.text)

    return entity_texts, verb_texts

# Example
# question = "Where does Alice work?"
# entities, verbs = parse_question(question)
# print("Entities:", entities)  # ['Alice']
# print("Verbs:", verbs)        # ['work']
# question = "Where does Bob work?"
# entities, verbs = parse_question(question)
# print("Entities:", entities)  # ['Bob']
# print("Verbs:", verbs)        # ['work']
# question = "Where does Markus work?"
# entities, verbs = parse_question(question)
# print("Entities:", entities)  # ['Markus']
# print("Verbs:", verbs)        # ['work']
# Live example: `question`, `entities` and `verbs` stay defined at module
# level and are reused by the Step 3 and Step 5 examples further down.
question = "Where is MIT located?"
entities, verbs = parse_question(question)
print("Entities:", entities)  # ['MIT']
print("Verbs:", verbs)        # ['located']


Entities: ['MIT']
Verbs: ['located']


In [35]:
#  Step 3: Map Natural Language to Knowledge Graph Queries  - this is hard coded for now

def create_query1(entities, verbs):
    """Build a hard-coded SPARQL query for employment-related questions.

    Args:
        entities: Entity names extracted from the question. Only the first
            one is used, as the subject of the query.
        verbs: Verb strings extracted from the question. They select which
            query template is used.

    Returns:
        A SPARQL ``SELECT`` string, or ``None`` when no entity was extracted
        or no template matches the verbs.
    """
    # Without an entity there is no subject to query. Report "no query" the
    # same way as an unmatched verb instead of raising IndexError.
    if not entities:
        return None

    subject = entities[0]

    # NOTE(review): the IRIs below are emitted without a namespace prefix,
    # while the demo graph's triples live under http://example.org/ - verify
    # that these queries match the graph before executing them.
    if "work" in verbs:
        # Direct lookup: which company does the subject work at?
        return f"SELECT ?company WHERE {{ <{subject}> <works_at> ?company }}"
    if "location" in verbs:
        # Two-hop lookup: subject -> employer -> employer's location.
        return f"SELECT ?location WHERE {{ <{subject}> <works_at> ?company . ?company <location> ?location }}"
    return None

# def create_query2(entities, verbs):
#     if "located" in verbs:
#         return f"SELECT ?company WHERE {{ <{entities[0]}> <location> ?location }}"
#     else:
#         return None
    
def create_query2(entities):
    """Build a hard-coded SPARQL query for the location of an entity.

    Args:
        entities: Entity names extracted from the question. Only the first
            one is used, as the subject of the query.

    Returns:
        A SPARQL ``SELECT`` string asking for the subject's ``location``,
        or ``None`` when no entity was extracted.
    """
    # No entity means no subject to query; return None (as create_query1
    # does for "no query") instead of raising IndexError.
    if not entities:
        return None
    return f"SELECT ?location WHERE {{ <{entities[0]}> <location> ?location }}"


# Example
# create_query1 only has templates for the verbs "work" and "location", so
# for verbs == ['located'] it returns None.
query = create_query1(entities, verbs)
print("Query:", query)
print(entities)
# `query` is rebound here; the create_query1 result above is discarded.
query = create_query2(entities)
print("Query:", query)

Query: None
['MIT']
Query: SELECT ?location WHERE { <MIT> <location> ?location }


In [39]:
# Step 4: Execute the Query on the Knowledge Graph

def execute_query(graph, query):
    """Run a query against a graph and collect the first column as strings.

    Args:
        graph: Object exposing a ``query(query)`` method that yields
            indexable result rows.
        query: The query to pass to ``graph.query``.

    Returns:
        A list with ``str(row[0])`` for every row of the result, in
        iteration order.
    """
    first_column = []
    for row in graph.query(query):
        # Only the first selected variable of each row is kept.
        first_column.append(str(row[0]))
    return first_column

# Example1
# NOTE: this query is never executed - `query` is rebound by Example2 below
# before execute_query() is called.
query = """
    SELECT ?company WHERE {
        <http://example.org/Alice> <http://example.org/works_at> ?company .
    }
"""

# Example2
# This is the query that actually runs: it asks for the location of MIT.
query = """
    SELECT ?location WHERE {
        <http://example.org/MIT> <http://example.org/location> ?location .
    }
"""
results = execute_query(kg, query)
print("Results:", results)  # ['http://example.org/Cambridge']

Results: ['http://example.org/Cambridge']


In [42]:
# Step 5: Generate a Natural Language Response

def generate_response(results, question):
    """Turn query results into a short answer string.

    Args:
        results: Result strings (e.g. IRIs) returned by the query. Only the
            first one is used.
        question: The original question text. An answer is produced only
            when it contains "work" or "located".

    Returns:
        The last ``/``-separated segment of the first result when the
        question is of a supported kind and at least one result exists;
        otherwise the fallback text "I don't know the answer.".
    """
    fallback = "I don't know the answer."

    # An empty result set means the graph had no answer; say so instead of
    # raising IndexError on results[0].
    if not results:
        return fallback

    # Both supported question kinds are answered the same way: strip the
    # namespace and keep only the local name of the first result.
    if "work" in question or "located" in question:
        return results[0].split("/")[-1]
    return fallback

# Example
# Uses `results` from the Step 4 example and `question` from the Step 2
# example, i.e. the MIT location question.
response = generate_response(results, question)
print("Response:", response)  # 'Cambridge'

Response: Cambridge
