## Data Processing

In [1]:
from rag_kg.pipeline.process_data import filter_data, create_entity_relation_data, dump_data


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from langchain_openai.chat_models.azure import AzureChatOpenAI
* 'allow_population_by_field_name' has been renamed to 'populate_by_name'
* 'allow_population_by_field_name' has been renamed to 'populate_by_name'
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Keywords handed to filter_data; presumably only records mentioning one of
# these terms are kept — TODO confirm against rag_kg.pipeline.process_data.
filter_keywords = ["paris"]
filtered_data = filter_data(filter_keywords)

In [3]:
# Only the first 10 filtered records are passed to the relation-extraction step.
sample_records = filtered_data[:10]
relations = create_entity_relation_data(sample_records)

# Hand the extracted relations to dump_data (presumably persists them for the
# knowledge-graph step — confirm in rag_kg.pipeline.process_data).
dump_data(relations)

0 [{'entity1': {'name': 'Johann Stamitz', 'type': 'person'}, 'entity2': {'name': 'No. 3', 'type': 'book'}, 'relation': 'written_by'}, {'entity1': {'name': 'Paris', 'type': 'city'}, 'entity2': {'name': 'France', 'type': 'country'}, 'relation': 'located_in'}]
1 [{'entity1': {'name': 'Brussels', 'type': 'city'}, 'entity2': {'name': 'countries', 'type': 'country'}, 'relation': 'associated_with'}, {'entity1': {'name': 'Paris', 'type': 'city'}, 'entity2': {'name': 'countries', 'type': 'country'}, 'relation': 'associated_with'}]
2 [{'entity1': {'name': '1871', 'type': 'date'}, 'entity2': {'name': '1990', 'type': 'date'}, 'relation': 'known_as'}, {'entity1': {'name': 'Paris Commune', 'type': 'event'}, 'entity2': {'name': '1871', 'type': 'date'}, 'relation': 'happened_on'}]
3 [{'entity1': {'name': '1931 European Rowing Championships', 'type': 'event'}, 'entity2': {'name': 'Seine', 'type': 'city'}, 'relation': 'held_in'}, {'entity1': {'name': '1931 European Rowing Championships', 'type': 'event'

## Construct Knowledge Graph

In [4]:
from rag_kg.knowledge_graph.script import clear_database, populate_database, get_all_entities_relations, get_entity_relation

In [5]:
# Reset the graph database, then load data into it.
# NOTE(review): behaviour is inferred from the function names — confirm in
# rag_kg.knowledge_graph.script. Order matters: clear first, populate second.
clear_database()
# Last expression of the cell, so its return value is displayed; in the
# recorded output that is a neo4j Result object.
populate_database()

<neo4j._sync.work.result.Result at 0x24f43488070>

In [6]:
# Fetch everything currently stored in the graph (entities and their relations,
# judging by the function name — confirm the return type).
# NOTE(review): `result` is rebound by the get_entity_relation(...) cell further
# down before anything visible reads this value, so this assignment is
# effectively a smoke test of the populated database.
result = get_all_entities_relations()

## Query Data

In [7]:
from rag_kg.llm_utils.query_processing import EntityExtractor, create_similar_queries

In [8]:
# Natural-language question used as the running example; later cells pass it
# to create_similar_queries and query_planning.
sentence = "What are some sport events that held and some books written in cities of France?"
# Extract the entities mentioned in the question. In the recorded output this
# is a list of dicts with 'name' and 'type' keys.
entities_extractor = EntityExtractor()
entities = entities_extractor.extract(sentence)
# Bare last expression so the notebook displays the extracted entities.
entities

[{'name': 'sport events', 'type': 'event'},
 {'name': 'books', 'type': 'book'},
 {'name': 'France', 'type': 'country'}]

In [9]:
# Query the graph for records related to the extracted entities.
# NOTE(review): the matching semantics live in get_entity_relation — confirm.
# This rebinds `result`, replacing the value assigned earlier from
# get_all_entities_relations().
result = get_entity_relation(entities)

In [None]:
# Render every returned record as an "entity2 - rel -> entity1" string and keep
# only the first occurrence of each distinct string, in encounter order.
# dict.fromkeys de-duplicates while preserving insertion order, avoiding the
# repeated full-list scan that an `x not in list` check performs per record.
# NOTE(review): if `result` is a lazy neo4j Result cursor it can be iterated
# only once, so re-running this cell without re-running the query cell would
# see no records — confirm what get_entity_relation returns.
closest_entities = list(dict.fromkeys(
	f"{dict(record['entity2'])} - {dict(record['rel'])} -> {dict(record['entity1'])}"
	for record in result
))

# Request 5 queries similar to the original question, passing the relation
# strings along as `closest_entities` (presumably used as context by the LLM —
# confirm in rag_kg.llm_utils.query_processing).
queries = create_similar_queries(query=sentence, n_queries=5, closest_entities=closest_entities)
queries

In [11]:
from rag_kg.utils.schema import ComplexityTypes
from rag_kg.llm_utils.query_processing import analyze_query_complexity, get_sequential_queries_with_dependency

def query_planning(query):
	"""Classify ``query`` by complexity and print an execution plan for multi-hop queries.

	Args:
		query: The natural-language question to plan for.

	Returns:
		The string ``"Execute the query directly."`` when the query is
		classified as ``ComplexityTypes.SINGLE_HOP``; ``None`` in every other
		case (the multi-hop plan is printed, not returned).
	"""
	complexity = analyze_query_complexity(query)

	# Simple case: nothing to decompose.
	if complexity == ComplexityTypes.SINGLE_HOP:
		return "Execute the query directly."

	# Any classification other than multi-hop is silently ignored.
	if complexity != ComplexityTypes.MULTIPLE_HOP:
		return None

	plan = get_sequential_queries_with_dependency(query)
	print(plan)

	# Despite the "idx" in the method name, the recorded output shows the
	# iterator yielding sub-query objects exposing .query and .dependencies.
	for step in plan.yield_subquery_idx_to_execute():
		print("Query:", step)
		print("Dependencies:", step.dependencies)
		# A fresh extractor is built for every sub-query, as before.
		step_entities = EntityExtractor().extract(step.query)
		print("Extracted Entities:", step_entities)

	return None

# Run the planner on the example question; the sub-query breakdown shown in the
# cell output is printed by query_planning itself.
query_planning(sentence)

subqueries=[SubQuery(query='What are some cities in France?', dependencies=[]), SubQuery(query='What sport events were held in [city]?', dependencies=[0]), SubQuery(query='What books were written in [city]?', dependencies=[0]), SubQuery(query='Combine the sport events and books written in cities of France.', dependencies=[1, 2])]
Query: query='What are some cities in France?' dependencies=[]
Dependencies: []
Extracted Entities: [{'name': 'France', 'type': 'country'}]
Query: query='What sport events were held in [city]?' dependencies=[0]
Dependencies: [0]
Extracted Entities: [{'name': '[city]', 'type': 'city'}]
Query: query='What books were written in [city]?' dependencies=[0]
Dependencies: [0]
Extracted Entities: [{'name': '[city]', 'type': 'city'}]
Query: query='Combine the sport events and books written in cities of France.' dependencies=[1, 2]
Dependencies: [1, 2]
Extracted Entities: [{'name': 'sport events', 'type': 'event'}, {'name': 'books', 'type': 'book'}, {'name': 'cities', 't