-
Notifications
You must be signed in to change notification settings - Fork 63
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: allow alternative vector db engine to be used #106
Changes from 9 commits
f79631d
00b60a9
11231b7
a23fc40
c9b2a06
91864dc
de972df
a0e9860
4f76c46
9fd542c
d0939b9
409d3c7
a197177
0d230c9
d5c7c66
b6a2a40
f5c0e27
7c66364
636b548
0603fa8
3577be3
e896fa3
20d8bc3
e2db4d7
ddb9914
89f0d0a
39b346d
b68580c
6a69279
adedfa4
7466818
e660410
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,10 @@ __pycache__/ | |
*.py[cod] | ||
*$py.class | ||
|
||
notebooks/ | ||
full_run.ipynb | ||
evals/ | ||
|
||
# C extensions | ||
*.so | ||
|
||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -5,7 +5,11 @@ | |||||
import nltk | ||||||
from asyncio import Lock | ||||||
from nltk.corpus import stopwords | ||||||
|
||||||
from cognee.infrastructure.data.chunking.LangchainChunkingEngine import LangchainChunkEngine | ||||||
from cognee.infrastructure.data.chunking.get_chunking_engine import get_chunk_engine | ||||||
from cognee.infrastructure.databases.graph.config import get_graph_config | ||||||
from cognee.infrastructure.databases.vector.embeddings.LiteLLMEmbeddingEngine import LiteLLMEmbeddingEngine | ||||||
from cognee.modules.cognify.graph.add_node_connections import group_nodes_by_layer, \ | ||||||
graph_ready_output, connect_nodes_in_graph | ||||||
from cognee.modules.cognify.graph.add_data_chunks import add_data_chunks, add_data_chunks_basic_rag | ||||||
|
@@ -23,7 +27,7 @@ | |||||
from cognee.modules.data.get_content_summary import get_content_summary | ||||||
from cognee.modules.data.get_cognitive_layers import get_cognitive_layers | ||||||
from cognee.modules.data.get_layer_graphs import get_layer_graphs | ||||||
from cognee.shared.data_models import KnowledgeGraph | ||||||
from cognee.shared.data_models import KnowledgeGraph, ChunkStrategy, ChunkEngine | ||||||
from cognee.shared.utils import send_telemetry | ||||||
from cognee.modules.tasks import create_task_status_table, update_task_status | ||||||
from cognee.shared.SourceCodeGraph import SourceCodeGraph | ||||||
|
@@ -45,9 +49,9 @@ async def cognify(datasets: Union[str, List[str]] = None): | |||||
stopwords.ensure_loaded() | ||||||
create_task_status_table() | ||||||
|
||||||
graph_config = get_graph_config() | ||||||
graph_db_type = graph_config.graph_engine | ||||||
graph_client = await get_graph_client(graph_db_type) | ||||||
# graph_config = get_graph_config() | ||||||
# graph_db_type = graph_config.graph_engine | ||||||
graph_client = await get_graph_client() | ||||||
|
||||||
relational_config = get_relationaldb_config() | ||||||
db_engine = relational_config.database_engine | ||||||
|
@@ -61,14 +65,19 @@ async def handle_cognify_task(dataset_name: str): | |||||
async with update_status_lock: | ||||||
task_status = get_task_status([dataset_name]) | ||||||
|
||||||
if task_status == "DATASET_PROCESSING_STARTED": | ||||||
if dataset_name in task_status and task_status[dataset_name] == "DATASET_PROCESSING_STARTED": | ||||||
logger.info(f"Dataset {dataset_name} is being processed.") | ||||||
return | ||||||
|
||||||
update_task_status(dataset_name, "DATASET_PROCESSING_STARTED") | ||||||
|
||||||
await cognify(dataset_name) | ||||||
update_task_status(dataset_name, "DATASET_PROCESSING_FINISHED") | ||||||
try: | ||||||
await cognify(dataset_name) | ||||||
update_task_status(dataset_name, "DATASET_PROCESSING_FINISHED") | ||||||
except Exception as error: | ||||||
update_task_status(dataset_name, "DATASET_PROCESSING_ERROR") | ||||||
raise error | ||||||
|
||||||
|
||||||
# datasets is a list of dataset names | ||||||
if isinstance(datasets, list): | ||||||
|
@@ -89,7 +98,7 @@ async def handle_cognify_task(dataset_name: str): | |||||
dataset_files.append((added_dataset, db_engine.get_files_metadata(added_dataset))) | ||||||
|
||||||
chunk_config = get_chunk_config() | ||||||
chunk_engine = chunk_config.chunk_engine | ||||||
chunk_engine = get_chunk_engine() | ||||||
chunk_strategy = chunk_config.chunk_strategy | ||||||
|
||||||
async def process_batch(files_batch): | ||||||
|
@@ -139,6 +148,8 @@ async def process_batch(files_batch): | |||||
|
||||||
for (dataset_name, files) in dataset_files: | ||||||
for file_metadata in files: | ||||||
from cognee.infrastructure.databases.graph.config import get_graph_config | ||||||
graph_config = get_graph_config() | ||||||
graph_topology = graph_config.graph_model | ||||||
|
||||||
if graph_topology == SourceCodeGraph: | ||||||
|
@@ -171,7 +182,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi | |||||
print(f"Processing chunk ({chunk_id}) from document ({file_metadata['id']}).") | ||||||
|
||||||
graph_config = get_graph_config() | ||||||
graph_client = await get_graph_client(graph_config.graph_engine) | ||||||
graph_client = await get_graph_client() | ||||||
graph_topology = graph_config.graph_model | ||||||
|
||||||
if graph_topology == SourceCodeGraph: | ||||||
|
@@ -240,52 +251,52 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi | |||||
|
||||||
|
||||||
|
||||||
# if __name__ == "__main__": | ||||||
if __name__ == "__main__": | ||||||
|
||||||
# async def test(): | ||||||
# # await prune.prune_system() | ||||||
# # # | ||||||
# # from cognee.api.v1.add import add | ||||||
# # data_directory_path = os.path.abspath("../../../.data") | ||||||
# # # print(data_directory_path) | ||||||
# # # config.data_root_directory(data_directory_path) | ||||||
# # # cognee_directory_path = os.path.abspath("../.cognee_system") | ||||||
# # # config.system_root_directory(cognee_directory_path) | ||||||
# # | ||||||
# # await add("data://" +data_directory_path, "example") | ||||||
async def test(): | ||||||
# await prune.prune_system() | ||||||
# # | ||||||
# from cognee.api.v1.add import add | ||||||
# data_directory_path = os.path.abspath("../../../.data") | ||||||
# # print(data_directory_path) | ||||||
# # config.data_root_directory(data_directory_path) | ||||||
# # cognee_directory_path = os.path.abspath("../.cognee_system") | ||||||
# # config.system_root_directory(cognee_directory_path) | ||||||
# | ||||||
# await add("data://" +data_directory_path, "example") | ||||||
|
||||||
# text = """import subprocess | ||||||
# def show_all_processes(): | ||||||
# process = subprocess.Popen(['ps', 'aux'], stdout=subprocess.PIPE) | ||||||
# output, error = process.communicate() | ||||||
text = """Conservative PP in the lead in Spain, according to estimate | ||||||
An estimate has been published for Spain: | ||||||
|
||||||
# if error: | ||||||
# print(f"Error: {error}") | ||||||
# else: | ||||||
# print(output.decode()) | ||||||
Opposition leader Alberto Núñez Feijóo’s conservative People’s party (PP): 32.4% | ||||||
|
||||||
# show_all_processes()""" | ||||||
|
||||||
# from cognee.api.v1.add import add | ||||||
|
||||||
# await add([text], "example_dataset") | ||||||
|
||||||
# infrastructure_config.set_config( {"chunk_engine": LangchainChunkEngine() , "chunk_strategy": ChunkStrategy.CODE,'embedding_engine': LiteLLMEmbeddingEngine() }) | ||||||
# from cognee.shared.SourceCodeGraph import SourceCodeGraph | ||||||
# from cognee.api.v1.config import config | ||||||
|
||||||
# # config.set_graph_model(SourceCodeGraph) | ||||||
# # config.set_classification_model(CodeContentPrediction) | ||||||
# # graph = await cognify() | ||||||
# vector_client = infrastructure_config.get_config("vector_engine") | ||||||
|
||||||
# out = await vector_client.search(collection_name ="basic_rag", query_text="show_all_processes", limit=10) | ||||||
|
||||||
# print("results", out) | ||||||
# # | ||||||
# # from cognee.shared.utils import render_graph | ||||||
# # | ||||||
# # await render_graph(graph, include_color=True, include_nodes=False, include_size=False) | ||||||
|
||||||
# import asyncio | ||||||
# asyncio.run(test()) | ||||||
Spanish prime minister Pedro Sánchez’s Socialist party (PSOE): 30.2% | ||||||
|
||||||
The far-right Vox party: 10.4% | ||||||
|
||||||
In Spain, the right has sought to turn the European election into a referendum on Sánchez. | ||||||
|
||||||
Ahead of the vote, public attention has focused on a saga embroiling the prime minister’s wife, Begoña Gómez, who is being investigated over allegations of corruption and influence-peddling, which Sanchez has dismissed as politically-motivated and totally baseless.""" | ||||||
|
||||||
from cognee.api.v1.add import add | ||||||
|
||||||
await add([text], "example_dataset") | ||||||
|
||||||
from cognee.api.v1.config.config import config | ||||||
config.set_chunk_engine(ChunkEngine.LANGCHAIN_ENGINE ) | ||||||
config.set_chunk_strategy(ChunkStrategy.LANGCHAIN_CHARACTER) | ||||||
config.embedding_engine = LiteLLMEmbeddingEngine() | ||||||
|
||||||
graph = await cognify() | ||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The variable `graph` is assigned but never used.
- graph = await cognify()
Committable suggestion
Suggested change
Tools
Ruff
|
||||||
# vector_client = infrastructure_config.get_config("vector_engine") | ||||||
# | ||||||
# out = await vector_client.search(collection_name ="basic_rag", query_text="show_all_processes", limit=10) | ||||||
# | ||||||
# print("results", out) | ||||||
# | ||||||
# from cognee.shared.utils import render_graph | ||||||
# | ||||||
# await render_graph(graph, include_color=True, include_nodes=False, include_size=False) | ||||||
|
||||||
import asyncio | ||||||
asyncio.run(test()) |
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Specify the exception type to improve error handling.
Tools
Ruff