In [None]:
import os
import getpass

# Collect the Graphistry credentials interactively and expose them through
# environment variables for the rest of the notebook session.
os.environ['GRAPHISTRY_USERNAME'] = input("Please enter your graphistry username")
# getpass hides the typed password, so it is neither echoed on screen nor
# stored in the notebook's saved cell output (unlike input()).
os.environ['GRAPHISTRY_PASSWORD'] = getpass.getpass("Please enter your graphistry password")

In [None]:
import os
import pathlib
import cognee
from cognee.infrastructure.databases.relational import create_db_and_tables

notebook_path = os.path.abspath("")
data_directory_path = str(pathlib.Path(os.path.join(notebook_path, ".data_storage/code_graph")).resolve())
cognee.config.data_root_directory(data_directory_path)
cognee_directory_path = str(pathlib.Path(os.path.join(notebook_path, ".cognee_system/code_graph")).resolve())
cognee.config.system_root_directory(cognee_directory_path)

await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata = True)

await create_db_and_tables()

In [None]:
from os import path
from pathlib import Path
from cognee.infrastructure.files.storage import LocalStorage
import git

# Clone target: <notebook dir>/.data/graphrag
notebook_path = path.abspath("")
repo_clone_location = path.join(notebook_path, ".data/graphrag")

# Clear the target first -- presumably this removes any earlier checkout so
# the clone below starts from an empty location.
LocalStorage.remove_all(repo_clone_location)

# Use the HTTPS remote: the repository is public, and the SSH form
# (git@github.com:...) only works on machines with a GitHub SSH key
# configured, which breaks a fresh "Run All".
git.Repo.clone_from(
    "https://github.com/microsoft/graphrag.git",
    Path(repo_clone_location),
    branch = "main",
    single_branch = True,
)

In [None]:
from cognee.tasks.repo_processor import enrich_dependency_graph, expand_dependency_graph, get_repo_file_dependencies
from cognee.tasks.storage import add_data_points
from cognee.modules.pipelines.tasks.Task import Task

def _batched(step):
    """Wrap `step` in a Task whose task_config sets batch_size to 50."""
    # A fresh dict is built on every call so no two tasks share a config object.
    return Task(step, task_config={"batch_size": 50})


# Pipeline steps, in execution order. add_data_points appears twice: once after
# the file dependencies are collected and once after the graph is expanded.
tasks = [
    Task(get_repo_file_dependencies),
    _batched(add_data_points),
    _batched(enrich_dependency_graph),
    _batched(expand_dependency_graph),
    _batched(add_data_points),
]

In [None]:
from cognee.modules.pipelines import run_tasks

notebook_path = os.path.abspath("")
repo_clone_location = os.path.join(notebook_path, ".data/graphrag")

pipeline = run_tasks(tasks, repo_clone_location, "code_graph_pipeline")

async for result in pipeline:
    print(result)

In [None]:
from cognee.shared.utils import render_graph
await render_graph(None, include_nodes = True, include_labels = True)

# Let's check the evaluations

In [None]:
from evals.eval_on_hotpot import eval_on_hotpotQA
from evals.eval_on_hotpot import answer_with_cognee
from evals.eval_on_hotpot import answer_without_cognee
from evals.eval_on_hotpot import eval_answers
from cognee.base_config import get_base_config
from pathlib import Path
from tqdm import tqdm
import wget
import json
import statistics

In [None]:
answer_provider = answer_with_cognee # For native LLM answers use answer_without_cognee
num_samples = 10 # With cognee, it takes ~1m10s per sample

base_config = get_base_config()
data_root_dir = base_config.data_root_directory

if not Path(data_root_dir).exists():
    Path(data_root_dir).mkdir()

filepath = data_root_dir / Path("hotpot_dev_fullwiki_v1.json")
if not filepath.exists():
    url = 'http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_dev_fullwiki_v1.json'
    wget.download(url, out=data_root_dir)

with open(filepath, "r") as file:
    dataset = json.load(file)

instances = dataset if not num_samples else dataset[:num_samples]
answers = []
for instance in tqdm(instances, desc="Getting answers"):
    answer = answer_provider(instance)
    answers.append(answer)